Mirror of https://github.com/twitter/the-algorithm.git, synced 2025-06-12 07:38:18 -05:00.
Twitter Recommendation Algorithm
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
cr-mixer/server/src/main/resources/BUILD.bazel (new file, 8 lines added)
@@ -0,0 +1,8 @@
resources(
    sources = [
        "*.xml",
        "*.yml",
        "config/*.yml",
    ],
    tags = ["bazel-compatible"],
)

cr-mixer/server/src/main/resources/config/decider.yml (new file, 146 lines added)
@@ -0,0 +1,146 @@
# The keys in this file correspond to the DeciderValues defined in
# https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala

dark_traffic_filter:
  comment: Proportion of the requests that are forwarded as dark traffic to the proxy
  default_availability: 0

enable_tweet_recommendations_home_product:
  comment: Proportion of requests where we return an actual response for TweetRecommendations Home product
  default_availability: 10000

enable_tweet_health_score:
  comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute TweetHealthModelScore"
  default_availability: 10000

enable_user_agatha_score:
  comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute UserHealthModelScore"
  default_availability: 10000

enable_user_tweet_entity_graph_traffic:
  comment: "Enable the traffic to user entity tweet graph to fetch liked-by tweets candidates"
  default_availability: 10000

enable_user_tweet_graph_traffic:
  comment: "Enable the traffic to user tweet graph to fetch similar tweets candidates"
  default_availability: 10000

enable_user_video_graph_traffic:
  comment: "Enable the traffic to user video graph to fetch similar tweets candidates"
  default_availability: 10000

enable_user_ad_graph_traffic:
  comment: "Enable the traffic to user ad graph to fetch similar tweets candidates"
  default_availability: 10000

enable_qig_similar_tweets_traffic:
  comment: "Enable the traffic to QIG to fetch similar tweet candidates"
  default_availability: 0

enable_frs_traffic:
  comment: "Enable the traffic to FRS to fetch user follow recommendations"
  default_availability: 0

enable_hydra_dark_traffic:
  comment: "Enable dark traffic to hydra"
  default_availability: 0

enable_real_graph_mh_store:
  comment: "Enable traffic for the real graph manhattan based store"
  default_availability: 0

enable_simclusters_ann_experimental_dark_traffic:
  comment: "Enable dark traffic to simclusters-ann-experimental"
  default_availability: 0

enable_simclusters_ann_2_dark_traffic:
  comment: "Enable dark traffic to prod SimClustersANN2"
  default_availability: 0

enable_user_state_store:
  comment: "Enable traffic user state store to hydrate user state"
  default_availability: 0

upper_funnel_per_step_scribe_rate:
  comment: "Enable Upper Funnel Event Scribe Sampling (fetch, pre-rank, interleave etc.) for getTweetsRecommendations() endpoint"
  default_availability: 0

kafka_message_scribe_sample_rate:
  comment: "Gates the production of forked scribe messages to kafka for the async feature hydrator"
  default_availability: 0

top_level_api_ddg_metrics_scribe_rate:
  comment: "Enable Top Level API DDG Metrics Scribe Sampling for getTweetsRecommendations() endpoint"
  default_availability: 0

ads_recommendations_per_experiment_scribe_rate:
  comment: "Percentage of DDG traffic to Scribe for getAdsRecommendations() endpoint"
  default_availability: 0

enable_loadshedding_getTweetRecommendations:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getTweetRecommendations_Home:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getTweetRecommendations_Notifications:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getTweetRecommendations_Email:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryTweet:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryTweet_Home:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryTweet_MoreTweetsModule:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryAuthor:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryAuthor_MoreTweetsModule:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getFrsBasedTweetRecommendations_Home:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getFrsBasedTweetRecommendations_Notifications:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_user_media_representation_store:
  comment: "Enable fetching user nudity rate signal from Media Understanding"
  default_availability: 0

enable_magic_recs_real_time_aggregates_store:
  comment: "Enable fetching real time aggregates features from Magic Recs memcache"
  default_availability: 0

enable_utg_realtime_tweet_engagement_score:
  comment: "Enable fetching real time tweet engagement score from utg-plus"
  default_availability: 0

get_tweet_recommendations_cache_rate:
  comment: "Proportion of users where getTweetRecommendations() request and responses will be cached"
  default_availability: 1000

enable_earlybird_traffic:
  comment: "Enable fetching tweet candidates from Earlybird"
  default_availability: 0

enable_scribe_for_blue_verified_tweet_candidates:
  comment: "Enable scribing for tweet candidates from Blue Verified users"
  default_availability: 0
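The default_availability values above follow the usual Twitter decider convention of basis points, where 0 means fully off and 10000 means on for all requests (so get_tweet_recommendations_cache_rate: 1000 would gate roughly 10% of traffic). That convention is an assumption here, since DeciderKey.scala and the decider library are not part of this diff; a minimal self-contained Scala sketch of such a gate:

import scala.util.Random

object DeciderAvailabilitySketch {
  // Hypothetical gate: availability is expressed in [0, 10000] "basis points" of traffic.
  def isAvailable(availability: Int, rng: Random = new Random()): Boolean =
    rng.nextInt(10000) < availability

  def main(args: Array[String]): Unit = {
    val hits = (1 to 100000).count(_ => isAvailable(1000))
    println(s"gated in: $hits of 100000") // roughly 10000, i.e. ~10%
  }
}
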
cr-mixer/server/src/main/resources/logback.xml (new file, 168 lines added)
@@ -0,0 +1,168 @@
<configuration>
  <shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>

  <!-- ===================================================== -->
  <!-- Service Config                                        -->
  <!-- ===================================================== -->
  <property name="DEFAULT_SERVICE_PATTERN"
            value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>

  <property name="DEFAULT_ACCESS_PATTERN"
            value="%msg"/>

  <!-- ===================================================== -->
  <!-- Common Config                                         -->
  <!-- ===================================================== -->

  <!-- JUL/JDK14 to Logback bridge -->
  <contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
    <resetJUL>true</resetJUL>
  </contextListener>

  <!-- ====================================================================================== -->
  <!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration.       -->
  <!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy.          -->
  <!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy        -->
  <!-- ====================================================================================== -->

  <!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
  <appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>${log.service.output}</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- daily rollover -->
      <fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
      <!-- keep 21 days' worth of history -->
      <maxHistory>21</maxHistory>
      <cleanHistoryOnStart>true</cleanHistoryOnStart>
    </rollingPolicy>
    <encoder>
      <pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
    </encoder>
  </appender>

  <!-- Access Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
  <appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>${log.access.output}</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- daily rollover -->
      <fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
      <!-- keep 21 days' worth of history -->
      <maxHistory>21</maxHistory>
      <cleanHistoryOnStart>true</cleanHistoryOnStart>
    </rollingPolicy>
    <encoder>
      <pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
    </encoder>
  </appender>

  <!--LogLens -->
  <appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
    <mdcAdditionalContext>true</mdcAdditionalContext>
    <category>${log.lens.category}</category>
    <index>${log.lens.index}</index>
    <tag>${log.lens.tag}/service</tag>
    <encoder>
      <pattern>%msg</pattern>
    </encoder>
  </appender>

  <!-- LogLens Access -->
  <appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
    <mdcAdditionalContext>true</mdcAdditionalContext>
    <category>${log.lens.category}</category>
    <index>${log.lens.index}</index>
    <tag>${log.lens.tag}/access</tag>
    <encoder>
      <pattern>%msg</pattern>
    </encoder>
  </appender>

  <!-- Pipeline Execution Logs -->
  <appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>allow_listed_pipeline_executions.log</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- daily rollover -->
      <fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
      <!-- keep 7 days' worth of history -->
      <maxHistory>7</maxHistory>
      <cleanHistoryOnStart>true</cleanHistoryOnStart>
    </rollingPolicy>
    <encoder>
      <pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
    </encoder>
  </appender>

  <!-- ===================================================== -->
  <!-- Primary Async Appenders                               -->
  <!-- ===================================================== -->

  <property name="async_queue_size" value="${queue.size:-50000}"/>
  <property name="async_max_flush_time" value="${max.flush.time:-0}"/>

  <appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="SERVICE"/>
  </appender>

  <appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="ACCESS"/>
  </appender>

  <appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
  </appender>

  <appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="LOGLENS"/>
  </appender>

  <appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="LOGLENS-ACCESS"/>
  </appender>

  <!-- ===================================================== -->
  <!-- Package Config                                        -->
  <!-- ===================================================== -->

  <!-- Per-Package Config -->
  <logger name="com.twitter" level="info"/>
  <logger name="com.twitter.wilyns" level="warn"/>
  <logger name="com.twitter.configbus.client.file" level="off"/>
  <logger name="com.twitter.finagle.mux" level="warn"/>
  <logger name="com.twitter.finagle.serverset2" level="warn"/>
  <logger name="com.twitter.logging.ScribeHandler" level="off"/>
  <logger name="com.twitter.zookeeper.client.internal" level="warn"/>
  <logger name="io.netty.handler.ssl.SslHandler" level="OFF"/>

  <!-- Root Config -->
  <root level="${log_level:-INFO}">
    <appender-ref ref="ASYNC-SERVICE"/>
    <appender-ref ref="ASYNC-LOGLENS"/>
  </root>

  <!-- Access Logging -->
  <logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter"
          level="info"
          additivity="false">
    <appender-ref ref="ASYNC-ACCESS"/>
    <appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
  </logger>

  <!-- Pipeline Executions Log -->
  <logger name="com.twitter.product_mixer.core.service.pipeline_execution_logger"
          level="info"
          additivity="false">
    <appender-ref ref="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" />
  </logger>

</configuration>

@@ -0,0 +1,48 @@
scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/inject:guice",
        "3rdparty/jvm/javax/inject:javax.inject",
        "3rdparty/jvm/net/codingwell:scala-guice",
        "3rdparty/jvm/org/slf4j:slf4j-api",
        "cr-mixer/server/src/main/resources",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "decider/src/main/scala",
        "finagle/finagle-core/src/main",
        "finagle/finagle-http/src/main/scala",
        "finagle/finagle-thriftmux/src/main/scala",
        "finatra-internal/mtls-http/src/main/scala",
        "finatra-internal/mtls-thriftmux/src/main/scala",
        "finatra/http-core/src/main/java/com/twitter/finatra/http",
        "finatra/inject/inject-app/src/main/scala",
        "finatra/inject/inject-core/src/main/scala",
        "finatra/inject/inject-server/src/main/scala",
        "finatra/inject/inject-utils/src/main/scala",
        "finatra/utils/src/main/java/com/twitter/finatra/annotations",
        "hydra/common/libraries/src/main/scala/com/twitter/hydra/common/model_config",
        "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/controllers",
        "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module",
        "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters",
        "src/thrift/com/twitter/timelines/render:thrift-scala",
        "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms",
        "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view",
        "timelines/src/main/scala/com/twitter/timelines/features/app",
        "twitter-server-internal",
        "twitter-server/server/src/main/scala",
        "util/util-app/src/main/scala",
        "util/util-core:scala",
        "util/util-slf4j-api/src/main/scala",
    ],
)

@@ -0,0 +1,18 @@
package com.twitter.cr_mixer

import com.twitter.finatra.http.routing.HttpWarmup
import com.twitter.finatra.httpclient.RequestBuilder._
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
class CrMixerHttpServerWarmupHandler @Inject() (warmup: HttpWarmup) extends Handler with Logging {

  override def handle(): Unit = {
    Try(warmup.send(get("/admin/cr-mixer/product-pipelines"), admin = true)())
      .onFailure(e => error(e.getMessage, e))
  }
}

@@ -0,0 +1,229 @@
package com.twitter.cr_mixer

import com.google.inject.Module
import com.twitter.cr_mixer.controller.CrMixerThriftController
import com.twitter.cr_mixer.featureswitch.SetImpressedBucketsLocalContextFilter
import com.twitter.cr_mixer.module.ActivePromotedTweetStoreModule
import com.twitter.cr_mixer.module.CertoStratoStoreModule
import com.twitter.cr_mixer.module.CrMixerParamConfigModule
import com.twitter.cr_mixer.module.EmbeddingStoreModule
import com.twitter.cr_mixer.module.FrsStoreModule
import com.twitter.cr_mixer.module.MHMtlsParamsModule
import com.twitter.cr_mixer.module.OfflineCandidateStoreModule
import com.twitter.cr_mixer.module.RealGraphStoreMhModule
import com.twitter.cr_mixer.module.RealGraphOonStoreModule
import com.twitter.cr_mixer.module.RepresentationManagerModule
import com.twitter.cr_mixer.module.RepresentationScorerModule
import com.twitter.cr_mixer.module.TweetInfoStoreModule
import com.twitter.cr_mixer.module.TweetRecentEngagedUserStoreModule
import com.twitter.cr_mixer.module.TweetRecommendationResultsStoreModule
import com.twitter.cr_mixer.module.TripCandidateStoreModule
import com.twitter.cr_mixer.module.TwhinCollabFilterStratoStoreModule
import com.twitter.cr_mixer.module.UserSignalServiceColumnModule
import com.twitter.cr_mixer.module.UserSignalServiceStoreModule
import com.twitter.cr_mixer.module.UserStateStoreModule
import com.twitter.cr_mixer.module.core.ABDeciderModule
import com.twitter.cr_mixer.module.core.CrMixerFlagModule
import com.twitter.cr_mixer.module.core.CrMixerLoggingABDeciderModule
import com.twitter.cr_mixer.module.core.FeatureContextBuilderModule
import com.twitter.cr_mixer.module.core.FeatureSwitchesModule
import com.twitter.cr_mixer.module.core.KafkaProducerModule
import com.twitter.cr_mixer.module.core.LoggerFactoryModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SimClustersANNSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedQigSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedTwHINSimlarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TwhinCollabFilterLookupSimilarityEngineModule
import com.twitter.cr_mixer.module.ConsumersBasedUserAdGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserTweetGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserVideoGraphStoreModule
import com.twitter.cr_mixer.module.DiffusionStoreModule
import com.twitter.cr_mixer.module.EarlybirdRecencyBasedCandidateStoreModule
import com.twitter.cr_mixer.module.TwiceClustersMembersStoreModule
import com.twitter.cr_mixer.module.StrongTiePredictionStoreModule
import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.EarlybirdSearchClientModule
import com.twitter.cr_mixer.module.thrift_client.FrsClientModule
import com.twitter.cr_mixer.module.thrift_client.QigServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.SimClustersAnnServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.TweetyPieClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphPlusClientModule
import com.twitter.cr_mixer.module.thrift_client.UserVideoGraphClientModule
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.finagle.Filter
import com.twitter.finatra.annotations.DarkTrafficFilterType
import com.twitter.finatra.decider.modules.DeciderModule
import com.twitter.finatra.http.HttpServer
import com.twitter.finatra.http.routing.HttpRouter
import com.twitter.finatra.jackson.modules.ScalaObjectMapperModule
import com.twitter.finatra.mtls.http.{Mtls => HttpMtls}
import com.twitter.finatra.mtls.thriftmux.Mtls
import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule
import com.twitter.finatra.thrift.ThriftServer
import com.twitter.finatra.thrift.filters._
import com.twitter.finatra.thrift.routing.ThriftRouter
import com.twitter.hydra.common.model_config.{ConfigModule => HydraConfigModule}
import com.twitter.inject.thrift.modules.ThriftClientIdModule
import com.twitter.product_mixer.core.module.LoggingThrowableExceptionMapper
import com.twitter.product_mixer.core.module.StratoClientModule
import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule
import com.twitter.relevance_platform.common.filters.ClientStatsFilter
import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule
import com.twitter.cr_mixer.module.SimClustersANNServiceNameToClientMapper
import com.twitter.cr_mixer.module.SkitStratoStoreModule
import com.twitter.cr_mixer.module.BlueVerifiedAnnotationStoreModule
import com.twitter.cr_mixer.module.core.TimeoutConfigModule
import com.twitter.cr_mixer.module.grpc_client.NaviGRPCClientModule
import com.twitter.cr_mixer.module.similarity_engine.CertoTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerBasedWalsSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.DiffusionBasedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.EarlybirdSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SkitTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.UserTweetEntityGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.thrift_client.HydraPartitionClientModule
import com.twitter.cr_mixer.module.thrift_client.HydraRootClientModule
import com.twitter.cr_mixer.module.thrift_client.UserAdGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetEntityGraphClientModule
import com.twitter.thriftwebforms.MethodOptions

object CrMixerServerMain extends CrMixerServer

class CrMixerServer extends ThriftServer with Mtls with HttpServer with HttpMtls {
  override val name = "cr-mixer-server"

  private val coreModules = Seq(
    ABDeciderModule,
    CrMixerFlagModule,
    CrMixerLoggingABDeciderModule,
    CrMixerParamConfigModule,
    new DarkTrafficFilterModule[st.CrMixer.ReqRepServicePerEndpoint](),
    DeciderModule,
    FeatureContextBuilderModule,
    FeatureSwitchesModule,
    KafkaProducerModule,
    LoggerFactoryModule,
    MHMtlsParamsModule,
    ProductMixerFlagModule,
    ScalaObjectMapperModule,
    ThriftClientIdModule
  )

  private val thriftClientModules = Seq(
    AnnQueryServiceClientModule,
    EarlybirdSearchClientModule,
    FrsClientModule,
    HydraPartitionClientModule,
    HydraRootClientModule,
    QigServiceClientModule,
    SimClustersAnnServiceClientModule,
    TweetyPieClientModule,
    UserAdGraphClientModule,
    UserTweetEntityGraphClientModule,
    UserTweetGraphClientModule,
    UserTweetGraphPlusClientModule,
    UserVideoGraphClientModule,
  )

  private val grpcClientModules = Seq(
    NaviGRPCClientModule
  )

  // Modules sorted alphabetically, please keep the order when adding a new module
  override val modules: Seq[Module] =
    coreModules ++ thriftClientModules ++ grpcClientModules ++
      Seq(
        ActivePromotedTweetStoreModule,
        CertoStratoStoreModule,
        CertoTopicTweetSimilarityEngineModule,
        ConsumersBasedUserAdGraphSimilarityEngineModule,
        ConsumersBasedUserTweetGraphStoreModule,
        ConsumersBasedUserVideoGraphSimilarityEngineModule,
        ConsumersBasedUserVideoGraphStoreModule,
        ConsumerEmbeddingBasedTripSimilarityEngineModule,
        ConsumerEmbeddingBasedTwHINSimilarityEngineModule,
        ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule,
        ConsumersBasedUserAdGraphStoreModule,
        ConsumerBasedWalsSimilarityEngineModule,
        DiffusionStoreModule,
        EmbeddingStoreModule,
        EarlybirdSimilarityEngineModule,
        EarlybirdRecencyBasedCandidateStoreModule,
        FrsStoreModule,
        HydraConfigModule,
        OfflineCandidateStoreModule,
        ProducerBasedUnifiedSimilarityEngineModule,
        ProducerBasedUserAdGraphSimilarityEngineModule,
        ProducerBasedUserTweetGraphSimilarityEngineModule,
        RealGraphOonStoreModule,
        RealGraphStoreMhModule,
        RepresentationManagerModule,
        RepresentationScorerModule,
        SimClustersANNServiceNameToClientMapper,
        SimClustersANNSimilarityEngineModule,
        SkitStratoStoreModule,
        SkitTopicTweetSimilarityEngineModule,
        StratoClientModule,
        StrongTiePredictionStoreModule,
        TimeoutConfigModule,
        TripCandidateStoreModule,
        TwiceClustersMembersStoreModule,
        TweetBasedQigSimilarityEngineModule,
        TweetBasedTwHINSimlarityEngineModule,
        TweetBasedUnifiedSimilarityEngineModule,
        TweetBasedUserAdGraphSimilarityEngineModule,
        TweetBasedUserTweetGraphSimilarityEngineModule,
        TweetBasedUserVideoGraphSimilarityEngineModule,
        TweetInfoStoreModule,
        TweetRecentEngagedUserStoreModule,
        TweetRecommendationResultsStoreModule,
        TwhinCollabFilterStratoStoreModule,
        TwhinCollabFilterLookupSimilarityEngineModule,
        UserSignalServiceColumnModule,
        UserSignalServiceStoreModule,
        UserStateStoreModule,
        UserTweetEntityGraphSimilarityEngineModule,
        DiffusionBasedSimilarityEngineModule,
        BlueVerifiedAnnotationStoreModule,
        new MtlsThriftWebFormsModule[st.CrMixer.MethodPerEndpoint](this) {
          override protected def defaultMethodAccess: MethodOptions.Access = {
            MethodOptions.Access.ByLdapGroup(
              Seq(
                "cr-mixer-admins",
                "recosplat-sensitive-data-medium",
                "recos-platform-admins",
              ))
          }
        }
      )

  def configureThrift(router: ThriftRouter): Unit = {
    router
      .filter[LoggingMDCFilter]
      .filter[TraceIdMDCFilter]
      .filter[ThriftMDCFilter]
      .filter[ClientStatsFilter]
      .filter[AccessLoggingFilter]
      .filter[SetImpressedBucketsLocalContextFilter]
      .filter[ExceptionMappingFilter]
      .filter[Filter.TypeAgnostic, DarkTrafficFilterType]
      .exceptionMapper[LoggingThrowableExceptionMapper]
      .add[CrMixerThriftController]
  }

  override protected def warmup(): Unit = {
    handle[CrMixerThriftServerWarmupHandler]()
    handle[CrMixerHttpServerWarmupHandler]()
  }
}

@@ -0,0 +1,75 @@
package com.twitter.cr_mixer

import com.twitter.finagle.thrift.ClientId
import com.twitter.finatra.thrift.routing.ThriftWarmup
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.product_mixer.core.{thriftscala => pt}
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
class CrMixerThriftServerWarmupHandler @Inject() (warmup: ThriftWarmup)
    extends Handler
    with Logging {

  private val clientId = ClientId("thrift-warmup-client")

  def handle(): Unit = {
    val testIds = Seq(1, 2, 3)
    try {
      clientId.asCurrent {
        testIds.foreach { id =>
          val warmupReq = warmupQuery(id)
          info(s"Sending warm-up request to service with query: $warmupReq")
          warmup.sendRequest(
            method = st.CrMixer.GetTweetRecommendations,
            req = Request(st.CrMixer.GetTweetRecommendations.Args(warmupReq)))(assertWarmupResponse)
        }
      }
    } catch {
      case e: Throwable =>
        // we don't want a warmup failure to prevent start-up
        error(e.getMessage, e)
    }
    info("Warm-up done.")
  }

  private def warmupQuery(userId: Long): st.CrMixerTweetRequest = {
    val clientContext = pt.ClientContext(
      userId = Some(userId),
      guestId = None,
      appId = Some(258901L),
      ipAddress = Some("0.0.0.0"),
      userAgent = Some("FAKE_USER_AGENT_FOR_WARMUPS"),
      countryCode = Some("US"),
      languageCode = Some("en"),
      isTwoffice = None,
      userRoles = None,
      deviceId = Some("FAKE_DEVICE_ID_FOR_WARMUPS")
    )
    st.CrMixerTweetRequest(
      clientContext = clientContext,
      product = st.Product.Home,
      productContext = Some(st.ProductContext.HomeContext(st.HomeContext())),
    )
  }

  private def assertWarmupResponse(
    result: Try[Response[st.CrMixer.GetTweetRecommendations.SuccessType]]
  ): Unit = {
    // we collect and log any exceptions from the result.
    result match {
      case Return(_) => // ok
      case Throw(exception) =>
        warn("Error performing warm-up request.")
        error(exception.getMessage, exception)
    }
  }
}

@@ -0,0 +1,77 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
import scala.collection.mutable

@Singleton
case class AdsBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves candidates by iteratively choosing InterestedIn candidates and TWISTLY candidates
   * in turn. InterestedIn candidates have no source signal, whereas TWISTLY candidates do. TWISTLY
   * candidates themselves are interleaved by source before equal blending with InterestedIn
   * candidates.
   */
  def blend(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
  ): Future[Seq[BlendedAdsCandidate]] = {

    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)
    val (interestedInCandidates, twistlyCandidates) =
      candidates.partition(_.head.candidateGenerationInfo.sourceInfoOpt.isEmpty)
    // First interleave twistly candidates
    val interleavedTwistlyCandidates = InterleaveUtil.interleave(twistlyCandidates)

    val twistlyAndInterestedInCandidates =
      Seq(interestedInCandidates.flatten, interleavedTwistlyCandidates)

    // then interleave twistly candidates with interested in to make them even
    val interleavedCandidates = InterleaveUtil.interleave(twistlyAndInterestedInCandidates)

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = buildBlendedAdsCandidate(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }

  private def buildBlendedAdsCandidate(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
    interleavedCandidates: Seq[InitialAdsCandidate]
  ): Seq[BlendedAdsCandidate] = {
    val cgInfoLookupMap = buildCandidateToCGInfosMap(inputCandidates)
    interleavedCandidates.map { interleavedCandidate =>
      interleavedCandidate.toBlendedAdsCandidate(cgInfoLookupMap(interleavedCandidate.tweetId))
    }
  }

  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialAdsCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val tweetIdMap = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]()

    candidateSeq.foreach { candidates =>
      candidates.foreach { candidate =>
        val candidateGenerationInfoSeq = {
          tweetIdMap.getOrElse(candidate.tweetId, Seq.empty)
        }
        val candidateGenerationInfo = candidate.candidateGenerationInfo
        tweetIdMap.put(
          candidate.tweetId,
          candidateGenerationInfoSeq ++ Seq(candidateGenerationInfo))
      }
    }
    tweetIdMap.toMap
  }

}

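The blend() method above first splits the candidate groups by whether their head carries a source signal (InterestedIn groups do not, TWISTLY groups do), interleaves the TWISTLY groups across sources, and only then blends the result evenly with the InterestedIn candidates. InitialAdsCandidate and InterleaveUtil are internal and not shown in this commit; a minimal sketch of just the partition step, using a hypothetical stand-in candidate type:

object AdsPartitionSketch {
  // Hypothetical stand-in for InitialAdsCandidate: only the field the partition reads.
  final case class Candidate(tweetId: Long, sourceSignal: Option[String])

  def main(args: Array[String]): Unit = {
    val groups: Seq[Seq[Candidate]] = Seq(
      Seq(Candidate(1L, None), Candidate(2L, None)),                // InterestedIn: no source signal
      Seq(Candidate(3L, Some("like")), Candidate(4L, Some("like"))) // TWISTLY: keyed by a source signal
    ).filter(_.nonEmpty)

    // Mirrors candidates.partition(_.head.candidateGenerationInfo.sourceInfoOpt.isEmpty):
    // every candidate in a group shares one source, so the head is representative.
    val (interestedIn, twistly) = groups.partition(_.head.sourceSignal.isEmpty)
    println(s"interestedIn groups: ${interestedIn.size}, twistly groups: ${twistly.size}")
  }
}
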
@@ -0,0 +1,20 @@
scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/twitter/storehaus:core",
        "3rdparty/jvm/javax/inject:javax.inject",
        "3rdparty/src/jvm/com/twitter/storehaus:core",
        "configapi/configapi-core",
        "content-recommender/thrift/src/main/thrift:thrift-scala",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "snowflake/src/main/scala/com/twitter/snowflake/id",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
    ],
)

@@ -0,0 +1,48 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.simclusters_v2.common.TweetId
import scala.collection.mutable

object BlendedCandidatesBuilder {

  /**
   * @param inputCandidates input candidate prior to interleaving
   * @param interleavedCandidates after interleaving. These tweets are de-duplicated.
   */
  def build(
    inputCandidates: Seq[Seq[InitialCandidate]],
    interleavedCandidates: Seq[InitialCandidate]
  ): Seq[BlendedCandidate] = {
    val cgInfoLookupMap = buildCandidateToCGInfosMap(inputCandidates)
    interleavedCandidates.map { interleavedCandidate =>
      interleavedCandidate.toBlendedCandidate(cgInfoLookupMap(interleavedCandidate.tweetId))
    }
  }

  /**
   * The same tweet can be generated by different sources.
   * This function tells you which CandidateGenerationInfo generated a given tweet
   */
  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val tweetIdMap = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]()

    candidateSeq.foreach { candidates =>
      candidates.foreach { candidate =>
        val candidateGenerationInfoSeq = {
          tweetIdMap.getOrElse(candidate.tweetId, Seq.empty)
        }
        val candidateGenerationInfo = candidate.candidateGenerationInfo
        tweetIdMap.put(
          candidate.tweetId,
          candidateGenerationInfoSeq ++ Seq(candidateGenerationInfo))
      }
    }
    tweetIdMap.toMap
  }

}

@@ -0,0 +1,121 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject

case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Exposes multiple types of sorting relying only on Content Based signals
   * Candidate Recency, Random, FavoriteCount and finally Standardized, which standardizes the scores
   * that come from the active SimilarityEngine and then sort on the standardized scores.
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)
    val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency =>
        candidates.flatten.sortBy(c => getSnowflakeTimeStamp(c.tweetId)).reverse
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting =>
        candidates.flatten.sortBy(_ => scala.util.Random.nextDouble())
      case BlenderParams.ContentBasedSortingAlgorithmEnum.FavoriteCount =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SimilarityToSignalSorting =>
        standardizeAndSortByScore(flattenAndGroupByEngineTypeOrFirstContribEngine(candidates))
      case _ =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
    }

    stats.stat("candidates").add(sortedCandidates.size)

    val blendedCandidates =
      BlendedCandidatesBuilder.build(inputCandidates, removeDuplicates(sortedCandidates))
    Future.value(blendedCandidates)
  }

  private def removeDuplicates(candidates: Seq[InitialCandidate]): Seq[InitialCandidate] = {
    val seen = collection.mutable.Set.empty[Long]
    candidates.filter { c =>
      if (seen.contains(c.tweetId)) {
        false
      } else {
        seen += c.tweetId
        true
      }
    }
  }

  private def groupByEngineTypeOrFirstContribEngine(
    candidates: Seq[InitialCandidate]
  ): Map[SimilarityEngineType, Seq[InitialCandidate]] = {
    val grouped = candidates.groupBy { candidate =>
      val contrib = candidate.candidateGenerationInfo.contributingSimilarityEngines
      if (contrib.nonEmpty) {
        contrib.head.similarityEngineType
      } else {
        candidate.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
      }
    }
    grouped
  }

  private def flattenAndGroupByEngineTypeOrFirstContribEngine(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[Seq[InitialCandidate]] = {
    val flat = candidates.flatten
    val grouped = groupByEngineTypeOrFirstContribEngine(flat)
    grouped.values.toSeq
  }

  private def standardizeAndSortByScore(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[InitialCandidate] = {
    candidates
      .map { innerSeq =>
        val meanScore = innerSeq
          .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0))
          .sum / innerSeq.length
        val stdDev = scala.math
          .sqrt(
            innerSeq
              .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0))
              .map(a => a - meanScore)
              .map(a => a * a)
              .sum / innerSeq.length)
        innerSeq
          .map(c =>
            (
              c,
              c.candidateGenerationInfo.similarityEngineInfo.score
                .map { score =>
                  if (stdDev != 0) (score - meanScore) / stdDev
                  else 0.0
                }
                .getOrElse(0.0)))
      }.flatten.sortBy { case (_, standardizedScore) => -standardizedScore }
      .map { case (candidate, _) => candidate }
  }

  private def getSnowflakeTimeStamp(tweetId: Long): Time = {
    val isSnowflake = SnowflakeId.isSnowflakeId(tweetId)
    if (isSnowflake) {
      SnowflakeId(tweetId).time
    } else {
      Time.fromMilliseconds(0L)
    }
  }
}

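standardizeAndSortByScore above z-scores each engine group (subtract the group mean, divide by the group's population standard deviation, fall back to 0 when there is no spread) so that scores from different similarity engines become comparable before one global descending sort. A minimal sketch of the same arithmetic on plain doubles:

object StandardizeSketch {
  // z-score each group, then sort every value by its standardized score, descending.
  def standardizeAndSort(groups: Seq[Seq[Double]]): Seq[Double] =
    groups.flatMap { group =>
      val mean = group.sum / group.length
      val stdDev = math.sqrt(group.map(s => (s - mean) * (s - mean)).sum / group.length)
      group.map(s => (s, if (stdDev != 0) (s - mean) / stdDev else 0.0))
    }.sortBy { case (_, z) => -z }.map { case (raw, _) => raw }

  def main(args: Array[String]): Unit = {
    // Engine A scores in [0, 1], engine B in [0, 100]: sorting raw scores would drown out A.
    println(standardizeAndSort(Seq(Seq(0.9, 0.1, 0.5), Seq(80.0, 20.0, 50.0))))
  }
}
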
@@ -0,0 +1,90 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.util.CountWeightedInterleaveUtil
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton

/**
 * A weighted round robin interleaving algorithm.
 * The weight of each blending group based on the count of candidates in each blending group.
 * The more candidates under a blending group, the more candidates are selected from it during round
 * robin, which in effect prioritizes this group.
 *
 * Weights sum up to 1. For example:
 * total candidates = 8
 * Group               Weight
 * [A1, A2, A3, A4]    4/8 = 0.5   // select 50% of results from group A
 * [B1, B2]            2/8 = 0.25  // 25% from group B
 * [C1, C2]            2/8 = 0.25  // 25% from group C
 *
 * Blended results = [A1, A2, B1, C1, A3, A4, B2, C2]
 * See @linht's go/weighted-interleave
 */
@Singleton
case class CountWeightedInterleaveBlender @Inject() (globalStats: StatsReceiver) {
  import CountWeightedInterleaveBlender._

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  def blend(
    query: CrCandidateGeneratorQuery,
    inputCandidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[BlendedCandidate]] = {
    val weightedBlenderQuery = CountWeightedInterleaveBlender.paramToQuery(query.params)
    countWeightedInterleave(weightedBlenderQuery, inputCandidates)
  }

  private[blender] def countWeightedInterleave(
    query: WeightedBlenderQuery,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {

    val candidatesAndWeightKeyByIndexId: Seq[(Seq[InitialCandidate], Double)] = {
      CountWeightedInterleaveUtil.buildInitialCandidatesWithWeightKeyByFeature(
        inputCandidates,
        query.rankerWeightShrinkage)
    }

    val interleavedCandidates =
      InterleaveUtil.weightedInterleave(candidatesAndWeightKeyByIndexId, query.maxWeightAdjustments)

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }
}

object CountWeightedInterleaveBlender {

  /**
   * We pass two parameters to the weighted interleaver:
   * @param rankerWeightShrinkage shrinkage parameter between [0, 1] that determines how close we
   *                              stay to uniform sampling. The bigger the shrinkage the
   *                              closer we are to uniform round robin
   * @param maxWeightAdjustments max number of weighted sampling to do prior to defaulting to
   *                             uniform. Set so that we avoid infinite loops (e.g. if weights are
   *                             0)
   */
  case class WeightedBlenderQuery(
    rankerWeightShrinkage: Double,
    maxWeightAdjustments: Int)

  def paramToQuery(params: Params): WeightedBlenderQuery = {
    val rankerWeightShrinkage: Double =
      params(BlenderParams.RankingInterleaveWeightShrinkageParam)
    val maxWeightAdjustments: Int =
      params(BlenderParams.RankingInterleaveMaxWeightAdjustments)

    WeightedBlenderQuery(rankerWeightShrinkage, maxWeightAdjustments)
  }
}

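The class comment above already works the example: a group's weight is its candidate count divided by the total, so larger groups are drawn from more often during the weighted round robin. CountWeightedInterleaveUtil and InterleaveUtil.weightedInterleave are internal; a minimal sketch reproducing only that weight computation (shrinkage toward uniform and the max-adjustment cap are left out):

object CountWeightSketch {
  // Weight of each group = group size / total number of candidates; weights sum to 1.
  def countWeights[T](groups: Seq[Seq[T]]): Seq[Double] = {
    val total = groups.map(_.size).sum.toDouble
    groups.map(_.size / total)
  }

  def main(args: Array[String]): Unit = {
    val groups = Seq(
      Seq("A1", "A2", "A3", "A4"), // 4/8 = 0.5
      Seq("B1", "B2"),             // 2/8 = 0.25
      Seq("C1", "C2")              // 2/8 = 0.25
    )
    println(countWeights(groups).mkString(", ")) // 0.5, 0.25, 0.25
  }
}
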
@@ -0,0 +1,33 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
case class InterleaveBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves candidates, by taking 1 candidate from each Seq[Seq[InitialCandidate]] in sequence,
   * until we run out of candidates.
   */
  def blend(
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {

    val interleavedCandidates = InterleaveUtil.interleave(inputCandidates)

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }

}

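The doc comment above describes plain round-robin: take one candidate from each input sequence in turn until every sequence is exhausted. InterleaveUtil itself is not part of this commit, so the following is only a sketch of that behaviour using the standard library:

object InterleaveSketch {
  // One element from each non-exhausted sequence per pass, in input order.
  def interleave[T](groups: Seq[Seq[T]]): Seq[T] = {
    val iterators = groups.map(_.iterator)
    Iterator
      .continually(iterators.filter(_.hasNext).map(_.next()))
      .takeWhile(_.nonEmpty)
      .flatten
      .toSeq
  }

  def main(args: Array[String]): Unit = {
    // [A1, A2, A3], [B1, B2], [C1] => A1, B1, C1, A2, B2, A3
    println(interleave(Seq(Seq("A1", "A2", "A3"), Seq("B1", "B2"), Seq("C1"))).mkString(", "))
  }
}
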
@@ -0,0 +1,64 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypes
import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypesWithVideo
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject

case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Partition the candidates based on source type
   * Interleave the two partitions of candidates separately
   * Then append the back fill candidates to the end
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {

    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)

    val backFillSourceTypes =
      if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) BackFillSourceTypesWithVideo
      else BackFillSourceTypes
    // partition candidates based on their source types
    val (backFillCandidates, regularCandidates) =
      candidates.partition(
        _.head.candidateGenerationInfo.sourceInfoOpt
          .exists(sourceInfo => backFillSourceTypes.contains(sourceInfo.sourceType)))

    val interleavedRegularCandidates = InterleaveUtil.interleave(regularCandidates)
    val interleavedBackFillCandidates =
      InterleaveUtil.interleave(backFillCandidates)
    stats.stat("backFillCandidates").add(interleavedBackFillCandidates.size)
    // Append interleaved backfill candidates to the end
    val interleavedCandidates = interleavedRegularCandidates ++ interleavedBackFillCandidates

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }

}

object ImplicitSignalBackFillBlender {
  final val BackFillSourceTypesWithVideo: Set[SourceType] = Set(
    SourceType.UserRepeatedProfileVisit,
    SourceType.VideoTweetPlayback50,
    SourceType.VideoTweetQualityView)

  final val BackFillSourceTypes: Set[SourceType] = Set(SourceType.UserRepeatedProfileVisit)
}

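As the doc comment above says, candidate groups produced by back-fill source types (implicit signals such as repeated profile visits, optionally the video engagement signals) are interleaved separately and appended after the regular candidates, so they can never displace them. A minimal sketch of that ordering guarantee, with a hypothetical candidate type and flatten standing in for the internal InterleaveUtil.interleave:

object BackFillSketch {
  // Hypothetical minimal candidate: an id plus the source type that produced it.
  final case class Candidate(id: String, sourceType: String)

  val BackFillSourceTypes: Set[String] = Set("UserRepeatedProfileVisit")

  def main(args: Array[String]): Unit = {
    val groups = Seq(
      Seq(Candidate("r1", "TweetFavorite"), Candidate("r2", "TweetFavorite")),
      Seq(Candidate("b1", "UserRepeatedProfileVisit"))
    ).filter(_.nonEmpty)

    // Partition whole groups by the head's source type, as blend() does above.
    val (backFill, regular) =
      groups.partition(g => BackFillSourceTypes.contains(g.head.sourceType))

    // Interleave each side separately, then append back-fill so it always ranks last.
    val ordered = regular.flatten ++ backFill.flatten
    println(ordered.map(_.id).mkString(", ")) // r1, r2, b1
  }
}
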
@@ -0,0 +1,81 @@
package com.twitter.cr_mixer.blender

import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.param.BlenderParams.BlendingAlgorithmEnum
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
case class SwitchBlender @Inject() (
  defaultBlender: InterleaveBlender,
  sourceTypeBackFillBlender: SourceTypeBackFillBlender,
  adsBlender: AdsBlender,
  contentSignalBlender: ContentSignalBlender,
  globalStats: StatsReceiver) {

  private val stats = globalStats.scope(this.getClass.getCanonicalName)

  def blend(
    params: Params,
    userState: UserState,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Take out empty seq
    val nonEmptyCandidates = inputCandidates.collect {
      case candidates if candidates.nonEmpty =>
        candidates
    }
    stats.stat("num_of_sequences").add(inputCandidates.size)

    // Sort the seqs in an order
    val innerSignalSorting = params(BlenderParams.SignalTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SourceSignalRecency =>
        SwitchBlender.TimestampOrder
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting => SwitchBlender.RandomOrder
      case _ => SwitchBlender.TimestampOrder
    }

    val candidatesToBlend = nonEmptyCandidates.sortBy(_.head)(innerSignalSorting)
    // Blend based on specified blender rules
    params(BlenderParams.BlendingAlgorithmParam) match {
      case BlendingAlgorithmEnum.RoundRobin =>
        defaultBlender.blend(candidatesToBlend)
      case BlendingAlgorithmEnum.SourceTypeBackFill =>
        sourceTypeBackFillBlender.blend(params, candidatesToBlend)
      case BlendingAlgorithmEnum.SourceSignalSorting =>
        contentSignalBlender.blend(params, candidatesToBlend)
      case _ => defaultBlender.blend(candidatesToBlend)
    }
  }
}

object SwitchBlender {

  /**
   * Prefers candidates generated from sources with the latest timestamps.
   * The newer the source signal, the higher a candidate ranks.
   * This ordering biases against consumer-based candidates because their timestamp defaults to 0
   *
   * Within a Seq[Seq[Candidate]], all candidates within a inner Seq
   * are guaranteed to have the same sourceInfo because they are grouped by (sourceInfo, SE model).
   * Hence, we can pick .headOption to represent the whole list when filtering by the internalId of the sourceInfoOpt.
   * But of course the similarityEngine score in a CGInfo could be different.
   */
  val TimestampOrder: Ordering[InitialCandidate] =
    math.Ordering
      .by[InitialCandidate, Time](
        _.candidateGenerationInfo.sourceInfoOpt
          .flatMap(_.sourceEventTime)
          .getOrElse(Time.fromMilliseconds(0L)))
      .reverse

  private val RandomOrder: Ordering[InitialCandidate] =
    Ordering.by[InitialCandidate, Double](_ => scala.util.Random.nextDouble())
}

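TimestampOrder above orders candidate groups so that the group whose source signal is newest comes first, with missing timestamps treated as 0 and therefore sorted last (the bias against consumer-based candidates the comment mentions). A minimal sketch of the same ordering with plain epoch milliseconds in place of com.twitter.util.Time:

object TimestampOrderSketch {
  // Hypothetical minimal candidate: an id plus an optional source-event timestamp (epoch ms).
  final case class Candidate(id: String, sourceEventTimeMs: Option[Long])

  // Newest source signal first; candidates without a signal sort as timestamp 0, i.e. last.
  val TimestampOrder: Ordering[Candidate] =
    Ordering.by[Candidate, Long](_.sourceEventTimeMs.getOrElse(0L)).reverse

  def main(args: Array[String]): Unit = {
    val groups = Seq(
      Seq(Candidate("older", Some(1000L))),
      Seq(Candidate("newer", Some(2000L))),
      Seq(Candidate("noSignal", None))
    )
    // Sort groups by their head candidate, as SwitchBlender does before dispatching.
    println(groups.sortBy(_.head)(TimestampOrder).map(_.head.id).mkString(", ")) // newer, older, noSignal
  }
}
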
@@ -0,0 +1,140 @@
package com.twitter.cr_mixer.candidate_generation

import com.twitter.cr_mixer.blender.AdsBlender
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.RankedAdsCandidate
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.param.AdsParams
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
import com.twitter.cr_mixer.source_signal.UssSourceSignalFetcher
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future

import javax.inject.Inject
import javax.inject.Singleton

@Singleton
class AdsCandidateGenerator @Inject() (
  ussSourceSignalFetcher: UssSourceSignalFetcher,
  realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
  adsCandidateSourceRouter: AdsCandidateSourcesRouter,
  adsBlender: AdsBlender,
  scribeLogger: AdsRecommendationsScribeLogger,
  globalStats: StatsReceiver) {

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val fetchSourcesStats = stats.scope("fetchSources")
  private val fetchRealGraphSeedsStats = stats.scope("fetchRealGraphSeeds")
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val interleaveStats = stats.scope("interleave")
  private val rankStats = stats.scope("rank")

  def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = {
    val allStats = stats.scope("all")
    val perProductStats = stats.scope("perProduct", query.product.toString)

    StatsUtil.trackItemsStats(allStats) {
      StatsUtil.trackItemsStats(perProductStats) {
        for {
          // fetch source signals
          sourceSignals <- StatsUtil.trackBlockStats(fetchSourcesStats) {
            fetchSources(query)
          }
          realGraphSeeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) {
            fetchSeeds(query)
          }
          // get initial candidates from similarity engines
          // hydrate lineItemInfo and filter out non active ads
          initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
            fetchCandidates(query, sourceSignals, realGraphSeeds)
          }

          // blend candidates
          blendedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
            interleave(initialCandidates)
          }

          rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
            rank(
              blendedCandidates,
              query.params(AdsParams.EnableScoreBoost),
              query.params(AdsParams.AdsCandidateGenerationScoreBoostFactor),
              rankStats)
          }
        } yield {
          rankedCandidates.take(query.maxNumResults)
        }
      }
    }

  }

  def fetchSources(
    query: AdsCandidateGeneratorQuery
  ): Future[Set[SourceInfo]] = {
    val fetcherQuery =
      FetcherQuery(query.userId, query.product, query.userState, query.params)
    ussSourceSignalFetcher.get(fetcherQuery).map(_.getOrElse(Seq.empty).toSet)
  }

  private def fetchCandidates(
    query: AdsCandidateGeneratorQuery,
    sourceSignals: Set[SourceInfo],
    realGraphSeeds: Map[UserId, Double]
  ): Future[Seq[Seq[InitialAdsCandidate]]] = {
    scribeLogger.scribeInitialAdsCandidates(
      query,
      adsCandidateSourceRouter
        .fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params),
      query.params(AdsParams.EnableScribe)
    )

  }

  private def fetchSeeds(
    query: AdsCandidateGeneratorQuery
  ): Future[Map[UserId, Double]] = {
    if (query.params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
      realGraphInSourceGraphFetcher
        .get(FetcherQuery(query.userId, query.product, query.userState, query.params))
|
||||
.map(_.map(_.seedWithScores).getOrElse(Map.empty))
|
||||
} else Future.value(Map.empty[UserId, Double])
|
||||
}
|
||||
|
||||
private def interleave(
|
||||
candidates: Seq[Seq[InitialAdsCandidate]]
|
||||
): Future[Seq[BlendedAdsCandidate]] = {
|
||||
adsBlender
|
||||
.blend(candidates)
|
||||
}
|
||||
|
||||
private def rank(
|
||||
candidates: Seq[BlendedAdsCandidate],
|
||||
enableScoreBoost: Boolean,
|
||||
scoreBoostFactor: Double,
|
||||
statsReceiver: StatsReceiver,
|
||||
): Future[Seq[RankedAdsCandidate]] = {
|
||||
|
||||
val candidateSize = candidates.size
|
||||
val rankedCandidates = candidates.zipWithIndex.map {
|
||||
case (candidate, index) =>
|
||||
val score = 0.5 + 0.5 * ((candidateSize - index).toDouble / candidateSize)
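// The score lies in (0.5, 1.0]: the top-ranked candidate gets 1.0 and the last gets just above 0.5.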
|
||||
val boostedScore = if (enableScoreBoost) {
|
||||
statsReceiver.stat("boostedScore").add((100.0 * score * scoreBoostFactor).toFloat)
|
||||
score * scoreBoostFactor
|
||||
} else {
|
||||
statsReceiver.stat("score").add((100.0 * score).toFloat)
|
||||
score
|
||||
}
|
||||
candidate.toRankedAdsCandidate(boostedScore)
|
||||
}
|
||||
Future.value(rankedCandidates)
|
||||
}
|
||||
}
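// --------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the position-based
// scoring used in `rank` above, reduced to plain values. The object and
// method names here are hypothetical; only the formula
// score = 0.5 + 0.5 * (n - i) / n and the optional multiplicative boost are
// taken from the code above.
// --------------------------------------------------------------------------
object AdsRankScoreSketch {
  def positionScores(n: Int, enableBoost: Boolean, boostFactor: Double): Seq[Double] =
    (0 until n).map { index =>
      val score = 0.5 + 0.5 * ((n - index).toDouble / n)
      if (enableBoost) score * boostFactor else score
    }

  def main(args: Array[String]): Unit = {
    // With 4 candidates and no boost: 1.0, 0.875, 0.75, 0.625
    println(positionScores(4, enableBoost = false, boostFactor = 1.0))
    // The same positions with a uniform 1.2x boost applied
    println(positionScores(4, enableBoost = true, boostFactor = 1.2))
  }
}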
|
@ -0,0 +1,516 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.InitialAdsCandidate
|
||||
import com.twitter.cr_mixer.model.ModelConfig
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
|
||||
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
|
||||
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.param.InterestedInParams
|
||||
import com.twitter.cr_mixer.param.ProducerBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.SimClustersANNParams
|
||||
import com.twitter.cr_mixer.param.TweetBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.FilterUtil
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserAdGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine.Query
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUserAdGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.simclusters_v2.common.ModelVersions
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class AdsCandidateSourcesRouter @Inject() (
|
||||
activePromotedTweetStore: ReadableStore[TweetId, Seq[LineItemInfo]],
|
||||
decider: CrMixerDecider,
|
||||
@Named(ModuleNames.SimClustersANNSimilarityEngine) simClustersANNSimilarityEngine: StandardSimilarityEngine[
|
||||
Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.TweetBasedUserAdGraphSimilarityEngine)
|
||||
tweetBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUserAdGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.ConsumersBasedUserAdGraphSimilarityEngine)
|
||||
consumersBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumersBasedUserAdGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.ProducerBasedUserAdGraphSimilarityEngine)
|
||||
producerBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
ProducerBasedUserAdGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine)
|
||||
tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) consumerTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
|
||||
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumerBasedWalsSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
globalStats: StatsReceiver,
|
||||
) {
|
||||
|
||||
import AdsCandidateSourcesRouter._
|
||||
|
||||
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
|
||||
|
||||
def fetchCandidates(
|
||||
requestUserId: UserId,
|
||||
sourceSignals: Set[SourceInfo],
|
||||
realGraphSeeds: Map[UserId, Double],
|
||||
params: configapi.Params
|
||||
): Future[Seq[Seq[InitialAdsCandidate]]] = {
|
||||
|
||||
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
|
||||
|
||||
val tweetBasedSANNMinScore = params(
|
||||
TweetBasedCandidateGenerationParams.SimClustersMinScoreParam)
|
||||
val tweetBasedSANN1Candidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN1ConfigId,
|
||||
tweetBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
|
||||
val tweetBasedSANN2Candidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN2ConfigId,
|
||||
tweetBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val tweetBasedUagCandidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableUAGParam)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getTweetBasedUserAdGraphCandidates(Some(sourceInfo), params)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val realGraphInNetworkBasedUagCandidates =
|
||||
if (params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
|
||||
getRealGraphConsumersBasedUserAdGraphCandidates(realGraphSeeds, params).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val producerBasedUagCandidates =
|
||||
if (params(ProducerBasedCandidateGenerationParams.EnableUAGParam)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getProducerBasedUserAdGraphCandidates(Some(sourceInfo), params)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val tweetBasedTwhinAdsCandidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableTwHINParam)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getTwHINAdsCandidates(
|
||||
tweetBasedTwHINANNSimilarityEngine,
|
||||
SimilarityEngineType.TweetBasedTwHINANN,
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
ModelConfig.DebuggerDemo)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val producerBasedSANNMinScore = params(
|
||||
ProducerBasedCandidateGenerationParams.SimClustersMinScoreParam)
|
||||
val producerBasedSANN1Candidates =
|
||||
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN1ConfigId,
|
||||
producerBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
val producerBasedSANN2Candidates =
|
||||
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN2ConfigId,
|
||||
producerBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val interestedInMinScore = params(InterestedInParams.MinScoreParam)
|
||||
val interestedInSANN1Candidates = if (params(InterestedInParams.EnableSimClustersANN1Param)) {
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
None,
|
||||
params,
|
||||
simClustersANN1ConfigId,
|
||||
interestedInMinScore).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val interestedInSANN2Candidates = if (params(InterestedInParams.EnableSimClustersANN2Param)) {
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
None,
|
||||
params,
|
||||
simClustersANN2ConfigId,
|
||||
interestedInMinScore).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val consumerTwHINAdsCandidates =
|
||||
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) {
|
||||
getTwHINAdsCandidates(
|
||||
consumerTwHINANNSimilarityEngine,
|
||||
SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN,
|
||||
requestUserId,
|
||||
None,
|
||||
ModelConfig.DebuggerDemo).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val consumerBasedWalsCandidates =
|
||||
if (params(
|
||||
ConsumerBasedWalsParams.EnableSourceParam
|
||||
)) {
|
||||
getConsumerBasedWalsCandidates(sourceSignals, params)
|
||||
}.map {
|
||||
Seq(_)
|
||||
}
|
||||
else Future.value(Seq.empty)
|
||||
|
||||
Future
|
||||
.collect(Seq(
|
||||
tweetBasedSANN1Candidates,
|
||||
tweetBasedSANN2Candidates,
|
||||
tweetBasedUagCandidates,
|
||||
tweetBasedTwhinAdsCandidates,
|
||||
producerBasedUagCandidates,
|
||||
producerBasedSANN1Candidates,
|
||||
producerBasedSANN2Candidates,
|
||||
realGraphInNetworkBasedUagCandidates,
|
||||
interestedInSANN1Candidates,
|
||||
interestedInSANN2Candidates,
|
||||
consumerTwHINAdsCandidates,
|
||||
consumerBasedWalsCandidates,
|
||||
)).map(_.flatten).map { tweetsWithCGInfoSeq =>
|
||||
Future.collect(
|
||||
tweetsWithCGInfoSeq.map(candidates => convertToInitialCandidates(candidates, stats)))
|
||||
}.flatten.map { candidatesLists =>
|
||||
val result = candidatesLists.filter(_.nonEmpty)
|
||||
stats.stat("numOfSequences").add(result.size)
|
||||
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
stats: StatsReceiver
|
||||
): Future[Seq[InitialAdsCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
stats.stat("initialCandidateSizeBeforeLineItemFilter").add(tweetIds.size)
|
||||
Future.collect(activePromotedTweetStore.multiGet(tweetIds)).map { lineItemInfos =>
|
||||
/***
|
||||
* If lineItemInfo does not exist, we will filter out the promoted tweet as it cannot be targeted and ranked in admixer
|
||||
*/
|
||||
val filteredCandidates = candidates.collect {
|
||||
case candidate if lineItemInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val lineItemInfo = lineItemInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialAdsCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
lineItemInfo = lineItemInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
stats.stat("initialCandidateSizeAfterLineItemFilter").add(filteredCandidates.size)
|
||||
filteredCandidates
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def getSimClustersANNCandidates(
|
||||
requestUserId: UserId,
|
||||
sourceInfo: Option[SourceInfo],
|
||||
params: configapi.Params,
|
||||
configId: String,
|
||||
minScore: Double
|
||||
) = {
|
||||
|
||||
val simClustersModelVersion =
|
||||
ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))
|
||||
|
||||
val embeddingType =
|
||||
if (sourceInfo.isEmpty) {
|
||||
params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType
|
||||
} else getSimClustersANNEmbeddingType(sourceInfo.get)
|
||||
val query = SimClustersANNSimilarityEngine.fromParams(
|
||||
if (sourceInfo.isEmpty) InternalId.UserId(requestUserId) else sourceInfo.get.internalId,
|
||||
embeddingType,
|
||||
simClustersModelVersion,
|
||||
configId,
|
||||
params
|
||||
)
|
||||
|
||||
// Fire-and-forget dark traffic to SimClustersANN2; the returned Future is intentionally not awaited
|
||||
if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) {
|
||||
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
|
||||
val sann2Query = SimClustersANNSimilarityEngine.fromParams(
|
||||
if (sourceInfo.isEmpty) InternalId.UserId(requestUserId) else sourceInfo.get.internalId,
|
||||
embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params
|
||||
)
|
||||
simClustersANNSimilarityEngine
|
||||
.getCandidates(sann2Query)
|
||||
}
|
||||
|
||||
simClustersANNSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.filter(_.score > minScore).map {
|
||||
tweetWithScore =>
|
||||
val similarityEngineInfo = SimClustersANNSimilarityEngine
|
||||
.toSimilarityEngineInfo(query, tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getProducerBasedUserAdGraphCandidates(
|
||||
sourceInfo: Option[SourceInfo],
|
||||
params: configapi.Params
|
||||
) = {
|
||||
|
||||
val query = ProducerBasedUserAdGraphSimilarityEngine.fromParams(
|
||||
sourceInfo.get.internalId,
|
||||
params
|
||||
)
|
||||
producerBasedUserAdGraphSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore =>
|
||||
val similarityEngineInfo = ProducerBasedUserAdGraphSimilarityEngine
|
||||
.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getTweetBasedUserAdGraphCandidates(
|
||||
sourceInfo: Option[SourceInfo],
|
||||
params: configapi.Params
|
||||
) = {
|
||||
|
||||
val query = TweetBasedUserAdGraphSimilarityEngine.fromParams(
|
||||
sourceInfo.get.internalId,
|
||||
params
|
||||
)
|
||||
tweetBasedUserAdGraphSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore =>
|
||||
val similarityEngineInfo = TweetBasedUserAdGraphSimilarityEngine
|
||||
.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getRealGraphConsumersBasedUserAdGraphCandidates(
|
||||
realGraphSeeds: Map[UserId, Double],
|
||||
params: configapi.Params
|
||||
) = {
|
||||
|
||||
val query = ConsumersBasedUserAdGraphSimilarityEngine
|
||||
.fromParams(realGraphSeeds, params)
|
||||
|
||||
// The internalId is a placeholder value. We do not plan to store the full seedUserId set.
|
||||
val sourceInfo = SourceInfo(
|
||||
sourceType = SourceType.RealGraphIn,
|
||||
internalId = InternalId.UserId(0L),
|
||||
sourceEventTime = None
|
||||
)
|
||||
consumersBasedUserAdGraphSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore =>
|
||||
val similarityEngineInfo = ConsumersBasedUserAdGraphSimilarityEngine
|
||||
.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
Some(sourceInfo),
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getTwHINAdsCandidates(
|
||||
similarityEngine: HnswANNSimilarityEngine,
|
||||
similarityEngineType: SimilarityEngineType,
|
||||
requestUserId: UserId,
|
||||
sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine
|
||||
model: String
|
||||
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
|
||||
val internalId =
|
||||
if (sourceInfo.nonEmpty) sourceInfo.get.internalId else InternalId.UserId(requestUserId)
|
||||
similarityEngine
|
||||
.getCandidates(buildHnswANNQuery(internalId, model)).map(_.getOrElse(Seq.empty)).map(_.map {
|
||||
tweetWithScore =>
|
||||
val similarityEngineInfo = SimilarityEngineInfo(
|
||||
similarityEngineType = similarityEngineType,
|
||||
modelId = Some(model),
|
||||
score = Some(tweetWithScore.score))
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getConsumerBasedWalsCandidates(
|
||||
sourceSignals: Set[SourceInfo],
|
||||
params: configapi.Params
|
||||
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
|
||||
// Fetch source signals and filter them based on age.
|
||||
val signals = FilterUtil.tweetSourceAgeFilter(
|
||||
getConsumerBasedWalsSourceInfo(sourceSignals).toSeq,
|
||||
params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
|
||||
|
||||
val candidatesOptFut = consumerBasedWalsSimilarityEngine.getCandidates(
|
||||
ConsumerBasedWalsSimilarityEngine.fromParams(signals, params)
|
||||
)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
val filteredCandidates =
|
||||
FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam))
|
||||
consumerBasedWalsSimilarityEngine.getScopedStats
|
||||
.stat("filteredCandidates_size").add(filteredCandidates.size)
|
||||
|
||||
val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo =
|
||||
ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
} yield tweetsWithCandidateGenerationInfoOpt.toSeq.flatten
|
||||
}
|
||||
}
|
||||
|
||||
object AdsCandidateSourcesRouter {
|
||||
def getSimClustersANNEmbeddingType(
|
||||
sourceInfo: SourceInfo
|
||||
): EmbeddingType = {
|
||||
sourceInfo.sourceType match {
|
||||
case SourceType.TweetFavorite | SourceType.Retweet | SourceType.OriginalTweet |
|
||||
SourceType.Reply | SourceType.TweetShare | SourceType.NotificationClick |
|
||||
SourceType.GoodTweetClick | SourceType.VideoTweetQualityView |
|
||||
SourceType.VideoTweetPlayback50 =>
|
||||
EmbeddingType.LogFavLongestL2EmbeddingTweet
|
||||
case SourceType.UserFollow | SourceType.UserRepeatedProfileVisit | SourceType.RealGraphOon |
|
||||
SourceType.FollowRecommendation | SourceType.UserTrafficAttributionProfileVisit |
|
||||
SourceType.GoodProfileClick | SourceType.TwiceUserId =>
|
||||
EmbeddingType.FavBasedProducer
|
||||
case _ => throw new IllegalArgumentException("sourceInfo.sourceType not supported")
|
||||
}
|
||||
}
|
||||
|
||||
def buildHnswANNQuery(internalId: InternalId, modelId: String): HnswANNEngineQuery = {
|
||||
HnswANNEngineQuery(
|
||||
sourceId = internalId,
|
||||
modelId = modelId,
|
||||
params = Params.Empty
|
||||
)
|
||||
}
|
||||
|
||||
def getConsumerBasedWalsSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
val AllowedSourceTypesForConsumerBasedWalsSE = Set(
|
||||
SourceType.TweetFavorite.value,
|
||||
SourceType.Retweet.value,
|
||||
SourceType.TweetDontLike.value, //currently no-op
|
||||
SourceType.TweetReport.value, //currently no-op
|
||||
SourceType.AccountMute.value, //currently no-op
|
||||
SourceType.AccountBlock.value //currently no-op
|
||||
)
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForConsumerBasedWalsSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
}
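// --------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the shape of the
// source-type -> embedding-type routing in getSimClustersANNEmbeddingType,
// using simplified, hypothetical stand-in enums instead of the thrift
// SourceType and EmbeddingType definitions (only a subset of values is shown).
// --------------------------------------------------------------------------
object EmbeddingRoutingSketch {
  sealed trait MiniSourceType
  case object TweetFavorite extends MiniSourceType
  case object Retweet extends MiniSourceType
  case object UserFollow extends MiniSourceType
  case object GoodProfileClick extends MiniSourceType

  sealed trait MiniEmbeddingType
  case object TweetEmbedding extends MiniEmbeddingType    // tweet-engagement signals
  case object ProducerEmbedding extends MiniEmbeddingType // profile/follow signals

  // Tweet-engagement signals route to a tweet embedding; profile/follow signals
  // route to a producer embedding. In the original match, an unsupported
  // source type throws an IllegalArgumentException.
  def route(sourceType: MiniSourceType): MiniEmbeddingType = sourceType match {
    case TweetFavorite | Retweet => TweetEmbedding
    case UserFollow | GoodProfileClick => ProducerEmbedding
  }

  def main(args: Array[String]): Unit = {
    println(route(TweetFavorite))    // TweetEmbedding
    println(route(GoodProfileClick)) // ProducerEmbedding
  }
}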
|
@ -0,0 +1,51 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"ann/src/main/scala/com/twitter/ann/hnsw",
|
||||
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
|
||||
"configapi/configapi-core",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"cuad/projects/hashspace/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
|
||||
"frigate/frigate-common:base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/constants",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
|
||||
"src/scala/com/twitter/ml/featurestore/lib",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
|
||||
"src/thrift/com/twitter/ml/api:embedding-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
|
||||
"src/thrift/com/twitter/search:earlybird-scala",
|
||||
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
"strato/config/columns/cuad/hashspace:hashspace-strato-client",
|
||||
],
|
||||
)
|
@ -0,0 +1,536 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModelConfig
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.model.TripTweetWithScore
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
|
||||
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.ConsumersBasedUserVideoGraphParams
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.EngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.FilterUtil
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TripEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* Route the SourceInfo to the associated Candidate Engines.
|
||||
*/
|
||||
@Singleton
|
||||
case class CandidateSourcesRouter @Inject() (
|
||||
customizedRetrievalCandidateGeneration: CustomizedRetrievalCandidateGeneration,
|
||||
simClustersInterestedInCandidateGeneration: SimClustersInterestedInCandidateGeneration,
|
||||
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine)
|
||||
tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine)
|
||||
producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
ProducerBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTripSimilarityEngine)
|
||||
consumerEmbeddingBasedTripSimilarityEngine: StandardSimilarityEngine[
|
||||
TripEngineQuery,
|
||||
TripTweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine)
|
||||
consumerBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine)
|
||||
consumerBasedTwoTowerSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumersBasedUserVideoGraphSimilarityEngine)
|
||||
consumersBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumersBasedUserVideoGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
UserTweetEntityGraphSimilarityEngine.Query,
|
||||
TweetWithScoreAndSocialProof
|
||||
],
|
||||
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
|
||||
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumerBasedWalsSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
globalStats: StatsReceiver,
|
||||
) {
|
||||
|
||||
import CandidateSourcesRouter._
|
||||
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
|
||||
|
||||
def fetchCandidates(
|
||||
requestUserId: UserId,
|
||||
sourceSignals: Set[SourceInfo],
|
||||
sourceGraphs: Map[String, Option[GraphSourceInfo]],
|
||||
params: configapi.Params,
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
val tweetBasedCandidatesFuture = getCandidates(
|
||||
getTweetBasedSourceInfo(sourceSignals),
|
||||
params,
|
||||
TweetBasedUnifiedSimilarityEngine.fromParams,
|
||||
tweetBasedUnifiedSimilarityEngine.getCandidates)
|
||||
|
||||
val producerBasedCandidatesFuture =
|
||||
getCandidates(
|
||||
getProducerBasedSourceInfo(sourceSignals),
|
||||
params,
|
||||
ProducerBasedUnifiedSimilarityEngine.fromParams(_, _),
|
||||
producerBasedUnifiedSimilarityEngine.getCandidates
|
||||
)
|
||||
|
||||
val simClustersInterestedInBasedCandidatesFuture =
|
||||
getCandidatesPerSimilarityEngineModel(
|
||||
requestUserId,
|
||||
params,
|
||||
SimClustersInterestedInCandidateGeneration.fromParams,
|
||||
simClustersInterestedInCandidateGeneration.get)
|
||||
|
||||
val consumerEmbeddingBasedLogFavBasedTripCandidatesFuture =
|
||||
if (params(
|
||||
ConsumerEmbeddingBasedCandidateGenerationParams.EnableLogFavBasedSimClustersTripParam)) {
|
||||
getSimClustersTripCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
|
||||
ModelConfig.ConsumerLogFavBasedInterestedInEmbedding,
|
||||
InternalId.UserId(requestUserId),
|
||||
params
|
||||
),
|
||||
consumerEmbeddingBasedTripSimilarityEngine
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else
|
||||
Future.Nil
|
||||
|
||||
val consumersBasedUvgRealGraphInCandidatesFuture =
|
||||
if (params(ConsumersBasedUserVideoGraphParams.EnableSourceParam)) {
|
||||
val realGraphInGraphSourceInfoOpt =
|
||||
getGraphSourceInfoBySourceType(SourceType.RealGraphIn.name, sourceGraphs)
|
||||
|
||||
getGraphBasedCandidates(
|
||||
params,
|
||||
ConsumersBasedUserVideoGraphSimilarityEngine
|
||||
.fromParamsForRealGraphIn(
|
||||
realGraphInGraphSourceInfoOpt
|
||||
.map { graphSourceInfo => graphSourceInfo.seedWithScores }.getOrElse(Map.empty),
|
||||
params),
|
||||
consumersBasedUserVideoGraphSimilarityEngine,
|
||||
ConsumersBasedUserVideoGraphSimilarityEngine.toSimilarityEngineInfo,
|
||||
realGraphInGraphSourceInfoOpt
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else Future.Nil
|
||||
|
||||
val consumerEmbeddingBasedFollowBasedTripCandidatesFuture =
|
||||
if (params(
|
||||
ConsumerEmbeddingBasedCandidateGenerationParams.EnableFollowBasedSimClustersTripParam)) {
|
||||
getSimClustersTripCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
|
||||
ModelConfig.ConsumerFollowBasedInterestedInEmbedding,
|
||||
InternalId.UserId(requestUserId),
|
||||
params
|
||||
),
|
||||
consumerEmbeddingBasedTripSimilarityEngine
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else
|
||||
Future.Nil
|
||||
|
||||
val consumerBasedWalsCandidatesFuture =
|
||||
if (params(
|
||||
ConsumerBasedWalsParams.EnableSourceParam
|
||||
)) {
|
||||
getConsumerBasedWalsCandidates(sourceSignals, params)
|
||||
}.map { Seq(_) }
|
||||
else Future.Nil
|
||||
|
||||
val consumerEmbeddingBasedTwHINCandidatesFuture =
|
||||
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) {
|
||||
getHnswCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTwHINSimilarityEngine.fromParams(
|
||||
InternalId.UserId(requestUserId),
|
||||
params),
|
||||
consumerBasedTwHINANNSimilarityEngine
|
||||
).map { Seq(_) }
|
||||
} else Future.Nil
|
||||
|
||||
val consumerEmbeddingBasedTwoTowerCandidatesFuture =
|
||||
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwoTowerParam)) {
|
||||
getHnswCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTwoTowerSimilarityEngine.fromParams(
|
||||
InternalId.UserId(requestUserId),
|
||||
params),
|
||||
consumerBasedTwoTowerSimilarityEngine
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else Future.Nil
|
||||
|
||||
val customizedRetrievalBasedCandidatesFuture =
|
||||
getCandidatesPerSimilarityEngineModel(
|
||||
requestUserId,
|
||||
params,
|
||||
CustomizedRetrievalCandidateGeneration.fromParams,
|
||||
customizedRetrievalCandidateGeneration.get)
|
||||
|
||||
Future
|
||||
.collect(
|
||||
Seq(
|
||||
tweetBasedCandidatesFuture,
|
||||
producerBasedCandidatesFuture,
|
||||
simClustersInterestedInBasedCandidatesFuture,
|
||||
consumerBasedWalsCandidatesFuture,
|
||||
consumerEmbeddingBasedLogFavBasedTripCandidatesFuture,
|
||||
consumerEmbeddingBasedFollowBasedTripCandidatesFuture,
|
||||
consumerEmbeddingBasedTwHINCandidatesFuture,
|
||||
consumerEmbeddingBasedTwoTowerCandidatesFuture,
|
||||
consumersBasedUvgRealGraphInCandidatesFuture,
|
||||
customizedRetrievalBasedCandidatesFuture
|
||||
)).map { candidatesList =>
|
||||
// remove empty innerSeq
|
||||
val result = candidatesList.flatten.filter(_.nonEmpty)
|
||||
stats.stat("numOfSequences").add(result.size)
|
||||
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
private def getGraphBasedCandidates[QueryType](
|
||||
params: configapi.Params,
|
||||
query: EngineQuery[QueryType],
|
||||
engine: StandardSimilarityEngine[QueryType, TweetWithScore],
|
||||
toSimilarityEngineInfo: Double => SimilarityEngineInfo,
|
||||
graphSourceInfoOpt: Option[GraphSourceInfo] = None
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val candidatesOptFut = engine.getCandidates(query)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
engine.getScopedStats.stat("sortedCandidates_size").add(sortedCandidates.size)
|
||||
val tweetsWithCandidateGenerationInfo = sortedCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo = toSimilarityEngineInfo(tweetWithScore.score)
|
||||
val sourceInfo = graphSourceInfoOpt.map { graphSourceInfo =>
|
||||
// The internalId is a placeholder value. We do not plan to store the full seedUserId set.
|
||||
SourceInfo(
|
||||
sourceType = graphSourceInfo.sourceType,
|
||||
internalId = InternalId.UserId(0L),
|
||||
sourceEventTime = None
|
||||
)
|
||||
}
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
private def getCandidates[QueryType](
|
||||
sourceSignals: Set[SourceInfo],
|
||||
params: configapi.Params,
|
||||
fromParams: (SourceInfo, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
val queries = sourceSignals.map { sourceInfo =>
|
||||
fromParams(sourceInfo, params)
|
||||
}.toSeq
|
||||
|
||||
Future
|
||||
.collect {
|
||||
queries.map { query =>
|
||||
for {
|
||||
candidates <- getFunc(query)
|
||||
prefilterCandidates <- convertToInitialCandidates(candidates.toSeq.flatten)
|
||||
} yield {
|
||||
prefilterCandidates
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def getConsumerBasedWalsCandidates(
|
||||
sourceSignals: Set[SourceInfo],
|
||||
params: configapi.Params
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
// Fetch source signals and filter them based on age.
|
||||
val signals = FilterUtil.tweetSourceAgeFilter(
|
||||
getConsumerBasedWalsSourceInfo(sourceSignals).toSeq,
|
||||
params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
|
||||
|
||||
val candidatesOptFut = consumerBasedWalsSimilarityEngine.getCandidates(
|
||||
ConsumerBasedWalsSimilarityEngine.fromParams(signals, params)
|
||||
)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
val filteredCandidates =
|
||||
FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam))
|
||||
consumerBasedWalsSimilarityEngine.getScopedStats
|
||||
.stat("filteredCandidates_size").add(filteredCandidates.size)
|
||||
|
||||
val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo =
|
||||
ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
private def getSimClustersTripCandidates(
|
||||
params: configapi.Params,
|
||||
query: TripEngineQuery,
|
||||
engine: StandardSimilarityEngine[
|
||||
TripEngineQuery,
|
||||
TripTweetWithScore
|
||||
],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetsWithCandidatesGenerationInfoOptFut =
|
||||
engine.getCandidates(EngineQuery(query, params)).map {
|
||||
_.map {
|
||||
_.map { tweetWithScore =>
|
||||
// No additional filtering is applied here; each trip tweet is wrapped into CandidateGenerationInfo as-is
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimilarityEngineInfo(
|
||||
SimilarityEngineType.ExploreTripOfflineSimClustersTweets,
|
||||
None,
|
||||
Some(tweetWithScore.score)),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidatesGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
private def getHnswCandidates(
|
||||
params: configapi.Params,
|
||||
query: HnswANNEngineQuery,
|
||||
engine: HnswANNSimilarityEngine,
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val candidatesOptFut = engine.getCandidates(query)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
val filteredCandidates =
|
||||
FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam))
|
||||
engine.getScopedStats.stat("filteredCandidates_size").add(filteredCandidates.size)
|
||||
val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo =
|
||||
engine.toSimilarityEngineInfo(query, tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns candidates from each similarity engine separately.
|
||||
* For 1 requestUserId, it will fetch results from each similarity engine e_i,
|
||||
* and return a Seq[Seq[TweetCandidate]].
|
||||
*/
|
||||
private def getCandidatesPerSimilarityEngineModel[QueryType](
|
||||
requestUserId: UserId,
|
||||
params: configapi.Params,
|
||||
fromParams: (InternalId, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[
|
||||
Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
val query = fromParams(InternalId.UserId(requestUserId), params)
|
||||
getFunc(query).flatMap { candidatesPerSimilarityEngineModelOpt =>
|
||||
val candidatesPerSimilarityEngineModel = candidatesPerSimilarityEngineModelOpt.toSeq.flatten
|
||||
Future.collect {
|
||||
candidatesPerSimilarityEngineModel.map(convertToInitialCandidates)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/***
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
object CandidateSourcesRouter {
|
||||
def getGraphSourceInfoBySourceType(
|
||||
sourceTypeStr: String,
|
||||
sourceGraphs: Map[String, Option[GraphSourceInfo]]
|
||||
): Option[GraphSourceInfo] = {
|
||||
sourceGraphs.getOrElse(sourceTypeStr, None)
|
||||
}
|
||||
|
||||
def getTweetBasedSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForTweetBasedUnifiedSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
|
||||
def getProducerBasedSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForProducerBasedUnifiedSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
|
||||
def getConsumerBasedWalsSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForConsumerBasedWalsSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
* Signal funneling should not exist in CG or even in any SimilarityEngine.
|
||||
* It belongs in the Router or, eventually, in CrCandidateGenerator.
|
||||
*/
|
||||
val AllowedSourceTypesForConsumerBasedWalsSE = Set(
|
||||
SourceType.TweetFavorite.value,
|
||||
SourceType.Retweet.value,
|
||||
SourceType.TweetDontLike.value, //currently no-op
|
||||
SourceType.TweetReport.value, //currently no-op
|
||||
SourceType.AccountMute.value, //currently no-op
|
||||
SourceType.AccountBlock.value //currently no-op
|
||||
)
|
||||
val AllowedSourceTypesForTweetBasedUnifiedSE = Set(
|
||||
SourceType.TweetFavorite.value,
|
||||
SourceType.Retweet.value,
|
||||
SourceType.OriginalTweet.value,
|
||||
SourceType.Reply.value,
|
||||
SourceType.TweetShare.value,
|
||||
SourceType.NotificationClick.value,
|
||||
SourceType.GoodTweetClick.value,
|
||||
SourceType.VideoTweetQualityView.value,
|
||||
SourceType.VideoTweetPlayback50.value,
|
||||
SourceType.TweetAggregation.value,
|
||||
)
|
||||
val AllowedSourceTypesForProducerBasedUnifiedSE = Set(
|
||||
SourceType.UserFollow.value,
|
||||
SourceType.UserRepeatedProfileVisit.value,
|
||||
SourceType.RealGraphOon.value,
|
||||
SourceType.FollowRecommendation.value,
|
||||
SourceType.UserTrafficAttributionProfileVisit.value,
|
||||
SourceType.GoodProfileClick.value,
|
||||
SourceType.ProducerAggregation.value,
|
||||
)
|
||||
}
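// --------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the allow-list pattern
// used by getTweetBasedSourceInfo / getProducerBasedSourceInfo above, with a
// plain String standing in for SourceType and a hypothetical MiniSourceInfo
// standing in for SourceInfo.
// --------------------------------------------------------------------------
object SourceSignalAllowListSketch {
  final case class MiniSourceInfo(sourceType: String, internalId: Long)

  // Keep only signals whose type appears in the engine's allow-list.
  def filterByAllowList(
    signals: Set[MiniSourceInfo],
    allowList: Set[String]
  ): Set[MiniSourceInfo] =
    signals.collect { case s if allowList.contains(s.sourceType) => s }

  def main(args: Array[String]): Unit = {
    val signals = Set(
      MiniSourceInfo("TweetFavorite", 1L),
      MiniSourceInfo("UserFollow", 2L),
      MiniSourceInfo("NotificationClick", 3L))
    val tweetBasedAllowList = Set("TweetFavorite", "NotificationClick")
    // Keeps the TweetFavorite and NotificationClick signals only.
    println(filterByAllowList(signals, tweetBasedAllowList))
  }
}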
|
@ -0,0 +1,350 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.blender.SwitchBlender
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.filter.PostRankFilterRunner
|
||||
import com.twitter.cr_mixer.filter.PreRankFilterRunner
|
||||
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
|
||||
import com.twitter.cr_mixer.model.BlendedCandidate
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.param.RankerParams
|
||||
import com.twitter.cr_mixer.param.RecentNegativeSignalParams
|
||||
import com.twitter.cr_mixer.ranker.SwitchRanker
|
||||
import com.twitter.cr_mixer.source_signal.SourceInfoRouter
|
||||
import com.twitter.cr_mixer.source_signal.UssStore.EnabledNegativeSourceTypes
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.JavaTimer
|
||||
import com.twitter.util.Timer
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* For now, it performs the following main steps:
|
||||
* 1. Source signal (via USS, FRS) fetch
|
||||
* 2. Candidate generation
|
||||
* 3. Filtering
|
||||
* 4. Interleave blender
|
||||
* 5. Ranker
|
||||
* 6. Post-ranker filter
|
||||
* 7. Truncation
|
||||
*/
|
||||
@Singleton
|
||||
class CrCandidateGenerator @Inject() (
|
||||
sourceInfoRouter: SourceInfoRouter,
|
||||
candidateSourceRouter: CandidateSourcesRouter,
|
||||
switchBlender: SwitchBlender,
|
||||
preRankFilterRunner: PreRankFilterRunner,
|
||||
postRankFilterRunner: PostRankFilterRunner,
|
||||
switchRanker: SwitchRanker,
|
||||
crMixerScribeLogger: CrMixerScribeLogger,
|
||||
timeoutConfig: TimeoutConfig,
|
||||
globalStats: StatsReceiver) {
|
||||
private val timer: Timer = new JavaTimer(true)
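// This daemon timer backs the raiseWithin(timeoutConfig.serviceTimeout) enforcement in get() below.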
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
private val fetchSourcesStats = stats.scope("fetchSources")
|
||||
private val fetchPositiveSourcesStats = stats.scope("fetchPositiveSources")
|
||||
private val fetchNegativeSourcesStats = stats.scope("fetchNegativeSources")
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val fetchCandidatesAfterFilterStats = stats.scope("fetchCandidatesAfterFilter")
|
||||
private val preRankFilterStats = stats.scope("preRankFilter")
|
||||
private val interleaveStats = stats.scope("interleave")
|
||||
private val rankStats = stats.scope("rank")
|
||||
private val postRankFilterStats = stats.scope("postRankFilter")
|
||||
private val blueVerifiedTweetStats = stats.scope("blueVerifiedTweetStats")
|
||||
private val blueVerifiedTweetStatsPerSimilarityEngine =
|
||||
stats.scope("blueVerifiedTweetStatsPerSimilarityEngine")
|
||||
|
||||
def get(query: CrCandidateGeneratorQuery): Future[Seq[RankedCandidate]] = {
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
val perProductBlueVerifiedStats =
|
||||
blueVerifiedTweetStats.scope("perProduct", query.product.toString)
|
||||
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
trackResultStats(perProductStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
val result = for {
|
||||
(sourceSignals, sourceGraphsMap) <- StatsUtil.trackBlockStats(fetchSourcesStats) {
|
||||
fetchSources(query)
|
||||
}
|
||||
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) {
|
||||
// find the positive and negative signals
|
||||
val (positiveSignals, negativeSignals) = sourceSignals.partition { signal =>
|
||||
!EnabledNegativeSourceTypes.contains(signal.sourceType)
|
||||
}
|
||||
fetchPositiveSourcesStats.stat("size").add(positiveSignals.size)
|
||||
fetchNegativeSourcesStats.stat("size").add(negativeSignals.size)
|
||||
|
||||
// find the positive signals to keep, removing blocked and muted users
|
||||
val filteredSourceInfo =
|
||||
if (negativeSignals.nonEmpty && query.params(
|
||||
RecentNegativeSignalParams.EnableSourceParam)) {
|
||||
filterSourceInfo(positiveSignals, negativeSignals)
|
||||
} else {
|
||||
positiveSignals
|
||||
}
|
||||
|
||||
// fetch candidates from the positive signals
|
||||
StatsUtil.trackBlockStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query, filteredSourceInfo, sourceGraphsMap)
|
||||
}
|
||||
}
|
||||
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
|
||||
preRankFilter(query, initialCandidates)
|
||||
}
|
||||
interleavedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
|
||||
interleave(query, filteredCandidates)
|
||||
}
|
||||
rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
|
||||
val candidatesToRank =
|
||||
interleavedCandidates.take(query.params(RankerParams.MaxCandidatesToRank))
|
||||
rank(query, candidatesToRank)
|
||||
}
|
||||
postRankFilterCandidates <- StatsUtil.trackItemsStats(postRankFilterStats) {
|
||||
postRankFilter(query, rankedCandidates)
|
||||
}
|
||||
} yield {
|
||||
trackTopKStats(
|
||||
800,
|
||||
postRankFilterCandidates,
|
||||
isQueryK = false,
|
||||
perProductBlueVerifiedStats)
|
||||
trackTopKStats(
|
||||
400,
|
||||
postRankFilterCandidates,
|
||||
isQueryK = false,
|
||||
perProductBlueVerifiedStats)
|
||||
trackTopKStats(
|
||||
query.maxNumResults,
|
||||
postRankFilterCandidates,
|
||||
isQueryK = true,
|
||||
perProductBlueVerifiedStats)
|
||||
|
||||
val (blueVerifiedTweets, remainingTweets) =
|
||||
postRankFilterCandidates.partition(
|
||||
_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
|
||||
val topKBlueVerified = blueVerifiedTweets.take(query.maxNumResults)
|
||||
val topKRemaining = remainingTweets.take(query.maxNumResults - topKBlueVerified.size)
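// When EnableBlueVerifiedTopK is set, Blue Verified tweets are promoted to the front and the remaining slots are backfilled from the other candidates, up to maxNumResults.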
|
||||
|
||||
trackBlueVerifiedTweetStats(topKBlueVerified, perProductBlueVerifiedStats)
|
||||
|
||||
if (topKBlueVerified.nonEmpty && query.params(RankerParams.EnableBlueVerifiedTopK)) {
|
||||
topKBlueVerified ++ topKRemaining
|
||||
} else {
|
||||
postRankFilterCandidates
|
||||
}
|
||||
}
|
||||
result.raiseWithin(timeoutConfig.serviceTimeout)(timer)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def fetchSources(
|
||||
query: CrCandidateGeneratorQuery
|
||||
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
|
||||
crMixerScribeLogger.scribeSignalSources(
|
||||
query,
|
||||
sourceInfoRouter
|
||||
.get(query.userId, query.product, query.userState, query.params))
|
||||
}
|
||||
|
||||
private def filterSourceInfo(
|
||||
positiveSignals: Set[SourceInfo],
|
||||
negativeSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
val filterUsers: Set[Long] = negativeSignals.flatMap {
|
||||
case SourceInfo(_, InternalId.UserId(userId), _) => Some(userId)
|
||||
case _ => None
|
||||
}
|
||||
|
||||
positiveSignals.filter {
|
||||
case SourceInfo(_, InternalId.UserId(userId), _) => !filterUsers.contains(userId)
|
||||
case _ => true
|
||||
}
|
||||
}
|
||||
|
||||
def fetchCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
sourceSignals: Set[SourceInfo],
|
||||
sourceGraphs: Map[String, Option[GraphSourceInfo]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
val initialCandidates = candidateSourceRouter
|
||||
.fetchCandidates(
|
||||
query.userId,
|
||||
sourceSignals,
|
||||
sourceGraphs,
|
||||
query.params
|
||||
)
|
||||
|
||||
initialCandidates.map(_.flatten.map { candidate =>
|
||||
if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) {
|
||||
blueVerifiedTweetStatsPerSimilarityEngine
|
||||
.scope(query.product.toString).scope(
|
||||
candidate.candidateGenerationInfo.contributingSimilarityEngines.head.similarityEngineType.toString).counter(
|
||||
candidate.tweetInfo.authorId.toString).incr()
|
||||
}
|
||||
})
|
||||
|
||||
crMixerScribeLogger.scribeInitialCandidates(
|
||||
query,
|
||||
initialCandidates
|
||||
)
|
||||
}
|
||||
|
||||
private def preRankFilter(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
candidates: Seq[Seq[InitialCandidate]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
crMixerScribeLogger.scribePreRankFilterCandidates(
|
||||
query,
|
||||
preRankFilterRunner
|
||||
.runSequentialFilters(query, candidates))
|
||||
}
|
||||
|
||||
private def postRankFilter(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
postRankFilterRunner.run(query, candidates)
|
||||
}
|
||||
|
||||
private def interleave(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
candidates: Seq[Seq[InitialCandidate]]
|
||||
): Future[Seq[BlendedCandidate]] = {
|
||||
crMixerScribeLogger.scribeInterleaveCandidates(
|
||||
query,
|
||||
switchBlender
|
||||
.blend(query.params, query.userState, candidates))
|
||||
}
|
||||
|
||||
private def rank(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
candidates: Seq[BlendedCandidate],
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
crMixerScribeLogger.scribeRankedCandidates(
|
||||
query,
|
||||
switchRanker.rank(query, candidates)
|
||||
)
|
||||
}
|
||||
|
||||
private def trackResultStats(
|
||||
stats: StatsReceiver
|
||||
)(
|
||||
fn: => Future[Seq[RankedCandidate]]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
fn.onSuccess { candidates =>
|
||||
trackReasonChosenSourceTypeStats(candidates, stats)
|
||||
trackReasonChosenSimilarityEngineStats(candidates, stats)
|
||||
trackPotentialReasonsSourceTypeStats(candidates, stats)
|
||||
trackPotentialReasonsSimilarityEngineStats(candidates, stats)
|
||||
}
|
||||
}
|
||||
|
||||
private def trackReasonChosenSourceTypeStats(
|
||||
candidates: Seq[RankedCandidate],
|
||||
stats: StatsReceiver
|
||||
): Unit = {
|
||||
candidates
|
||||
.groupBy(_.reasonChosen.sourceInfoOpt.map(_.sourceType))
|
||||
.foreach {
|
||||
case (sourceTypeOpt, rankedCands) =>
|
||||
val sourceType = sourceTypeOpt.map(_.toString).getOrElse("RequesterId") // default
|
||||
stats.stat("reasonChosen", "sourceType", sourceType, "size").add(rankedCands.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def trackReasonChosenSimilarityEngineStats(
|
||||
candidates: Seq[RankedCandidate],
|
||||
stats: StatsReceiver
|
||||
): Unit = {
|
||||
candidates
|
||||
.groupBy(_.reasonChosen.similarityEngineInfo.similarityEngineType)
|
||||
.foreach {
|
||||
case (seInfoType, rankedCands) =>
|
||||
stats
|
||||
.stat("reasonChosen", "similarityEngine", seInfoType.toString, "size").add(
|
||||
rankedCands.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def trackPotentialReasonsSourceTypeStats(
|
||||
candidates: Seq[RankedCandidate],
|
||||
stats: StatsReceiver
|
||||
): Unit = {
|
||||
candidates
|
||||
.flatMap(_.potentialReasons.map(_.sourceInfoOpt.map(_.sourceType)))
|
||||
.groupBy(source => source)
|
||||
.foreach {
|
||||
case (sourceInfoOpt, seq) =>
|
||||
val sourceType = sourceInfoOpt.map(_.toString).getOrElse("RequesterId") // default
|
||||
stats.stat("potentialReasons", "sourceType", sourceType, "size").add(seq.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def trackPotentialReasonsSimilarityEngineStats(
|
||||
candidates: Seq[RankedCandidate],
|
||||
stats: StatsReceiver
|
||||
): Unit = {
|
||||
candidates
|
||||
.flatMap(_.potentialReasons.map(_.similarityEngineInfo.similarityEngineType))
|
||||
.groupBy(se => se)
|
||||
.foreach {
|
||||
case (seType, seq) =>
|
||||
stats.stat("potentialReasons", "similarityEngine", seType.toString, "size").add(seq.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def trackBlueVerifiedTweetStats(
|
||||
candidates: Seq[RankedCandidate],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
candidates.foreach { candidate =>
|
||||
if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) {
|
||||
statsReceiver.counter(candidate.tweetInfo.authorId.toString).incr()
|
||||
statsReceiver
|
||||
.scope(candidate.tweetInfo.authorId.toString).counter(candidate.tweetId.toString).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def trackTopKStats(
|
||||
k: Int,
|
||||
tweetCandidates: Seq[RankedCandidate],
|
||||
isQueryK: Boolean,
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
val (topK, beyondK) = tweetCandidates.splitAt(k)
|
||||
|
||||
val blueVerifiedIds = tweetCandidates.collect {
|
||||
case candidate if candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true) =>
|
||||
candidate.tweetInfo.authorId
|
||||
}.toSet
|
||||
|
||||
blueVerifiedIds.foreach { blueVerifiedId =>
|
||||
val numTweetsTopK = topK.count(_.tweetInfo.authorId == blueVerifiedId)
|
||||
val numTweetsBeyondK = beyondK.count(_.tweetInfo.authorId == blueVerifiedId)
|
||||
|
||||
if (isQueryK) {
|
||||
statsReceiver.scope(blueVerifiedId.toString).stat(s"topK").add(numTweetsTopK)
|
||||
statsReceiver
|
||||
.scope(blueVerifiedId.toString).stat(s"beyondK").add(numTweetsBeyondK)
|
||||
} else {
|
||||
statsReceiver.scope(blueVerifiedId.toString).stat(s"top$k").add(numTweetsTopK)
|
||||
statsReceiver
|
||||
.scope(blueVerifiedId.toString).stat(s"beyond$k").add(numTweetsBeyondK)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,345 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.candidate_generation.CustomizedRetrievalCandidateGeneration.Query
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedCandidateGenerationParams._
|
||||
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedTwhinParams._
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.LookupEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine
|
||||
import com.twitter.cr_mixer.util.InterleaveUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.base.CandidateSource
|
||||
import com.twitter.frigate.common.base.Stats
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.snowflake.id.SnowflakeId
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
/**
|
||||
* A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources
|
||||
*
|
||||
* Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different
|
||||
* similarity engines without blending. In other words, this class shall not be thought of as a
|
||||
* Unified Similarity Engine. It is a CG that calls multiple singular Similarity Engines.
|
||||
*/
|
||||
@Singleton
|
||||
case class CustomizedRetrievalCandidateGeneration @Inject() (
|
||||
@Named(ModuleNames.TwhinCollabFilterSimilarityEngine)
|
||||
twhinCollabFilterSimilarityEngine: LookupSimilarityEngine[
|
||||
TwhinCollabFilterSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.DiffusionBasedSimilarityEngine)
|
||||
diffusionBasedSimilarityEngine: LookupSimilarityEngine[
|
||||
DiffusionBasedSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends CandidateSource[
|
||||
Query,
|
||||
Seq[TweetWithCandidateGenerationInfo]
|
||||
] {
|
||||
|
||||
override def name: String = this.getClass.getSimpleName
|
||||
|
||||
private val stats = statsReceiver.scope(name)
|
||||
private val fetchCandidatesStat = stats.scope("fetchCandidates")
|
||||
|
||||
/**
|
||||
* For each Similarity Engine Model, return a list of tweet candidates
|
||||
*/
|
||||
override def get(
|
||||
query: Query
|
||||
): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
|
||||
query.internalId match {
|
||||
case InternalId.UserId(_) =>
|
||||
Stats.trackOption(fetchCandidatesStat) {
|
||||
val twhinCollabFilterForFollowCandidatesFut = if (query.enableTwhinCollabFilter) {
|
||||
twhinCollabFilterSimilarityEngine.getCandidates(query.twhinCollabFilterFollowQuery)
|
||||
} else Future.None
|
||||
|
||||
val twhinCollabFilterForEngagementCandidatesFut =
|
||||
if (query.enableTwhinCollabFilter) {
|
||||
twhinCollabFilterSimilarityEngine.getCandidates(
|
||||
query.twhinCollabFilterEngagementQuery)
|
||||
} else Future.None
|
||||
|
||||
val twhinMultiClusterForFollowCandidatesFut = if (query.enableTwhinMultiCluster) {
|
||||
twhinCollabFilterSimilarityEngine.getCandidates(query.twhinMultiClusterFollowQuery)
|
||||
} else Future.None
|
||||
|
||||
val twhinMultiClusterForEngagementCandidatesFut =
|
||||
if (query.enableTwhinMultiCluster) {
|
||||
twhinCollabFilterSimilarityEngine.getCandidates(
|
||||
query.twhinMultiClusterEngagementQuery)
|
||||
} else Future.None
|
||||
|
||||
val diffusionBasedSimilarityEngineCandidatesFut = if (query.enableRetweetBasedDiffusion) {
|
||||
diffusionBasedSimilarityEngine.getCandidates(query.diffusionBasedSimilarityEngineQuery)
|
||||
} else Future.None
|
||||
|
||||
Future
|
||||
.join(
|
||||
twhinCollabFilterForFollowCandidatesFut,
|
||||
twhinCollabFilterForEngagementCandidatesFut,
|
||||
twhinMultiClusterForFollowCandidatesFut,
|
||||
twhinMultiClusterForEngagementCandidatesFut,
|
||||
diffusionBasedSimilarityEngineCandidatesFut
|
||||
).map {
|
||||
case (
|
||||
twhinCollabFilterForFollowCandidates,
|
||||
twhinCollabFilterForEngagementCandidates,
|
||||
twhinMultiClusterForFollowCandidates,
|
||||
twhinMultiClusterForEngagementCandidates,
|
||||
diffusionBasedSimilarityEngineCandidates) =>
|
||||
val maxCandidateNumPerSourceKey = 200
|
||||
val twhinCollabFilterForFollowWithCGInfo =
|
||||
getTwhinCollabCandidatesWithCGInfo(
|
||||
twhinCollabFilterForFollowCandidates,
|
||||
maxCandidateNumPerSourceKey,
|
||||
query.twhinCollabFilterFollowQuery,
|
||||
)
|
||||
val twhinCollabFilterForEngagementWithCGInfo =
|
||||
getTwhinCollabCandidatesWithCGInfo(
|
||||
twhinCollabFilterForEngagementCandidates,
|
||||
maxCandidateNumPerSourceKey,
|
||||
query.twhinCollabFilterEngagementQuery,
|
||||
)
|
||||
val twhinMultiClusterForFollowWithCGInfo =
|
||||
getTwhinCollabCandidatesWithCGInfo(
|
||||
twhinMultiClusterForFollowCandidates,
|
||||
maxCandidateNumPerSourceKey,
|
||||
query.twhinMultiClusterFollowQuery,
|
||||
)
|
||||
val twhinMultiClusterForEngagementWithCGInfo =
|
||||
getTwhinCollabCandidatesWithCGInfo(
|
||||
twhinMultiClusterForEngagementCandidates,
|
||||
maxCandidateNumPerSourceKey,
|
||||
query.twhinMultiClusterEngagementQuery,
|
||||
)
|
||||
val retweetBasedDiffusionWithCGInfo =
|
||||
getDiffusionBasedCandidatesWithCGInfo(
|
||||
diffusionBasedSimilarityEngineCandidates,
|
||||
maxCandidateNumPerSourceKey,
|
||||
query.diffusionBasedSimilarityEngineQuery,
|
||||
)
|
||||
|
||||
val twhinCollabCandidateSourcesToBeInterleaved =
|
||||
ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
|
||||
twhinCollabFilterForFollowWithCGInfo,
|
||||
twhinCollabFilterForEngagementWithCGInfo,
|
||||
)
|
||||
|
||||
val twhinMultiClusterCandidateSourcesToBeInterleaved =
|
||||
ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
|
||||
twhinMultiClusterForFollowWithCGInfo,
|
||||
twhinMultiClusterForEngagementWithCGInfo,
|
||||
)
|
||||
|
||||
val interleavedTwhinCollabCandidates =
|
||||
InterleaveUtil.interleave(twhinCollabCandidateSourcesToBeInterleaved)
|
||||
|
||||
val interleavedTwhinMultiClusterCandidates =
|
||||
InterleaveUtil.interleave(twhinMultiClusterCandidateSourcesToBeInterleaved)
|
||||
|
||||
val twhinCollabFilterResults =
|
||||
if (interleavedTwhinCollabCandidates.nonEmpty) {
|
||||
Some(interleavedTwhinCollabCandidates.take(maxCandidateNumPerSourceKey))
|
||||
} else None
|
||||
|
||||
val twhinMultiClusterResults =
|
||||
if (interleavedTwhinMultiClusterCandidates.nonEmpty) {
|
||||
Some(interleavedTwhinMultiClusterCandidates.take(maxCandidateNumPerSourceKey))
|
||||
} else None
|
||||
|
||||
val diffusionResults =
|
||||
if (retweetBasedDiffusionWithCGInfo.nonEmpty) {
|
||||
Some(retweetBasedDiffusionWithCGInfo.take(maxCandidateNumPerSourceKey))
|
||||
} else None
|
||||
|
||||
Some(
|
||||
Seq(
|
||||
twhinCollabFilterResults,
|
||||
twhinMultiClusterResults,
|
||||
diffusionResults
|
||||
).flatten)
|
||||
}
|
||||
}
|
||||
case _ =>
|
||||
throw new IllegalArgumentException("sourceId_is_not_userId_cnt")
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a list of tweets that are generated less than `maxTweetAgeHours` hours ago */
|
||||
private def tweetAgeFilter(
|
||||
candidates: Seq[TweetWithScore],
|
||||
maxTweetAgeHours: Duration
|
||||
): Seq[TweetWithScore] = {
|
||||
// Tweet IDs are approximately chronological (see http://go/snowflake),
|
||||
// so we are building the earliest tweet id once
|
||||
// The per-candidate logic here then be candidate.tweetId > earliestPermittedTweetId, which is far cheaper.
|
||||
val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAgeHours)
|
||||
candidates.filter { candidate => candidate.tweetId >= earliestTweetId }
|
||||
}
|
||||
|
||||
/**
|
||||
* AgeFilters tweetCandidates with stats
|
||||
* Only age filter logic is effective here (through tweetAgeFilter). This function acts mostly for metric logging.
|
||||
*/
|
||||
private def ageFilterWithStats(
|
||||
offlineInterestedInCandidates: Seq[TweetWithScore],
|
||||
maxTweetAgeHours: Duration,
|
||||
scopedStatsReceiver: StatsReceiver
|
||||
): Seq[TweetWithScore] = {
|
||||
scopedStatsReceiver.stat("size").add(offlineInterestedInCandidates.size)
|
||||
val candidates = offlineInterestedInCandidates.map { candidate =>
|
||||
TweetWithScore(candidate.tweetId, candidate.score)
|
||||
}
|
||||
val filteredCandidates = tweetAgeFilter(candidates, maxTweetAgeHours)
|
||||
scopedStatsReceiver.stat(f"filtered_size").add(filteredCandidates.size)
|
||||
if (filteredCandidates.isEmpty) scopedStatsReceiver.counter(f"empty").incr()
|
||||
|
||||
filteredCandidates
|
||||
}
|
||||
|
||||
private def getTwhinCollabCandidatesWithCGInfo(
|
||||
tweetCandidates: Option[Seq[TweetWithScore]],
|
||||
maxCandidateNumPerSourceKey: Int,
|
||||
twhinCollabFilterQuery: LookupEngineQuery[
|
||||
TwhinCollabFilterSimilarityEngine.Query
|
||||
],
|
||||
): Seq[TweetWithCandidateGenerationInfo] = {
|
||||
val twhinTweets = tweetCandidates match {
|
||||
case Some(tweetsWithScores) =>
|
||||
tweetsWithScores.map { tweetWithScore =>
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
TwhinCollabFilterSimilarityEngine
|
||||
.toSimilarityEngineInfo(twhinCollabFilterQuery, tweetWithScore.score),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
case _ => Seq.empty
|
||||
}
|
||||
twhinTweets.take(maxCandidateNumPerSourceKey)
|
||||
}
|
||||
|
||||
private def getDiffusionBasedCandidatesWithCGInfo(
|
||||
tweetCandidates: Option[Seq[TweetWithScore]],
|
||||
maxCandidateNumPerSourceKey: Int,
|
||||
diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
|
||||
DiffusionBasedSimilarityEngine.Query
|
||||
],
|
||||
): Seq[TweetWithCandidateGenerationInfo] = {
|
||||
val diffusionTweets = tweetCandidates match {
|
||||
case Some(tweetsWithScores) =>
|
||||
tweetsWithScores.map { tweetWithScore =>
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
DiffusionBasedSimilarityEngine
|
||||
.toSimilarityEngineInfo(diffusionBasedSimilarityEngineQuery, tweetWithScore.score),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
case _ => Seq.empty
|
||||
}
|
||||
diffusionTweets.take(maxCandidateNumPerSourceKey)
|
||||
}
|
||||
}
|
||||
|
||||
object CustomizedRetrievalCandidateGeneration {
|
||||
|
||||
case class Query(
|
||||
internalId: InternalId,
|
||||
maxCandidateNumPerSourceKey: Int,
|
||||
maxTweetAgeHours: Duration,
|
||||
// twhinCollabFilter
|
||||
enableTwhinCollabFilter: Boolean,
|
||||
twhinCollabFilterFollowQuery: LookupEngineQuery[
|
||||
TwhinCollabFilterSimilarityEngine.Query
|
||||
],
|
||||
twhinCollabFilterEngagementQuery: LookupEngineQuery[
|
||||
TwhinCollabFilterSimilarityEngine.Query
|
||||
],
|
||||
// twhinMultiCluster
|
||||
enableTwhinMultiCluster: Boolean,
|
||||
twhinMultiClusterFollowQuery: LookupEngineQuery[
|
||||
TwhinCollabFilterSimilarityEngine.Query
|
||||
],
|
||||
twhinMultiClusterEngagementQuery: LookupEngineQuery[
|
||||
TwhinCollabFilterSimilarityEngine.Query
|
||||
],
|
||||
enableRetweetBasedDiffusion: Boolean,
|
||||
diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
|
||||
DiffusionBasedSimilarityEngine.Query
|
||||
],
|
||||
)
|
||||
|
||||
def fromParams(
|
||||
internalId: InternalId,
|
||||
params: configapi.Params
|
||||
): Query = {
|
||||
val twhinCollabFilterFollowQuery =
|
||||
TwhinCollabFilterSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
params(CustomizedRetrievalBasedTwhinCollabFilterFollowSource),
|
||||
params)
|
||||
|
||||
val twhinCollabFilterEngagementQuery =
|
||||
TwhinCollabFilterSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
params(CustomizedRetrievalBasedTwhinCollabFilterEngagementSource),
|
||||
params)
|
||||
|
||||
val twhinMultiClusterFollowQuery =
|
||||
TwhinCollabFilterSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
params(CustomizedRetrievalBasedTwhinMultiClusterFollowSource),
|
||||
params)
|
||||
|
||||
val twhinMultiClusterEngagementQuery =
|
||||
TwhinCollabFilterSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
params(CustomizedRetrievalBasedTwhinMultiClusterEngagementSource),
|
||||
params)
|
||||
|
||||
val diffusionBasedSimilarityEngineQuery =
|
||||
DiffusionBasedSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
params(CustomizedRetrievalBasedRetweetDiffusionSource),
|
||||
params)
|
||||
|
||||
Query(
|
||||
internalId = internalId,
|
||||
maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam),
|
||||
maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam),
|
||||
// twhinCollabFilter
|
||||
enableTwhinCollabFilter = params(EnableTwhinCollabFilterClusterParam),
|
||||
twhinCollabFilterFollowQuery = twhinCollabFilterFollowQuery,
|
||||
twhinCollabFilterEngagementQuery = twhinCollabFilterEngagementQuery,
|
||||
enableTwhinMultiCluster = params(EnableTwhinMultiClusterParam),
|
||||
twhinMultiClusterFollowQuery = twhinMultiClusterFollowQuery,
|
||||
twhinMultiClusterEngagementQuery = twhinMultiClusterEngagementQuery,
|
||||
enableRetweetBasedDiffusion = params(EnableRetweetBasedDiffusionParam),
|
||||
diffusionBasedSimilarityEngineQuery = diffusionBasedSimilarityEngineQuery
|
||||
)
|
||||
}
|
||||
}
|
@ -0,0 +1,220 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.TweetWithAuthor
|
||||
import com.twitter.cr_mixer.param.FrsParams
|
||||
import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngineRouter
|
||||
import com.twitter.cr_mixer.source_signal.FrsStore
|
||||
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
|
||||
import com.twitter.cr_mixer.thriftscala.FrsTweet
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.hermit.constants.AlgorithmFeedbackTokens
|
||||
import com.twitter.hermit.constants.AlgorithmFeedbackTokens.AlgorithmToFeedbackTokenMap
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* TweetCandidateGenerator based on FRS seed users. For now this candidate generator fetches seed
|
||||
* users from FRS, and retrieves the seed users' past tweets from Earlybird with Earlybird light
|
||||
* ranking models.
|
||||
*/
|
||||
@Singleton
|
||||
class FrsTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]],
|
||||
frsBasedSimilarityEngine: EarlybirdSimilarityEngineRouter,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
timeoutConfig: TimeoutConfig,
|
||||
globalStats: StatsReceiver) {
|
||||
import FrsTweetCandidateGenerator._
|
||||
|
||||
private val timer = DefaultTimer
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchSeedsStats = stats.scope("fetchSeeds")
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val filterCandidatesStats = stats.scope("filterCandidates")
|
||||
private val hydrateCandidatesStats = stats.scope("hydrateCandidates")
|
||||
private val getCandidatesStats = stats.scope("getCandidates")
|
||||
|
||||
/**
|
||||
* The function retrieves the candidate for the given user as follows:
|
||||
* 1. Seed user fetch from FRS.
|
||||
* 2. Candidate fetch from Earlybird.
|
||||
* 3. Filtering.
|
||||
* 4. Candidate hydration.
|
||||
* 5. Truncation.
|
||||
*/
|
||||
def get(
|
||||
frsTweetCandidateGeneratorQuery: FrsTweetCandidateGeneratorQuery
|
||||
): Future[Seq[FrsTweet]] = {
|
||||
val userId = frsTweetCandidateGeneratorQuery.userId
|
||||
val product = frsTweetCandidateGeneratorQuery.product
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", product.name)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
val result = for {
|
||||
seedAuthorWithScores <- StatsUtil.trackOptionItemMapStats(fetchSeedsStats) {
|
||||
fetchSeeds(
|
||||
userId,
|
||||
frsTweetCandidateGeneratorQuery.impressedUserList,
|
||||
frsTweetCandidateGeneratorQuery.languageCodeOpt,
|
||||
frsTweetCandidateGeneratorQuery.countryCodeOpt,
|
||||
frsTweetCandidateGeneratorQuery.params,
|
||||
)
|
||||
}
|
||||
tweetCandidates <- StatsUtil.trackOptionItemsStats(fetchCandidatesStats) {
|
||||
fetchCandidates(
|
||||
userId,
|
||||
seedAuthorWithScores.map(_.keys.toSeq).getOrElse(Seq.empty),
|
||||
frsTweetCandidateGeneratorQuery.impressedTweetList,
|
||||
seedAuthorWithScores.map(_.mapValues(_.score)).getOrElse(Map.empty),
|
||||
frsTweetCandidateGeneratorQuery.params
|
||||
)
|
||||
}
|
||||
filteredTweetCandidates <- StatsUtil.trackOptionItemsStats(filterCandidatesStats) {
|
||||
filterCandidates(
|
||||
tweetCandidates,
|
||||
frsTweetCandidateGeneratorQuery.params
|
||||
)
|
||||
}
|
||||
hydratedTweetCandidates <- StatsUtil.trackOptionItemsStats(hydrateCandidatesStats) {
|
||||
hydrateCandidates(
|
||||
seedAuthorWithScores,
|
||||
filteredTweetCandidates
|
||||
)
|
||||
}
|
||||
} yield {
|
||||
hydratedTweetCandidates
|
||||
.map(_.take(frsTweetCandidateGeneratorQuery.maxNumResults)).getOrElse(Seq.empty)
|
||||
}
|
||||
result.raiseWithin(timeoutConfig.frsBasedTweetEndpointTimeout)(timer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch recommended seed users from FRS
|
||||
*/
|
||||
private def fetchSeeds(
|
||||
userId: UserId,
|
||||
userDenyList: Set[UserId],
|
||||
languageCodeOpt: Option[String],
|
||||
countryCodeOpt: Option[String],
|
||||
params: Params
|
||||
): Future[Option[Map[UserId, FrsQueryResult]]] = {
|
||||
frsStore
|
||||
.get(
|
||||
FrsStore.Query(
|
||||
userId,
|
||||
params(FrsParams.FrsBasedCandidateGenerationMaxSeedsNumParam),
|
||||
params(FrsParams.FrsBasedCandidateGenerationDisplayLocationParam).displayLocation,
|
||||
userDenyList.toSeq,
|
||||
languageCodeOpt,
|
||||
countryCodeOpt
|
||||
)).map {
|
||||
_.map { seedAuthors =>
|
||||
seedAuthors.map(user => user.userId -> user).toMap
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch tweet candidates from Earlybird
|
||||
*/
|
||||
private def fetchCandidates(
|
||||
searcherUserId: UserId,
|
||||
seedAuthors: Seq[UserId],
|
||||
impressedTweetList: Set[TweetId],
|
||||
frsUserToScores: Map[UserId, Double],
|
||||
params: Params
|
||||
): Future[Option[Seq[TweetWithAuthor]]] = {
|
||||
if (seedAuthors.nonEmpty) {
|
||||
// call earlybird
|
||||
val query = EarlybirdSimilarityEngineRouter.queryFromParams(
|
||||
Some(searcherUserId),
|
||||
seedAuthors,
|
||||
impressedTweetList,
|
||||
frsUserToScoresForScoreAdjustment = Some(frsUserToScores),
|
||||
params
|
||||
)
|
||||
frsBasedSimilarityEngine.get(query)
|
||||
} else Future.None
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter candidates that do not pass visibility filter policy
|
||||
*/
|
||||
private def filterCandidates(
|
||||
candidates: Option[Seq[TweetWithAuthor]],
|
||||
params: Params
|
||||
): Future[Option[Seq[TweetWithAuthor]]] = {
|
||||
val tweetIds = candidates.map(_.map(_.tweetId).toSet).getOrElse(Set.empty)
|
||||
if (params(FrsParams.FrsBasedCandidateGenerationEnableVisibilityFilteringParam))
|
||||
Future
|
||||
.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
candidates.map {
|
||||
// If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
_.filter(candidate => tweetInfos.getOrElse(candidate.tweetId, None).isDefined)
|
||||
}
|
||||
}
|
||||
else {
|
||||
Future.value(candidates)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hydrate the candidates with the FRS candidate sources and scores
|
||||
*/
|
||||
private def hydrateCandidates(
|
||||
frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]],
|
||||
candidates: Option[Seq[TweetWithAuthor]]
|
||||
): Future[Option[Seq[FrsTweet]]] = {
|
||||
Future.value {
|
||||
candidates.map {
|
||||
_.map { tweetWithAuthor =>
|
||||
val frsQueryResult = frsAuthorWithScores.flatMap(_.get(tweetWithAuthor.authorId))
|
||||
FrsTweet(
|
||||
tweetId = tweetWithAuthor.tweetId,
|
||||
authorId = tweetWithAuthor.authorId,
|
||||
frsPrimarySource = frsQueryResult.flatMap(_.primarySource),
|
||||
frsAuthorScore = frsQueryResult.map(_.score),
|
||||
frsCandidateSourceScores = frsQueryResult.flatMap { result =>
|
||||
result.sourceWithScores.map {
|
||||
_.collect {
|
||||
// see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala
|
||||
// see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala
|
||||
case (candidateSourceAlgoStr, score)
|
||||
if AlgorithmFeedbackTokens.TokenStrToAlgorithmMap.contains(
|
||||
candidateSourceAlgoStr) =>
|
||||
AlgorithmToFeedbackTokenMap.getOrElse(
|
||||
AlgorithmFeedbackTokens.TokenStrToAlgorithmMap
|
||||
.getOrElse(candidateSourceAlgoStr, DefaultAlgo),
|
||||
DefaultAlgoToken) -> score
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object FrsTweetCandidateGenerator {
|
||||
val DefaultAlgo: Algorithm.Value = Algorithm.Other
|
||||
// 9999 is the token for Algorithm.Other
|
||||
val DefaultAlgoToken: Int = AlgorithmToFeedbackTokenMap.getOrElse(DefaultAlgo, 9999)
|
||||
}
|
@ -0,0 +1,156 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.filter.PreRankFilterRunner
|
||||
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class RelatedTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
ProducerBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
preRankFilterRunner: PreRankFilterRunner,
|
||||
relatedTweetScribeLogger: RelatedTweetScribeLogger,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val preRankFilterStats = stats.scope("preRankFilter")
|
||||
|
||||
def get(
|
||||
query: RelatedTweetCandidateGeneratorQuery
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
for {
|
||||
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query)
|
||||
}
|
||||
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
|
||||
preRankFilter(query, initialCandidates)
|
||||
}
|
||||
} yield {
|
||||
filteredCandidates.headOption
|
||||
.getOrElse(
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedTweetCandidateGenerator results invalid")
|
||||
).take(query.maxNumResults)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def fetchCandidates(
|
||||
query: RelatedTweetCandidateGeneratorQuery
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
relatedTweetScribeLogger.scribeInitialCandidates(
|
||||
query,
|
||||
query.internalId match {
|
||||
case InternalId.TweetId(_) =>
|
||||
getCandidatesFromSimilarityEngine(
|
||||
query,
|
||||
TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
|
||||
tweetBasedUnifiedSimilarityEngine.getCandidates)
|
||||
case InternalId.UserId(_) =>
|
||||
getCandidatesFromSimilarityEngine(
|
||||
query,
|
||||
ProducerBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
|
||||
producerBasedUnifiedSimilarityEngine.getCandidates)
|
||||
case _ =>
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedTweetCandidateGenerator gets invalid InternalId")
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/***
|
||||
* fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine,
|
||||
* and apply VF filter based on TweetInfoStore
|
||||
* To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]]
|
||||
* instead of a Seq[Candidate] even though we only have a Seq in it.
|
||||
*/
|
||||
private def getCandidatesFromSimilarityEngine[QueryType](
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
fromParamsForRelatedTweet: (InternalId, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
/***
|
||||
* We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation
|
||||
* and as a result, it will return Seq[Seq[InitialCandidate]]
|
||||
*/
|
||||
val engineQueries =
|
||||
Seq(fromParamsForRelatedTweet(query.internalId, query.params))
|
||||
|
||||
Future
|
||||
.collect {
|
||||
engineQueries.map { query =>
|
||||
for {
|
||||
candidates <- getFunc(query)
|
||||
prefilterCandidates <- convertToInitialCandidates(
|
||||
candidates.toSeq.flatten
|
||||
)
|
||||
} yield prefilterCandidates
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def preRankFilter(
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
candidates: Seq[Seq[InitialCandidate]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
relatedTweetScribeLogger.scribePreRankFilterCandidates(
|
||||
query,
|
||||
preRankFilterRunner
|
||||
.runSequentialFilters(query, candidates))
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/***
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
* This tweetInfo filter also acts as the VF filter
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,139 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.filter.PreRankFilterRunner
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class RelatedVideoTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
preRankFilterRunner: PreRankFilterRunner,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val preRankFilterStats = stats.scope("preRankFilter")
|
||||
|
||||
def get(
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
for {
|
||||
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query)
|
||||
}
|
||||
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
|
||||
preRankFilter(query, initialCandidates)
|
||||
}
|
||||
} yield {
|
||||
filteredCandidates.headOption
|
||||
.getOrElse(
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedVideoTweetCandidateGenerator results invalid")
|
||||
).take(query.maxNumResults)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def fetchCandidates(
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
query.internalId match {
|
||||
case InternalId.TweetId(_) =>
|
||||
getCandidatesFromSimilarityEngine(
|
||||
query,
|
||||
TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedVideoTweet,
|
||||
tweetBasedUnifiedSimilarityEngine.getCandidates)
|
||||
case _ =>
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedVideoTweetCandidateGenerator gets invalid InternalId")
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
* fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine,
|
||||
* and apply VF filter based on TweetInfoStore
|
||||
* To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]]
|
||||
* instead of a Seq[Candidate] even though we only have a Seq in it.
|
||||
*/
|
||||
private def getCandidatesFromSimilarityEngine[QueryType](
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery,
|
||||
fromParamsForRelatedVideoTweet: (InternalId, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
/***
|
||||
* We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation
|
||||
* and as a result, it will return Seq[Seq[InitialCandidate]]
|
||||
*/
|
||||
val engineQueries =
|
||||
Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params))
|
||||
|
||||
Future
|
||||
.collect {
|
||||
engineQueries.map { query =>
|
||||
for {
|
||||
candidates <- getFunc(query)
|
||||
prefilterCandidates <- convertToInitialCandidates(
|
||||
candidates.toSeq.flatten
|
||||
)
|
||||
} yield prefilterCandidates
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def preRankFilter(
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery,
|
||||
candidates: Seq[Seq[InitialCandidate]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
preRankFilterRunner
|
||||
.runSequentialFilters(query, candidates)
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/***
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
* This tweetInfo filter also acts as the VF filter
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,640 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.param.InterestedInParams
|
||||
import com.twitter.cr_mixer.param.SimClustersANNParams
|
||||
import com.twitter.cr_mixer.similarity_engine.EngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.base.CandidateSource
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.ModelVersions
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
import javax.inject.Named
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
|
||||
/**
|
||||
* This store looks for similar tweets for a given UserId that generates UserInterestedIn
|
||||
* from SimClustersANN. It will be a standalone CandidateGeneration class moving forward.
|
||||
*
|
||||
* After the abstraction improvement (apply SimilarityEngine trait)
|
||||
* these CG will be subjected to change.
|
||||
*/
|
||||
@Singleton
|
||||
case class SimClustersInterestedInCandidateGeneration @Inject() (
|
||||
@Named(ModuleNames.SimClustersANNSimilarityEngine)
|
||||
simClustersANNSimilarityEngine: StandardSimilarityEngine[
|
||||
SimClustersANNSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends CandidateSource[
|
||||
SimClustersInterestedInCandidateGeneration.Query,
|
||||
Seq[TweetWithCandidateGenerationInfo]
|
||||
] {
|
||||
|
||||
override def name: String = this.getClass.getSimpleName
|
||||
private val stats = statsReceiver.scope(name)
|
||||
private val fetchCandidatesStat = stats.scope("fetchCandidates")
|
||||
|
||||
override def get(
|
||||
query: SimClustersInterestedInCandidateGeneration.Query
|
||||
): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
|
||||
|
||||
query.internalId match {
|
||||
case _: InternalId.UserId =>
|
||||
StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
|
||||
// UserInterestedIn Queries
|
||||
val userInterestedInCandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInExperimentalSANNCandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInExperimentalSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN1CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN1SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN1Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN2CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN2SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN2Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN3CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN3SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN3Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN5CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN5SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN5Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN4CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN4SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN4Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
// UserNextInterestedIn Queries
|
||||
val userNextInterestedInCandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInExperimentalSANNCandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInExperimentalSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN1CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN1SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN1Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN2CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN2SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN2Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN3CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN3SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN3Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN5CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN5SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN5Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN4CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN4SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN4Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
// AddressBookInterestedIn Queries
|
||||
val userAddressBookInterestedInCandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookExperimentalSANNCandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInExperimentalSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN1CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN1SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN1Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN2CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN2SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN2Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN3CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN3SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN3Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN5CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN5SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN5Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN4CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN4SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN4Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
Future
|
||||
.collect(
|
||||
Seq(
|
||||
userInterestedInCandidateResultFut,
|
||||
userNextInterestedInCandidateResultFut,
|
||||
userAddressBookInterestedInCandidateResultFut,
|
||||
userInterestedInExperimentalSANNCandidateResultFut,
|
||||
userNextInterestedInExperimentalSANNCandidateResultFut,
|
||||
userAddressBookExperimentalSANNCandidateResultFut,
|
||||
userInterestedInSANN1CandidateResultFut,
|
||||
userNextInterestedInSANN1CandidateResultFut,
|
||||
userAddressBookSANN1CandidateResultFut,
|
||||
userInterestedInSANN2CandidateResultFut,
|
||||
userNextInterestedInSANN2CandidateResultFut,
|
||||
userAddressBookSANN2CandidateResultFut,
|
||||
userInterestedInSANN3CandidateResultFut,
|
||||
userNextInterestedInSANN3CandidateResultFut,
|
||||
userAddressBookSANN3CandidateResultFut,
|
||||
userInterestedInSANN5CandidateResultFut,
|
||||
userNextInterestedInSANN5CandidateResultFut,
|
||||
userAddressBookSANN5CandidateResultFut,
|
||||
userInterestedInSANN4CandidateResultFut,
|
||||
userNextInterestedInSANN4CandidateResultFut,
|
||||
userAddressBookSANN4CandidateResultFut
|
||||
)
|
||||
).map { candidateResults =>
|
||||
Some(
|
||||
candidateResults.map(candidateResult => candidateResult.getOrElse(Seq.empty))
|
||||
)
|
||||
}
|
||||
}
|
||||
case _ =>
|
||||
stats.counter("sourceId_is_not_userId_cnt").incr()
|
||||
Future.None
|
||||
}
|
||||
}
|
||||
|
||||
private def simClustersCandidateMinScoreFilter(
|
||||
simClustersAnnCandidates: Seq[TweetWithScore],
|
||||
simClustersInterestedInMinScore: Double,
|
||||
simClustersANNConfigId: String
|
||||
): Seq[TweetWithScore] = {
|
||||
val filteredCandidates = simClustersAnnCandidates
|
||||
.filter { candidate =>
|
||||
candidate.score > simClustersInterestedInMinScore
|
||||
}
|
||||
|
||||
stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(filteredCandidates.size)
|
||||
stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr()
|
||||
if (filteredCandidates.isEmpty)
|
||||
stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr()
|
||||
|
||||
filteredCandidates.map { candidate =>
|
||||
TweetWithScore(candidate.tweetId, candidate.score)
|
||||
}
|
||||
}
|
||||
|
||||
private def getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine: StandardSimilarityEngine[
|
||||
SimClustersANNSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
simClustersInterestedInMinScore: Double,
|
||||
): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {
|
||||
val interestedInCandidatesFut =
|
||||
simClustersANNSimilarityEngine.getCandidates(simClustersANNQuery)
|
||||
|
||||
val interestedInCandidateResultFut = interestedInCandidatesFut.map { interestedInCandidates =>
|
||||
stats.stat("candidateSize").add(interestedInCandidates.size)
|
||||
|
||||
val embeddingCandidatesStat = stats.scope(
|
||||
simClustersANNQuery.storeQuery.simClustersANNQuery.sourceEmbeddingId.embeddingType.name)
|
||||
|
||||
embeddingCandidatesStat.stat("candidateSize").add(interestedInCandidates.size)
|
||||
if (interestedInCandidates.isEmpty) {
|
||||
embeddingCandidatesStat.counter("empty_results").incr()
|
||||
}
|
||||
embeddingCandidatesStat.counter("requests").incr()
|
||||
|
||||
val filteredTweets = simClustersCandidateMinScoreFilter(
|
||||
interestedInCandidates.toSeq.flatten,
|
||||
simClustersInterestedInMinScore,
|
||||
simClustersANNQuery.storeQuery.simClustersANNConfigId)
|
||||
|
||||
val interestedInTweetsWithCGInfo = filteredTweets.map { tweetWithScore =>
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimClustersANNSimilarityEngine
|
||||
.toSimilarityEngineInfo(simClustersANNQuery, tweetWithScore.score),
|
||||
Seq.empty // SANN is an atomic SE, and hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
val interestedInResults = if (interestedInTweetsWithCGInfo.nonEmpty) {
|
||||
Some(interestedInTweetsWithCGInfo)
|
||||
} else None
|
||||
interestedInResults
|
||||
}
|
||||
interestedInCandidateResultFut
|
||||
}
|
||||
}
|
||||
|
||||
object SimClustersInterestedInCandidateGeneration {
|
||||
|
||||
case class Query(
|
||||
internalId: InternalId,
|
||||
enableUserInterestedIn: Boolean,
|
||||
enableUserNextInterestedIn: Boolean,
|
||||
enableAddressBookNextInterestedIn: Boolean,
|
||||
enableProdSimClustersANNSimilarityEngine: Boolean,
|
||||
enableExperimentalSimClustersANNSimilarityEngine: Boolean,
|
||||
enableSimClustersANN1SimilarityEngine: Boolean,
|
||||
enableSimClustersANN2SimilarityEngine: Boolean,
|
||||
enableSimClustersANN3SimilarityEngine: Boolean,
|
||||
enableSimClustersANN5SimilarityEngine: Boolean,
|
||||
enableSimClustersANN4SimilarityEngine: Boolean,
|
||||
simClustersInterestedInMinScore: Double,
|
||||
simClustersNextInterestedInMinScore: Double,
|
||||
simClustersAddressBookInterestedInMinScore: Double,
|
||||
interestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInExperimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInExperimentalSimClustersANNQuery: EngineQuery[
|
||||
SimClustersANNSimilarityEngine.Query
|
||||
],
|
||||
addressbookInterestedInExperimentalSimClustersANNQuery: EngineQuery[
|
||||
SimClustersANNSimilarityEngine.Query
|
||||
],
|
||||
interestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
)
|
||||
|
||||
def fromParams(
|
||||
internalId: InternalId,
|
||||
params: configapi.Params,
|
||||
): Query = {
|
||||
// SimClusters common configs
|
||||
val simClustersModelVersion =
|
||||
ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))
|
||||
val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId)
|
||||
val experimentalSimClustersANNConfigId = params(
|
||||
SimClustersANNParams.ExperimentalSimClustersANNConfigId)
|
||||
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
|
||||
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
|
||||
val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId)
|
||||
val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId)
|
||||
val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId)
|
||||
|
||||
val simClustersInterestedInMinScore = params(InterestedInParams.MinScoreParam)
|
||||
val simClustersNextInterestedInMinScore = params(
|
||||
InterestedInParams.MinScoreSequentialModelParam)
|
||||
val simClustersAddressBookInterestedInMinScore = params(
|
||||
InterestedInParams.MinScoreAddressBookParam)
|
||||
|
||||
// InterestedIn embeddings parameters
|
||||
val interestedInEmbedding = params(InterestedInParams.InterestedInEmbeddingIdParam)
|
||||
val nextInterestedInEmbedding = params(InterestedInParams.NextInterestedInEmbeddingIdParam)
|
||||
val addressbookInterestedInEmbedding = params(
|
||||
InterestedInParams.AddressBookInterestedInEmbeddingIdParam)
|
||||
|
||||
// Prod SimClustersANN Query
|
||||
val interestedInSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANNConfigId,
|
||||
params)
|
||||
|
||||
// Experimental SANN cluster Query
|
||||
val interestedInExperimentalSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
experimentalSimClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInExperimentalSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
experimentalSimClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInExperimentalSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
experimentalSimClustersANNConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 1 Query
|
||||
val interestedInSimClustersANN1Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN1ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN1Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN1ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN1Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN1ConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 2 Query
|
||||
val interestedInSimClustersANN2Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN2Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN2Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 3 Query
|
||||
val interestedInSimClustersANN3Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN3ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN3Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN3ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN3Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN3ConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 5 Query
|
||||
val interestedInSimClustersANN5Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN5ConfigId,
|
||||
params)
|
||||
// SimClusters ANN cluster 4 Query
|
||||
val interestedInSimClustersANN4Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN4ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN5Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN5ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN4Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN4ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN5Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN5ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN4Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN4ConfigId,
|
||||
params)
|
||||
|
||||
Query(
|
||||
internalId = internalId,
|
||||
enableUserInterestedIn = params(InterestedInParams.EnableSourceParam),
|
||||
enableUserNextInterestedIn = params(InterestedInParams.EnableSourceSequentialModelParam),
|
||||
enableAddressBookNextInterestedIn = params(InterestedInParams.EnableSourceAddressBookParam),
|
||||
enableProdSimClustersANNSimilarityEngine =
|
||||
params(InterestedInParams.EnableProdSimClustersANNParam),
|
||||
enableExperimentalSimClustersANNSimilarityEngine =
|
||||
params(InterestedInParams.EnableExperimentalSimClustersANNParam),
|
||||
enableSimClustersANN1SimilarityEngine = params(InterestedInParams.EnableSimClustersANN1Param),
|
||||
enableSimClustersANN2SimilarityEngine = params(InterestedInParams.EnableSimClustersANN2Param),
|
||||
enableSimClustersANN3SimilarityEngine = params(InterestedInParams.EnableSimClustersANN3Param),
|
||||
enableSimClustersANN5SimilarityEngine = params(InterestedInParams.EnableSimClustersANN5Param),
|
||||
enableSimClustersANN4SimilarityEngine = params(InterestedInParams.EnableSimClustersANN4Param),
|
||||
simClustersInterestedInMinScore = simClustersInterestedInMinScore,
|
||||
simClustersNextInterestedInMinScore = simClustersNextInterestedInMinScore,
|
||||
simClustersAddressBookInterestedInMinScore = simClustersAddressBookInterestedInMinScore,
|
||||
interestedInSimClustersANNQuery = interestedInSimClustersANNQuery,
|
||||
nextInterestedInSimClustersANNQuery = nextInterestedInSimClustersANNQuery,
|
||||
addressbookInterestedInSimClustersANNQuery = addressbookInterestedInSimClustersANNQuery,
|
||||
interestedInExperimentalSimClustersANNQuery = interestedInExperimentalSimClustersANNQuery,
|
||||
nextInterestedInExperimentalSimClustersANNQuery =
|
||||
nextInterestedInExperimentalSimClustersANNQuery,
|
||||
addressbookInterestedInExperimentalSimClustersANNQuery =
|
||||
addressbookInterestedInExperimentalSimClustersANNQuery,
|
||||
interestedInSimClustersANN1Query = interestedInSimClustersANN1Query,
|
||||
nextInterestedInSimClustersANN1Query = nextInterestedInSimClustersANN1Query,
|
||||
addressbookInterestedInSimClustersANN1Query = addressbookInterestedInSimClustersANN1Query,
|
||||
interestedInSimClustersANN2Query = interestedInSimClustersANN2Query,
|
||||
nextInterestedInSimClustersANN2Query = nextInterestedInSimClustersANN2Query,
|
||||
addressbookInterestedInSimClustersANN2Query = addressbookInterestedInSimClustersANN2Query,
|
||||
interestedInSimClustersANN3Query = interestedInSimClustersANN3Query,
|
||||
nextInterestedInSimClustersANN3Query = nextInterestedInSimClustersANN3Query,
|
||||
addressbookInterestedInSimClustersANN3Query = addressbookInterestedInSimClustersANN3Query,
|
||||
interestedInSimClustersANN5Query = interestedInSimClustersANN5Query,
|
||||
nextInterestedInSimClustersANN5Query = nextInterestedInSimClustersANN5Query,
|
||||
addressbookInterestedInSimClustersANN5Query = addressbookInterestedInSimClustersANN5Query,
|
||||
interestedInSimClustersANN4Query = interestedInSimClustersANN4Query,
|
||||
nextInterestedInSimClustersANN4Query = nextInterestedInSimClustersANN4Query,
|
||||
addressbookInterestedInSimClustersANN4Query = addressbookInterestedInSimClustersANN4Query,
|
||||
)
|
||||
}
|
||||
}
|
@ -0,0 +1,232 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TopicTweetWithScore
|
||||
import com.twitter.cr_mixer.param.TopicTweetParams
|
||||
import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SkitHighPrecisionTopicTweetSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweet
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.servo.util.MemoizingStatsReceiver
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.snowflake.id.SnowflakeId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* Formerly CrTopic in legacy Content Recommender. This generator finds top Tweets per Topic.
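* The pipeline fetches per-topic candidates from the Certo and Skit similarity engines,
* hydrates TweetInfo, filters them (age, nudity, optional video-only, impressed tweets),
* ranks by favCount, and returns the top maxNumResults Tweets per topic.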
|
||||
*/
|
||||
@Singleton
|
||||
class TopicTweetCandidateGenerator @Inject() (
|
||||
certoTopicTweetSimilarityEngine: CertoTopicTweetSimilarityEngine,
|
||||
skitTopicTweetSimilarityEngine: SkitTopicTweetSimilarityEngine,
|
||||
skitHighPrecisionTopicTweetSimilarityEngine: SkitHighPrecisionTopicTweetSimilarityEngine,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
timeoutConfig: TimeoutConfig,
|
||||
globalStats: StatsReceiver) {
|
||||
private val timer = DefaultTimer
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val filterCandidatesStats = stats.scope("filterCandidates")
|
||||
private val tweetyPieFilteredStats = filterCandidatesStats.stat("tweetypie_filtered")
|
||||
private val memoizedStatsReceiver = new MemoizingStatsReceiver(stats)
|
||||
|
||||
def get(
|
||||
query: TopicTweetCandidateGeneratorQuery
|
||||
): Future[Map[Long, Seq[TopicTweet]]] = {
|
||||
val maxTweetAge = query.params(TopicTweetParams.MaxTweetAge)
|
||||
val product = query.product
|
||||
val allStats = memoizedStatsReceiver.scope("all")
|
||||
val perProductStats = memoizedStatsReceiver.scope("perProduct", product.name)
|
||||
StatsUtil.trackMapValueStats(allStats) {
|
||||
StatsUtil.trackMapValueStats(perProductStats) {
|
||||
val result = for {
|
||||
retrievedTweets <- fetchCandidates(query)
|
||||
initialTweetCandidates <- convertToInitialCandidates(retrievedTweets)
|
||||
filteredTweetCandidates <- filterCandidates(
|
||||
initialTweetCandidates,
|
||||
maxTweetAge,
|
||||
query.isVideoOnly,
|
||||
query.impressedTweetList)
|
||||
rankedTweetCandidates = rankCandidates(filteredTweetCandidates)
|
||||
hydratedTweetCandidates = hydrateCandidates(rankedTweetCandidates)
|
||||
} yield {
|
||||
hydratedTweetCandidates.map {
|
||||
case (topicId, topicTweets) =>
|
||||
val topKTweets = topicTweets.take(query.maxNumResults)
|
||||
topicId -> topKTweets
|
||||
}
|
||||
}
|
||||
result.raiseWithin(timeoutConfig.topicTweetEndpointTimeout)(timer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def fetchCandidates(
|
||||
query: TopicTweetCandidateGeneratorQuery
|
||||
): Future[Map[TopicId, Option[Seq[TopicTweetWithScore]]]] = {
|
||||
Future.collect {
|
||||
query.topicIds.map { topicId =>
|
||||
topicId -> StatsUtil.trackOptionStats(fetchCandidatesStats) {
|
||||
Future
|
||||
.join(
|
||||
certoTopicTweetSimilarityEngine.get(CertoTopicTweetSimilarityEngine
|
||||
.fromParams(topicId, query.isVideoOnly, query.params)),
|
||||
skitTopicTweetSimilarityEngine
|
||||
.get(SkitTopicTweetSimilarityEngine
|
||||
.fromParams(topicId, query.isVideoOnly, query.params)),
|
||||
skitHighPrecisionTopicTweetSimilarityEngine
|
||||
.get(SkitHighPrecisionTopicTweetSimilarityEngine
|
||||
.fromParams(topicId, query.isVideoOnly, query.params))
|
||||
).map {
|
||||
case (certoTopicTweets, skitTfgTopicTweets, skitHighPrecisionTopicTweets) =>
|
||||
val uniqueCandidates = (certoTopicTweets.getOrElse(Nil) ++
|
||||
skitTfgTopicTweets.getOrElse(Nil) ++
|
||||
skitHighPrecisionTopicTweets.getOrElse(Nil))
|
||||
.groupBy(_.tweetId).map {
|
||||
case (_, dupCandidates) => dupCandidates.head
|
||||
}.toSeq
|
||||
Some(uniqueCandidates)
|
||||
}
|
||||
}
|
||||
}.toMap
|
||||
}
|
||||
}
|
||||
|
||||
private def convertToInitialCandidates(
|
||||
candidatesMap: Map[TopicId, Option[Seq[TopicTweetWithScore]]]
|
||||
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
|
||||
val initialCandidates = candidatesMap.map {
|
||||
case (topicId, candidatesOpt) =>
|
||||
val candidates = candidatesOpt.getOrElse(Nil)
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
val numTweetsPreFilter = tweetIds.size
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/** *
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
*/
|
||||
val tweetyPieFilteredInitialCandidates = candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimilarityEngineInfo(
|
||||
similarityEngineType = candidate.similarityEngineType,
|
||||
modelId = None,
|
||||
score = Some(candidate.score)),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
val numTweetsPostFilter = tweetyPieFilteredInitialCandidates.size
|
||||
tweetyPieFilteredStats.add(numTweetsPreFilter - numTweetsPostFilter)
|
||||
topicId -> tweetyPieFilteredInitialCandidates
|
||||
}
|
||||
}
|
||||
|
||||
Future.collect(initialCandidates.toSeq).map(_.toMap)
|
||||
}
|
||||
|
||||
private def filterCandidates(
|
||||
topicTweetMap: Map[TopicId, Seq[InitialCandidate]],
|
||||
maxTweetAge: Duration,
|
||||
isVideoOnly: Boolean,
|
||||
excludeTweetIds: Set[TweetId]
|
||||
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
|
||||
|
||||
val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
|
||||
|
||||
val filteredResults = topicTweetMap.map {
|
||||
case (topicId, tweetsWithScore) =>
|
||||
topicId -> StatsUtil.trackItemsStats(filterCandidatesStats) {
|
||||
|
||||
val timeFilteredTweets =
|
||||
tweetsWithScore.filter { tweetWithScore =>
|
||||
tweetWithScore.tweetId >= earliestTweetId && !excludeTweetIds.contains(
|
||||
tweetWithScore.tweetId)
|
||||
}
|
||||
|
||||
filterCandidatesStats
|
||||
.stat("exclude_and_time_filtered").add(tweetsWithScore.size - timeFilteredTweets.size)
|
||||
|
||||
val tweetNudityFilteredTweets =
|
||||
timeFilteredTweets.collect {
|
||||
case tweet if tweet.tweetInfo.isPassTweetMediaNudityTag.contains(true) => tweet
|
||||
}
|
||||
|
||||
filterCandidatesStats
|
||||
.stat("tweet_nudity_filtered").add(
|
||||
timeFilteredTweets.size - tweetNudityFilteredTweets.size)
|
||||
|
||||
val userNudityFilteredTweets =
|
||||
tweetNudityFilteredTweets.collect {
|
||||
case tweet if tweet.tweetInfo.isPassUserNudityRateStrict.contains(true) => tweet
|
||||
}
|
||||
|
||||
filterCandidatesStats
|
||||
.stat("user_nudity_filtered").add(
|
||||
tweetNudityFilteredTweets.size - userNudityFilteredTweets.size)
|
||||
|
||||
val videoFilteredTweets = {
|
||||
if (isVideoOnly) {
|
||||
userNudityFilteredTweets.collect {
|
||||
case tweet if tweet.tweetInfo.hasVideo.contains(true) => tweet
|
||||
}
|
||||
} else {
|
||||
userNudityFilteredTweets
|
||||
}
|
||||
}
|
||||
|
||||
Future.value(videoFilteredTweets)
|
||||
}
|
||||
}
|
||||
Future.collect(filteredResults)
|
||||
}
|
||||
|
||||
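// Ranks each topic's candidates by favCount, highest first.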
private def rankCandidates(
|
||||
tweetCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
|
||||
): Map[TopicId, Seq[InitialCandidate]] = {
|
||||
tweetCandidatesMap.mapValues { tweetCandidates =>
|
||||
tweetCandidates.sortBy { candidate =>
|
||||
-candidate.tweetInfo.favCount
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def hydrateCandidates(
|
||||
topicCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
|
||||
): Map[Long, Seq[TopicTweet]] = {
|
||||
topicCandidatesMap.map {
|
||||
case (topicId, tweetsWithScore) =>
|
||||
topicId.entityId ->
|
||||
tweetsWithScore.map { tweetWithScore =>
|
||||
val similarityEngineType: SimilarityEngineType =
|
||||
tweetWithScore.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
|
||||
TopicTweet(
|
||||
tweetId = tweetWithScore.tweetId,
|
||||
score = tweetWithScore.getSimilarityScore,
|
||||
similarityEngineType = similarityEngineType
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,179 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
|
||||
import com.twitter.cr_mixer.filter.UtegFilterRunner
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
|
||||
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class UtegTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
UserTweetEntityGraphSimilarityEngine.Query,
|
||||
TweetWithScoreAndSocialProof
|
||||
],
|
||||
utegTweetScribeLogger: UtegTweetScribeLogger,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
|
||||
utegFilterRunner: UtegFilterRunner,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchSeedsStats = stats.scope("fetchSeeds")
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val utegFilterStats = stats.scope("utegFilter")
|
||||
private val rankStats = stats.scope("rank")
|
||||
|
||||
def get(
|
||||
query: UtegTweetCandidateGeneratorQuery
|
||||
): Future[Seq[TweetWithScoreAndSocialProof]] = {
|
||||
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
|
||||
/**
|
||||
* The candidate we return in the end needs a social proof field, which isn't
|
||||
* supported by any existing Candidate type, so we created TweetWithScoreAndSocialProof
|
||||
* instead.
|
||||
*
|
||||
* However, filters and the light ranker expect Candidate-typed params. In order to minimise the
|
||||
* changes to them, we are doing conversions from/to TweetWithScoreAndSocialProof to/from Candidate
|
||||
* in this method.
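* convertToInitialCandidates and convertToTweets below perform those conversions.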
|
||||
*/
|
||||
for {
|
||||
realGraphSeeds <- StatsUtil.trackItemMapStats(fetchSeedsStats) {
|
||||
fetchSeeds(query)
|
||||
}
|
||||
initialTweets <- StatsUtil.trackItemsStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query, realGraphSeeds)
|
||||
}
|
||||
initialCandidates <- convertToInitialCandidates(initialTweets)
|
||||
filteredCandidates <- StatsUtil.trackItemsStats(utegFilterStats) {
|
||||
utegFilter(query, initialCandidates)
|
||||
}
|
||||
rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
|
||||
rankCandidates(query, filteredCandidates)
|
||||
}
|
||||
} yield {
|
||||
val topTweets = rankedCandidates.take(query.maxNumResults)
|
||||
convertToTweets(topTweets, initialTweets.map(tweet => tweet.tweetId -> tweet).toMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def utegFilter(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
candidates: Seq[InitialCandidate]
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
utegFilterRunner.runSequentialFilters(query, Seq(candidates)).map(_.flatten)
|
||||
}
|
||||
|
||||
private def fetchSeeds(
|
||||
query: UtegTweetCandidateGeneratorQuery
|
||||
): Future[Map[UserId, Double]] = {
|
||||
realGraphInSourceGraphFetcher
|
||||
.get(FetcherQuery(query.userId, query.product, query.userState, query.params))
|
||||
.map(_.map(_.seedWithScores).getOrElse(Map.empty))
|
||||
}
|
||||
|
||||
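// Light ranking: wraps each filtered candidate as a blended candidate and reuses its
// similarity score as the prediction score.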
private[candidate_generation] def rankCandidates(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
filteredCandidates: Seq[InitialCandidate],
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
val blendedCandidates = filteredCandidates.map(candidate =>
|
||||
candidate.toBlendedCandidate(Seq(candidate.candidateGenerationInfo)))
|
||||
|
||||
Future(
|
||||
blendedCandidates.map { candidate =>
|
||||
val score = candidate.getSimilarityScore
|
||||
candidate.toRankedCandidate(score)
|
||||
}
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
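// Builds a UserTweetEntityGraph query from the user's RealGraph seeds (excluding already
// impressed tweets), fetches tweet candidates with social proof, and scribes the initial
// candidates before returning them.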
def fetchCandidates(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
realGraphSeeds: Map[UserId, Double],
|
||||
): Future[Seq[TweetWithScoreAndSocialProof]] = {
|
||||
val engineQuery = UserTweetEntityGraphSimilarityEngine.fromParams(
|
||||
query.userId,
|
||||
realGraphSeeds,
|
||||
Some(query.impressedTweetList.toSeq),
|
||||
query.params
|
||||
)
|
||||
|
||||
utegTweetScribeLogger.scribeInitialCandidates(
|
||||
query,
|
||||
userTweetEntityGraphSimilarityEngine.getCandidates(engineQuery).map(_.toSeq.flatten)
|
||||
)
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithScoreAndSocialProof],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/** *
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimilarityEngineInfo(
|
||||
similarityEngineType = SimilarityEngineType.Uteg,
|
||||
modelId = None,
|
||||
score = Some(candidate.score)),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToTweets(
|
||||
candidates: Seq[RankedCandidate],
|
||||
tweetMap: Map[TweetId, TweetWithScoreAndSocialProof]
|
||||
): Seq[TweetWithScoreAndSocialProof] = {
|
||||
candidates.map { candidate =>
|
||||
tweetMap
|
||||
.get(candidate.tweetId).map { tweet =>
|
||||
TweetWithScoreAndSocialProof(
|
||||
tweet.tweetId,
|
||||
candidate.predictionScore,
|
||||
tweet.socialProofByType
|
||||
)
|
||||
// The exception should never be thrown
|
||||
}.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets"))
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"configapi/configapi-core",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,473 @@
|
||||
package com.twitter.cr_mixer.config
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.exception.InvalidSANNConfigException
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
|
||||
import com.twitter.simclustersann.thriftscala.{SimClustersANNConfig => ThriftSimClustersANNConfig}
|
||||
import com.twitter.util.Duration
|
||||
|
||||
case class SimClustersANNConfig(
|
||||
maxNumResults: Int,
|
||||
minScore: Double,
|
||||
candidateEmbeddingType: EmbeddingType,
|
||||
maxTopTweetsPerCluster: Int,
|
||||
maxScanClusters: Int,
|
||||
maxTweetCandidateAge: Duration,
|
||||
minTweetCandidateAge: Duration,
|
||||
annAlgorithm: ScoringAlgorithm) {
|
||||
val toSANNConfigThrift: ThriftSimClustersANNConfig = ThriftSimClustersANNConfig(
|
||||
maxNumResults = maxNumResults,
|
||||
minScore = minScore,
|
||||
candidateEmbeddingType = candidateEmbeddingType,
|
||||
maxTopTweetsPerCluster = maxTopTweetsPerCluster,
|
||||
maxScanClusters = maxScanClusters,
|
||||
maxTweetCandidateAgeHours = maxTweetCandidateAge.inHours,
|
||||
minTweetCandidateAgeHours = minTweetCandidateAge.inHours,
|
||||
annAlgorithm = annAlgorithm,
|
||||
)
|
||||
}
|
||||
|
||||
object SimClustersANNConfig {
|
||||
|
||||
final val DefaultConfig = SimClustersANNConfig(
|
||||
maxNumResults = 200,
|
||||
minScore = 0.0,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedTweet,
|
||||
maxTopTweetsPerCluster = 800,
|
||||
maxScanClusters = 50,
|
||||
maxTweetCandidateAge = 24.hours,
|
||||
minTweetCandidateAge = 0.hours,
|
||||
annAlgorithm = ScoringAlgorithm.CosineSimilarity,
|
||||
)
|
||||
|
||||
/*
|
||||
SimClustersANNConfigId: String
|
||||
Format: Prod - “EmbeddingType_ModelVersion_Default”
|
||||
Format: Experiment - “EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number”. Date : YYYYMMDD
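e.g. Prod: "FavBasedProducer_Model20m145k2020_Default"; Experiment: "FavBasedProducer_Model20m145k2020_20220617_06"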
|
||||
*/
|
||||
|
||||
private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val FavBasedProducer_Model20m145k2020_20220617_06 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val FavBasedProducer_Model20m145k2020_20220801 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val FavBasedProducer_Model20m145k2020_20220810 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val FavBasedProducer_Model20m145k2020_20220818 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val FavBasedProducer_Model20m145k2020_20220819 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val FavBasedProducer_Model20m145k2020_20221221 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val FavBasedProducer_Model20m145k2020_20221220 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
// SANN-4 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220617_06 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220801 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_20220617_06.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220810 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220818 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220819 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20221221 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20221220 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default =
|
||||
DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220617_06 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220801 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220810 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220818 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220819 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20221221 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20221220 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
// Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow
|
||||
private val FollowBasedProducer_Model20m145k2020_Default =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy()
|
||||
|
||||
// Experimental SANN config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220801 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220810 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220818 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220819 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20221221 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20221220 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map(
|
||||
"FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default,
|
||||
"FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06,
|
||||
"FavBasedProducer_Model20m145k2020_20220801" -> FavBasedProducer_Model20m145k2020_20220801,
|
||||
"FavBasedProducer_Model20m145k2020_20220810" -> FavBasedProducer_Model20m145k2020_20220810,
|
||||
"FavBasedProducer_Model20m145k2020_20220818" -> FavBasedProducer_Model20m145k2020_20220818,
|
||||
"FavBasedProducer_Model20m145k2020_20220819" -> FavBasedProducer_Model20m145k2020_20220819,
|
||||
"FavBasedProducer_Model20m145k2020_20221221" -> FavBasedProducer_Model20m145k2020_20221221,
|
||||
"FavBasedProducer_Model20m145k2020_20221220" -> FavBasedProducer_Model20m145k2020_20221220,
|
||||
"FollowBasedProducer_Model20m145k2020_Default" -> FollowBasedProducer_Model20m145k2020_Default,
|
||||
"FollowBasedProducer_Model20m145k2020_20220801" -> FollowBasedProducer_Model20m145k2020_20220801,
|
||||
"FollowBasedProducer_Model20m145k2020_20220810" -> FollowBasedProducer_Model20m145k2020_20220810,
|
||||
"FollowBasedProducer_Model20m145k2020_20220818" -> FollowBasedProducer_Model20m145k2020_20220818,
|
||||
"FollowBasedProducer_Model20m145k2020_20220819" -> FollowBasedProducer_Model20m145k2020_20220819,
|
||||
"FollowBasedProducer_Model20m145k2020_20221221" -> FollowBasedProducer_Model20m145k2020_20221221,
|
||||
"FollowBasedProducer_Model20m145k2020_20221220" -> FollowBasedProducer_Model20m145k2020_20221220,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_Default" -> UnfilteredUserInterestedIn_Model20m145k2020_Default,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220617_06" -> UnfilteredUserInterestedIn_Model20m145k2020_20220617_06,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220801" -> UnfilteredUserInterestedIn_Model20m145k2020_20220801,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220810" -> UnfilteredUserInterestedIn_Model20m145k2020_20220810,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220818" -> UnfilteredUserInterestedIn_Model20m145k2020_20220818,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220819" -> UnfilteredUserInterestedIn_Model20m145k2020_20220819,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20221221" -> UnfilteredUserInterestedIn_Model20m145k2020_20221221,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20221220" -> UnfilteredUserInterestedIn_Model20m145k2020_20221220,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220,
|
||||
"UserNextInterestedIn_Model20m145k2020_Default" -> UserNextInterestedIn_Model20m145k2020_Default,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220617_06" -> UserNextInterestedIn_Model20m145k2020_20220617_06,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220801" -> UserNextInterestedIn_Model20m145k2020_20220801,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220810" -> UserNextInterestedIn_Model20m145k2020_20220810,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220818" -> UserNextInterestedIn_Model20m145k2020_20220818,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220819" -> UserNextInterestedIn_Model20m145k2020_20220819,
|
||||
"UserNextInterestedIn_Model20m145k2020_20221221" -> UserNextInterestedIn_Model20m145k2020_20221221,
|
||||
"UserNextInterestedIn_Model20m145k2020_20221220" -> UserNextInterestedIn_Model20m145k2020_20221220,
|
||||
)
|
||||
|
||||
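// Looks up a config by the composed id "<embeddingType>_<modelVersion>_<id>", e.g.
// getConfig("FavBasedProducer", "Model20m145k2020", "20220818") returns
// FavBasedProducer_Model20m145k2020_20220818; unknown ids throw InvalidSANNConfigException.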
def getConfig(
|
||||
embeddingType: String,
|
||||
modelVersion: String,
|
||||
id: String
|
||||
): SimClustersANNConfig = {
|
||||
val configName = embeddingType + "_" + modelVersion + "_" + id
|
||||
DefaultConfigMappings.get(configName) match {
|
||||
case Some(config) => config
|
||||
case None =>
|
||||
throw InvalidSANNConfigException(s"Incorrect config id passed in for SANN $configName")
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
package com.twitter.cr_mixer.config
|
||||
|
||||
import com.twitter.util.Duration
|
||||
|
||||
case class TimeoutConfig(
|
||||
/* Default timeouts for candidate generator */
|
||||
serviceTimeout: Duration,
|
||||
signalFetchTimeout: Duration,
|
||||
similarityEngineTimeout: Duration,
|
||||
annServiceClientTimeout: Duration,
|
||||
/* For Uteg Candidate Generator */
|
||||
utegSimilarityEngineTimeout: Duration,
|
||||
/* For User State Store */
|
||||
userStateUnderlyingStoreTimeout: Duration,
|
||||
userStateStoreTimeout: Duration,
|
||||
/* For FRS based tweets */
|
||||
// Timeout passed to EarlyBird server
|
||||
earlybirdServerTimeout: Duration,
|
||||
// Timeout set on CrMixer side
|
||||
earlybirdSimilarityEngineTimeout: Duration,
|
||||
frsBasedTweetEndpointTimeout: Duration,
|
||||
topicTweetEndpointTimeout: Duration,
|
||||
// Timeout Settings for Navi gRPC Client
|
||||
naviRequestTimeout: Duration)
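// A minimal construction sketch with placeholder durations (illustrative only, not
// production settings; assumes com.twitter.conversions.DurationOps._ is in scope):
//
//   TimeoutConfig(
//     serviceTimeout = 200.milliseconds,
//     signalFetchTimeout = 50.milliseconds,
//     similarityEngineTimeout = 100.milliseconds,
//     annServiceClientTimeout = 80.milliseconds,
//     utegSimilarityEngineTimeout = 120.milliseconds,
//     userStateUnderlyingStoreTimeout = 20.milliseconds,
//     userStateStoreTimeout = 30.milliseconds,
//     earlybirdServerTimeout = 100.milliseconds,
//     earlybirdSimilarityEngineTimeout = 120.milliseconds,
//     frsBasedTweetEndpointTimeout = 200.milliseconds,
//     topicTweetEndpointTimeout = 200.milliseconds,
//     naviRequestTimeout = 50.milliseconds
//   )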
|
@ -0,0 +1,48 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/debug",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"finagle/finagle-base-http/src/main",
|
||||
"finagle/finagle-core/src/main",
|
||||
"finagle/finagle-http/src/main/scala",
|
||||
"finatra/http-server/src/main/scala/com/twitter/finatra/http:controller",
|
||||
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/configapi",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/ads/schema:common-scala",
|
||||
"src/thrift/com/twitter/context:twitter-context-scala",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
|
||||
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
|
||||
"src/thrift/com/twitter/recos:recos-common-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/timelines/render:thrift-scala",
|
||||
"src/thrift/com/twitter/timelines/timeline_logging:thrift-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
"stringcenter/client",
|
||||
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview",
|
||||
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview/funnelseries",
|
||||
"twitter-context/src/main/scala",
|
||||
"user-signal-service/thrift/src/main/thrift:thrift-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,757 @@
|
||||
package com.twitter.cr_mixer.controller
|
||||
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.cr_mixer.candidate_generation.AdsCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.CrCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.FrsTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.RelatedTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.RelatedVideoTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.TopicTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.UtegTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.featureswitch.ParamsBuilder
|
||||
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
|
||||
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
|
||||
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
|
||||
import com.twitter.cr_mixer.logging.RelatedTweetScribeMetadata
|
||||
import com.twitter.cr_mixer.logging.ScribeMetadata
|
||||
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedAdsCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.param.AdsParams
|
||||
import com.twitter.cr_mixer.param.FrsParams.FrsBasedCandidateGenerationMaxCandidatesNumParam
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.param.RelatedTweetGlobalParams
|
||||
import com.twitter.cr_mixer.param.RelatedVideoTweetGlobalParams
|
||||
import com.twitter.cr_mixer.param.TopicTweetParams
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.param.decider.EndpointLoadShedder
|
||||
import com.twitter.cr_mixer.thriftscala.AdTweetRecommendation
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRequest
|
||||
import com.twitter.cr_mixer.thriftscala.AdsResponse
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FrsTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.FrsTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweet
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweet
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweet
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.TweetRecommendation
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweet
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
|
||||
import com.twitter.cr_mixer.util.MetricTagUtil
|
||||
import com.twitter.cr_mixer.util.SignalTimestampStatsUtil
|
||||
import com.twitter.cr_mixer.{thriftscala => t}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finatra.thrift.Controller
|
||||
import com.twitter.hermit.store.common.ReadableWritableStore
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.timeline_logging.{thriftscala => thriftlog}
|
||||
import com.twitter.timelines.tracing.lensview.funnelseries.TweetScoreFunnelSeries
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
import java.util.UUID
|
||||
import javax.inject.Inject
|
||||
import org.apache.commons.lang.exception.ExceptionUtils
|
||||
|
||||
class CrMixerThriftController @Inject() (
|
||||
crCandidateGenerator: CrCandidateGenerator,
|
||||
relatedTweetCandidateGenerator: RelatedTweetCandidateGenerator,
|
||||
relatedVideoTweetCandidateGenerator: RelatedVideoTweetCandidateGenerator,
|
||||
utegTweetCandidateGenerator: UtegTweetCandidateGenerator,
|
||||
frsTweetCandidateGenerator: FrsTweetCandidateGenerator,
|
||||
topicTweetCandidateGenerator: TopicTweetCandidateGenerator,
|
||||
crMixerScribeLogger: CrMixerScribeLogger,
|
||||
relatedTweetScribeLogger: RelatedTweetScribeLogger,
|
||||
utegTweetScribeLogger: UtegTweetScribeLogger,
|
||||
adsRecommendationsScribeLogger: AdsRecommendationsScribeLogger,
|
||||
adsCandidateGenerator: AdsCandidateGenerator,
|
||||
decider: CrMixerDecider,
|
||||
paramsBuilder: ParamsBuilder,
|
||||
endpointLoadShedder: EndpointLoadShedder,
|
||||
signalTimestampStatsUtil: SignalTimestampStatsUtil,
|
||||
tweetRecommendationResultsStore: ReadableWritableStore[UserId, CrMixerTweetResponse],
|
||||
userStateStore: ReadableStore[UserId, UserState],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends Controller(t.CrMixer) {
|
||||
|
||||
lazy private val tweetScoreFunnelSeries = new TweetScoreFunnelSeries(statsReceiver)
|
||||
|
||||
private def logErrMessage(endpoint: String, e: Throwable): Unit = {
|
||||
val msg = Seq(
|
||||
s"Failed endpoint $endpoint: ${e.getLocalizedMessage}",
|
||||
ExceptionUtils.getStackTrace(e)
|
||||
).mkString("\n")
|
||||
|
||||
/** *
* We use logger.info() here to print the message instead of logger.error(),
* because logger.error() sometimes suppresses the detailed stack trace.
*/
|
||||
logger.info(msg)
|
||||
}
|
||||
|
||||
private def generateRequestUUID(): Long = {
|
||||
|
||||
/** *
* We derive a unique positive Long from a random UUID via bitwise operations. See the link below for more:
* https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid
*/
|
||||
UUID.randomUUID().getMostSignificantBits & Long.MaxValue
|
||||
}
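// Illustration (not part of the original file): masking with Long.MaxValue clears the sign bit,
// so the request UUID derived above is always non-negative, e.g.
// (0x8000000000000001L & Long.MaxValue) == 1L, and (b & Long.MaxValue) >= 0L for any bits b.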
|
||||
|
||||
handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args =>
|
||||
val endpointName = "getTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val startTime = Time.now.inMilliseconds
|
||||
val userId = args.request.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
|
||||
)
|
||||
val queryFut = buildCrCandidateGeneratorQuery(args.request, requestUUID, userId)
|
||||
queryFut.flatMap { query =>
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
|
||||
val response = crCandidateGenerator.get(query)
|
||||
|
||||
val blueVerifiedScribedResponse = response.flatMap { rankedCandidates =>
|
||||
val hasBlueVerifiedCandidate = rankedCandidates.exists { tweet =>
|
||||
tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
|
||||
}
|
||||
|
||||
if (hasBlueVerifiedCandidate) {
|
||||
crMixerScribeLogger.scribeGetTweetRecommendationsForBlueVerified(
|
||||
scribeMetadata,
|
||||
response)
|
||||
} else {
|
||||
response
|
||||
}
|
||||
}
|
||||
|
||||
val thriftResponse = blueVerifiedScribedResponse.map { candidates =>
|
||||
if (query.product == t.Product.Home) {
|
||||
scribeTweetScoreFunnelSeries(candidates)
|
||||
}
|
||||
buildThriftResponse(candidates)
|
||||
}
|
||||
|
||||
cacheTweetRecommendationResults(args.request, thriftResponse)
|
||||
|
||||
crMixerScribeLogger.scribeGetTweetRecommendations(
|
||||
args.request,
|
||||
startTime,
|
||||
scribeMetadata,
|
||||
thriftResponse)
|
||||
}.rescue {
|
||||
case EndpointLoadShedder.LoadSheddingException =>
|
||||
Future(CrMixerTweetResponse(Seq.empty))
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(CrMixerTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** *
* GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor do essentially the same thing,
* except that one passes in a TweetId, which calls the TweetBased engine,
* and the other passes in an AuthorId, which calls the ProducerBased engine.
*/
|
||||
handle(t.CrMixer.GetRelatedTweetsForQueryTweet) {
|
||||
args: t.CrMixer.GetRelatedTweetsForQueryTweet.Args =>
|
||||
val endpointName = "getRelatedTweetsForQueryTweet"
|
||||
getRelatedTweets(endpointName, args.request)
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetRelatedVideoTweetsForQueryTweet) {
|
||||
args: t.CrMixer.GetRelatedVideoTweetsForQueryTweet.Args =>
|
||||
val endpointName = "getRelatedVideoTweetsForQueryVideoTweet"
|
||||
getRelatedVideoTweets(endpointName, args.request)
|
||||
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetRelatedTweetsForQueryAuthor) {
|
||||
args: t.CrMixer.GetRelatedTweetsForQueryAuthor.Args =>
|
||||
val endpointName = "getRelatedTweetsForQueryAuthor"
|
||||
getRelatedTweets(endpointName, args.request)
|
||||
}
|
||||
|
||||
private def getRelatedTweets(
|
||||
endpointName: String,
|
||||
request: RelatedTweetRequest
|
||||
): Future[RelatedTweetResponse] = {
|
||||
val requestUUID = generateRequestUUID()
|
||||
val startTime = Time.now.inMilliseconds
|
||||
val queryFut = buildRelatedTweetQuery(request, requestUUID)
|
||||
|
||||
queryFut.flatMap { query =>
|
||||
val relatedTweetScribeMetadata = RelatedTweetScribeMetadata.from(query)
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
relatedTweetScribeLogger.scribeGetRelatedTweets(
|
||||
request,
|
||||
startTime,
|
||||
relatedTweetScribeMetadata,
|
||||
relatedTweetCandidateGenerator
|
||||
.get(query)
|
||||
.map(buildRelatedTweetResponse))
|
||||
}.rescue {
|
||||
case EndpointLoadShedder.LoadSheddingException =>
|
||||
Future(RelatedTweetResponse(Seq.empty))
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(RelatedTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def getRelatedVideoTweets(
|
||||
endpointName: String,
|
||||
request: RelatedVideoTweetRequest
|
||||
): Future[RelatedVideoTweetResponse] = {
|
||||
val requestUUID = generateRequestUUID()
|
||||
val queryFut = buildRelatedVideoTweetQuery(request, requestUUID)
|
||||
|
||||
queryFut.flatMap { query =>
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
relatedVideoTweetCandidateGenerator.get(query).map { initialCandidateSeq =>
|
||||
buildRelatedVideoTweetResponse(initialCandidateSeq)
|
||||
}
|
||||
}.rescue {
|
||||
case EndpointLoadShedder.LoadSheddingException =>
|
||||
Future(RelatedVideoTweetResponse(Seq.empty))
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(RelatedVideoTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetFrsBasedTweetRecommendations) {
|
||||
args: t.CrMixer.GetFrsBasedTweetRecommendations.Args =>
|
||||
val endpointName = "getFrsBasedTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val queryFut = buildFrsBasedTweetQuery(args.request, requestUUID)
|
||||
queryFut.flatMap { query =>
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
frsTweetCandidateGenerator.get(query).map(FrsTweetResponse(_))
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(FrsTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetTopicTweetRecommendations) {
|
||||
args: t.CrMixer.GetTopicTweetRecommendations.Args =>
|
||||
val endpointName = "getTopicTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val query = buildTopicTweetQuery(args.request, requestUUID)
|
||||
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
topicTweetCandidateGenerator.get(query).map(TopicTweetResponse(_))
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]]))
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetUtegTweetRecommendations) {
|
||||
args: t.CrMixer.GetUtegTweetRecommendations.Args =>
|
||||
val endpointName = "getUtegTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val startTime = Time.now.inMilliseconds
|
||||
val queryFut = buildUtegTweetQuery(args.request, requestUUID)
|
||||
queryFut
|
||||
.flatMap { query =>
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
utegTweetScribeLogger.scribeGetUtegTweetRecommendations(
|
||||
args.request,
|
||||
startTime,
|
||||
scribeMetadata,
|
||||
utegTweetCandidateGenerator
|
||||
.get(query)
|
||||
.map(buildUtegTweetResponse)
|
||||
)
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(UtegTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetAdsRecommendations) { args: t.CrMixer.GetAdsRecommendations.Args =>
|
||||
val endpointName = "getAdsRecommendations"
|
||||
val queryFut = buildAdsCandidateGeneratorQuery(args.request)
|
||||
val startTime = Time.now.inMilliseconds
|
||||
queryFut.flatMap { query =>
|
||||
{
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
val response = adsCandidateGenerator
|
||||
.get(query).map { candidates =>
|
||||
buildAdsResponse(candidates)
|
||||
}
|
||||
adsRecommendationsScribeLogger.scribeGetAdsRecommendations(
|
||||
args.request,
|
||||
startTime,
|
||||
scribeMetadata,
|
||||
response,
|
||||
query.params(AdsParams.EnableScribe)
|
||||
)
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(AdsResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def buildCrCandidateGeneratorQuery(
|
||||
thriftRequest: CrMixerTweetRequest,
|
||||
requestUUID: Long,
|
||||
userId: Long
|
||||
): Future[CrCandidateGeneratorQuery] = {
|
||||
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("CrMixerTweetRequest")
|
||||
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState
|
||||
)
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
val maxNumResults = (product, productContext) match {
|
||||
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
|
||||
homeContext.maxResults.getOrElse(9999)
|
||||
case (t.Product.Notifications, Some(t.ProductContext.NotificationsContext(cxt))) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case (t.Product.Email, None) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case (t.Product.ImmersiveMediaViewer, None) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case (t.Product.VideoCarousel, None) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
|
||||
)
|
||||
}
|
||||
|
||||
CrCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
userState = userState,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID,
|
||||
languageCode = thriftRequest.clientContext.languageCode
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildRelatedTweetQuery(
|
||||
thriftRequest: RelatedTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[RelatedTweetCandidateGeneratorQuery] = {
|
||||
|
||||
val product = thriftRequest.product
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("RelatedTweetRequest")
|
||||
val userStateFut: Future[UserState] = (thriftRequest.clientContext.userId match {
|
||||
case Some(userId) => userStateStore.get(userId)
|
||||
case None => Future.value(Some(UserState.EnumUnknownUserState(100)))
|
||||
}).map(_.getOrElse(UserState.EnumUnknownUserState(100)))
|
||||
|
||||
userStateFut.map { userState =>
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState)
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
// Currently, Home takes 10, and RUX takes 100
|
||||
val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam)
|
||||
|
||||
RelatedTweetCandidateGeneratorQuery(
|
||||
internalId = thriftRequest.internalId,
|
||||
clientContext = thriftRequest.clientContext,
|
||||
product = product,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildAdsCandidateGeneratorQuery(
|
||||
thriftRequest: AdsRequest
|
||||
): Future[AdsCandidateGeneratorQuery] = {
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val requestUUID = generateRequestUUID()
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState)
|
||||
val maxNumResults = params(AdsParams.AdsCandidateGenerationMaxCandidatesNumParam)
|
||||
AdsCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
userState = userState,
|
||||
params = params,
|
||||
maxNumResults = maxNumResults,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildRelatedVideoTweetQuery(
|
||||
thriftRequest: RelatedVideoTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[RelatedVideoTweetCandidateGeneratorQuery] = {
|
||||
|
||||
val product = thriftRequest.product
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("RelatedVideoTweetRequest")
|
||||
val userStateFut: Future[UserState] = (thriftRequest.clientContext.userId match {
|
||||
case Some(userId) => userStateStore.get(userId)
|
||||
case None => Future.value(Some(UserState.EnumUnknownUserState(100)))
|
||||
}).map(_.getOrElse(UserState.EnumUnknownUserState(100)))
|
||||
|
||||
userStateFut.map { userState =>
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState)
|
||||
|
||||
val maxNumResults = params(RelatedVideoTweetGlobalParams.MaxCandidatesPerRequestParam)
|
||||
|
||||
RelatedVideoTweetCandidateGeneratorQuery(
|
||||
internalId = thriftRequest.internalId,
|
||||
clientContext = thriftRequest.clientContext,
|
||||
product = product,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def buildUtegTweetQuery(
|
||||
thriftRequest: UtegTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[UtegTweetCandidateGeneratorQuery] = {
|
||||
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("UtegTweetRequest")
|
||||
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState
|
||||
)
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
val maxNumResults = (product, productContext) match {
|
||||
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
|
||||
homeContext.maxResults.getOrElse(9999)
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
|
||||
)
|
||||
}
|
||||
|
||||
UtegTweetCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
userState = userState,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def buildTopicTweetQuery(
|
||||
thriftRequest: TopicTweetRequest,
|
||||
requestUUID: Long
|
||||
): TopicTweetCandidateGeneratorQuery = {
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException(
|
||||
"userId must be present in the TopicTweetRequest clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
val isVideoOnly = (product, productContext) match {
|
||||
case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) =>
|
||||
context.isVideoOnly
|
||||
case (t.Product.TopicLandingPage, None) =>
|
||||
false
|
||||
case (t.Product.HomeTopicsBackfill, None) =>
|
||||
false
|
||||
case (t.Product.TopicTweetsStrato, None) =>
|
||||
false
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
|
||||
)
|
||||
}
|
||||
|
||||
statsReceiver.scope(product.toString).counter(TopicTweetRequest.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
product,
|
||||
UserState.EnumUnknownUserState(100)
|
||||
)
|
||||
|
||||
val topicIds = thriftRequest.topicIds.map { topicId =>
|
||||
TopicId(
|
||||
entityId = topicId,
|
||||
language = thriftRequest.clientContext.languageCode,
|
||||
country = None
|
||||
)
|
||||
}.toSet
|
||||
|
||||
TopicTweetCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
topicIds = topicIds,
|
||||
product = product,
|
||||
maxNumResults = params(TopicTweetParams.MaxTopicTweetCandidatesParam),
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID,
|
||||
isVideoOnly = isVideoOnly
|
||||
)
|
||||
}
|
||||
|
||||
private def buildFrsBasedTweetQuery(
|
||||
thriftRequest: FrsTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[FrsTweetCandidateGeneratorQuery] = {
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException(
|
||||
"userId must be present in the FrsTweetRequest clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("FrsTweetRequest")
|
||||
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState
|
||||
)
|
||||
val maxNumResults = (product, productContext) match {
|
||||
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
|
||||
homeContext.maxResults.getOrElse(
|
||||
params(FrsBasedCandidateGenerationMaxCandidatesNumParam))
|
||||
case _ =>
|
||||
params(FrsBasedCandidateGenerationMaxCandidatesNumParam)
|
||||
}
|
||||
|
||||
FrsTweetCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
impressedUserList = thriftRequest.excludedUserIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
languageCodeOpt = thriftRequest.clientContext.languageCode,
|
||||
countryCodeOpt = thriftRequest.clientContext.countryCode,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildThriftResponse(
|
||||
candidates: Seq[RankedCandidate]
|
||||
): CrMixerTweetResponse = {
|
||||
|
||||
val tweets = candidates.map { candidate =>
|
||||
TweetRecommendation(
|
||||
tweetId = candidate.tweetId,
|
||||
score = candidate.predictionScore,
|
||||
metricTags = Some(MetricTagUtil.buildMetricTags(candidate)),
|
||||
latestSourceSignalTimestampInMillis =
|
||||
SignalTimestampStatsUtil.buildLatestSourceSignalTimestamp(candidate)
|
||||
)
|
||||
}
|
||||
signalTimestampStatsUtil.statsSignalTimestamp(tweets)
|
||||
CrMixerTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def scribeTweetScoreFunnelSeries(
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Seq[RankedCandidate] = {
|
||||
// 202210210901 is a random number for code search of Lensview
|
||||
tweetScoreFunnelSeries.startNewSpan(
|
||||
name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType",
|
||||
codePtr = 202210210901L) {
|
||||
(
|
||||
candidates,
|
||||
candidates.map { candidate =>
|
||||
thriftlog.TweetDimensionMeasure(
|
||||
dimension = Some(
|
||||
thriftlog
|
||||
.RequestTweetDimension(
|
||||
candidate.tweetId,
|
||||
candidate.reasonChosen.similarityEngineInfo.similarityEngineType.value)),
|
||||
measure = Some(thriftlog.RequestTweetMeasure(candidate.predictionScore))
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildRelatedTweetResponse(candidates: Seq[InitialCandidate]): RelatedTweetResponse = {
|
||||
val tweets = candidates.map { candidate =>
|
||||
RelatedTweet(
|
||||
tweetId = candidate.tweetId,
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
authorId = Some(candidate.tweetInfo.authorId)
|
||||
)
|
||||
}
|
||||
RelatedTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def buildRelatedVideoTweetResponse(
|
||||
candidates: Seq[InitialCandidate]
|
||||
): RelatedVideoTweetResponse = {
|
||||
val tweets = candidates.map { candidate =>
|
||||
RelatedVideoTweet(
|
||||
tweetId = candidate.tweetId,
|
||||
score = Some(candidate.getSimilarityScore)
|
||||
)
|
||||
}
|
||||
RelatedVideoTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def buildUtegTweetResponse(
|
||||
candidates: Seq[TweetWithScoreAndSocialProof]
|
||||
): UtegTweetResponse = {
|
||||
val tweets = candidates.map { candidate =>
|
||||
UtegTweet(
|
||||
tweetId = candidate.tweetId,
|
||||
score = candidate.score,
|
||||
socialProofByType = candidate.socialProofByType
|
||||
)
|
||||
}
|
||||
UtegTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def buildAdsResponse(
|
||||
candidates: Seq[RankedAdsCandidate]
|
||||
): AdsResponse = {
|
||||
AdsResponse(ads = candidates.map { candidate =>
|
||||
AdTweetRecommendation(
|
||||
tweetId = candidate.tweetId,
|
||||
score = candidate.predictionScore,
|
||||
lineItems = Some(candidate.lineItemInfo))
|
||||
})
|
||||
}
|
||||
|
||||
private def cacheTweetRecommendationResults(
|
||||
request: CrMixerTweetRequest,
|
||||
response: Future[CrMixerTweetResponse]
|
||||
): Unit = {
|
||||
|
||||
val userId = request.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException(
|
||||
"userId must be present in getTweetRecommendations() Thrift clientContext"))
|
||||
|
||||
if (decider.isAvailableForId(userId, DeciderConstants.getTweetRecommendationsCacheRate)) {
|
||||
response.map { crMixerTweetResponse =>
|
||||
{
|
||||
(
|
||||
request.product,
|
||||
request.clientContext.userId,
|
||||
crMixerTweetResponse.tweets.nonEmpty) match {
|
||||
case (t.Product.Home, Some(userId), true) =>
|
||||
tweetRecommendationResultsStore.put((userId, crMixerTweetResponse))
|
||||
case _ => Future.value(Unit)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
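// A minimal sketch (not part of the original file) of the fallback pattern the handlers above use:
// failures, including EndpointLoadShedder.LoadSheddingException, degrade to an empty response
// instead of failing the request. The object and method names are hypothetical; the real handlers
// also log the error for non-load-shedding failures.
object EmptyResponseFallbackExample {
  import com.twitter.cr_mixer.param.decider.EndpointLoadShedder
  import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
  import com.twitter.util.Future

  def withEmptyFallback(underlying: Future[CrMixerTweetResponse]): Future[CrMixerTweetResponse] =
    underlying.rescue {
      case EndpointLoadShedder.LoadSheddingException =>
        Future.value(CrMixerTweetResponse(Seq.empty))
      case _ =>
        Future.value(CrMixerTweetResponse(Seq.empty))
    }
}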
|
@ -0,0 +1,7 @@
|
||||
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [],
)
|
@ -0,0 +1,4 @@
|
||||
package com.twitter.cr_mixer
package exception

case class InvalidSANNConfigException(msg: String) extends Exception(msg)
|
@ -0,0 +1,35 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"abdecider/src/main/scala",
|
||||
"configapi/configapi-abdecider",
|
||||
"configapi/configapi-core",
|
||||
"configapi/configapi-featureswitches:v2",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
|
||||
"featureswitches/featureswitches-core",
|
||||
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
|
||||
"frigate/frigate-common:util",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,79 @@
|
||||
package com.twitter.cr_mixer
|
||||
package featureswitch
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.abdecider.LoggingABDecider
|
||||
import com.twitter.abdecider.Recipient
|
||||
import com.twitter.abdecider.Bucket
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.util.Local
|
||||
import scala.collection.concurrent.{Map => ConcurrentMap}
|
||||
|
||||
/**
* Wraps a LoggingABDecider so that all impressed buckets are recorded to a 'LocalContext' on a given request.
*
* Contexts (https://twitter.github.io/finagle/guide/Contexts.html) are Finagle's mechanism for
* storing state/variables without having to pass these variables all around the request.
*
* In order for this class to be used, the [[SetImpressedBucketsLocalContextFilter]] must be applied
* at the beginning of the request to initialize the concurrent map used to store impressed buckets.
*
* Whenever we get an A/B impression, the bucket information is recorded in the concurrent map.
*/
|
||||
case class CrMixerLoggingABDecider(
|
||||
loggingAbDecider: LoggingABDecider,
|
||||
statsReceiver: StatsReceiver)
|
||||
extends LoggingABDecider {
|
||||
|
||||
private val scopedStatsReceiver = statsReceiver.scope("cr_logging_ab_decider")
|
||||
|
||||
override def impression(
|
||||
experimentName: String,
|
||||
recipient: Recipient
|
||||
): Option[Bucket] = {
|
||||
|
||||
StatsUtil.trackNonFutureBlockStats(scopedStatsReceiver.scope("log_impression")) {
|
||||
val maybeBuckets = loggingAbDecider.impression(experimentName, recipient)
|
||||
maybeBuckets.foreach { b =>
|
||||
scopedStatsReceiver.counter("impressions").incr()
|
||||
CrMixerImpressedBuckets.recordImpressedBucket(b)
|
||||
}
|
||||
maybeBuckets
|
||||
}
|
||||
}
|
||||
|
||||
override def track(
|
||||
experimentName: String,
|
||||
eventName: String,
|
||||
recipient: Recipient
|
||||
): Unit = {
|
||||
loggingAbDecider.track(experimentName, eventName, recipient)
|
||||
}
|
||||
|
||||
override def bucket(
|
||||
experimentName: String,
|
||||
recipient: Recipient
|
||||
): Option[Bucket] = {
|
||||
loggingAbDecider.bucket(experimentName, recipient)
|
||||
}
|
||||
|
||||
override def experiments: Seq[String] = loggingAbDecider.experiments
|
||||
|
||||
override def experiment(experimentName: String) =
|
||||
loggingAbDecider.experiment(experimentName)
|
||||
}
|
||||
|
||||
object CrMixerImpressedBuckets {
|
||||
private[featureswitch] val localImpressedBucketsMap = new Local[ConcurrentMap[Bucket, Boolean]]
|
||||
|
||||
/**
* Gets all impressed buckets for this request.
*/
|
||||
def getAllImpressedBuckets: Option[List[Bucket]] = {
|
||||
localImpressedBucketsMap.apply().map(_.map { case (k, _) => k }.toList)
|
||||
}
|
||||
|
||||
private[featureswitch] def recordImpressedBucket(bucket: Bucket) = {
|
||||
localImpressedBucketsMap().foreach { m => m += bucket -> true }
|
||||
}
|
||||
}
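// A minimal read-side sketch (not part of the original file); the object name is hypothetical.
// getAllImpressedBuckets returns None when SetImpressedBucketsLocalContextFilter has not
// initialized the per-request map for the current request, and Some(buckets) otherwise.
object CrMixerImpressedBucketsReadExample {
  import com.twitter.abdecider.Bucket

  def currentImpressedBuckets: Option[List[Bucket]] =
    CrMixerImpressedBuckets.getAllImpressedBuckets
}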
|
@ -0,0 +1,151 @@
|
||||
package com.twitter.cr_mixer.featureswitch
|
||||
|
||||
import com.twitter.abdecider.LoggingABDecider
|
||||
import com.twitter.abdecider.UserRecipient
|
||||
import com.twitter.cr_mixer.{thriftscala => t}
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.discovery.common.configapi.FeatureContextBuilder
|
||||
import com.twitter.featureswitches.FSRecipient
|
||||
import com.twitter.featureswitches.UserAgent
|
||||
import com.twitter.featureswitches.{Recipient => FeatureSwitchRecipient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.timelines.configapi.Config
|
||||
import com.twitter.timelines.configapi.FeatureValue
|
||||
import com.twitter.timelines.configapi.ForcedFeatureContext
|
||||
import com.twitter.timelines.configapi.OrElseFeatureContext
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.timelines.configapi.RequestContext
|
||||
import com.twitter.timelines.configapi.abdecider.LoggingABDeciderExperimentContext
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Singleton object for building [[Params]] to override */
|
||||
@Singleton
|
||||
class ParamsBuilder @Inject() (
|
||||
globalStats: StatsReceiver,
|
||||
abDecider: LoggingABDecider,
|
||||
featureContextBuilder: FeatureContextBuilder,
|
||||
config: Config) {
|
||||
|
||||
private val stats = globalStats.scope("params")
|
||||
|
||||
def buildFromClientContext(
|
||||
clientContext: ClientContext,
|
||||
product: t.Product,
|
||||
userState: UserState,
|
||||
userRoleOverride: Option[Set[String]] = None,
|
||||
featureOverrides: Map[String, FeatureValue] = Map.empty,
|
||||
): Params = {
|
||||
clientContext.userId match {
|
||||
case Some(userId) =>
|
||||
val userRecipient = buildFeatureSwitchRecipient(
|
||||
userId,
|
||||
userRoleOverride,
|
||||
clientContext,
|
||||
product,
|
||||
userState
|
||||
)
|
||||
|
||||
val featureContext = OrElseFeatureContext(
|
||||
ForcedFeatureContext(featureOverrides),
|
||||
featureContextBuilder(
|
||||
Some(userId),
|
||||
Some(userRecipient)
|
||||
))
|
||||
|
||||
config(
|
||||
requestContext = RequestContext(
|
||||
userId = Some(userId),
|
||||
experimentContext = LoggingABDeciderExperimentContext(
|
||||
abDecider,
|
||||
Some(UserRecipient(userId, Some(userId)))),
|
||||
featureContext = featureContext
|
||||
),
|
||||
stats
|
||||
)
|
||||
case None =>
|
||||
val guestRecipient =
|
||||
buildFeatureSwitchRecipientWithGuestId(clientContext: ClientContext, product, userState)
|
||||
|
||||
val featureContext = OrElseFeatureContext(
|
||||
ForcedFeatureContext(featureOverrides),
|
||||
featureContextBuilder(
|
||||
clientContext.userId,
|
||||
Some(guestRecipient)
|
||||
)
|
||||
) //ExperimentContext with GuestRecipient is not supported as there is no active use-cases yet in CrMixer
|
||||
|
||||
config(
|
||||
requestContext = RequestContext(
|
||||
userId = clientContext.userId,
|
||||
featureContext = featureContext
|
||||
),
|
||||
stats
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildFeatureSwitchRecipientWithGuestId(
|
||||
clientContext: ClientContext,
|
||||
product: t.Product,
|
||||
userState: UserState
|
||||
): FeatureSwitchRecipient = {
|
||||
|
||||
val recipient = FSRecipient(
|
||||
userId = None,
|
||||
userRoles = None,
|
||||
deviceId = clientContext.deviceId,
|
||||
guestId = clientContext.guestId,
|
||||
languageCode = clientContext.languageCode,
|
||||
countryCode = clientContext.countryCode,
|
||||
userAgent = clientContext.userAgent.flatMap(UserAgent(_)),
|
||||
isVerified = None,
|
||||
isTwoffice = None,
|
||||
tooClient = None,
|
||||
highWaterMark = None
|
||||
)
|
||||
|
||||
recipient.withCustomFields(
|
||||
(ParamsBuilder.ProductCustomField, product.toString),
|
||||
(ParamsBuilder.UserStateCustomField, userState.toString)
|
||||
)
|
||||
}
|
||||
|
||||
private def buildFeatureSwitchRecipient(
|
||||
userId: Long,
|
||||
userRolesOverride: Option[Set[String]],
|
||||
clientContext: ClientContext,
|
||||
product: t.Product,
|
||||
userState: UserState
|
||||
): FeatureSwitchRecipient = {
|
||||
val userRoles = userRolesOverride match {
|
||||
case Some(overrides) => Some(overrides)
|
||||
case _ => clientContext.userRoles.map(_.toSet)
|
||||
}
|
||||
|
||||
val recipient = FSRecipient(
|
||||
userId = Some(userId),
|
||||
userRoles = userRoles,
|
||||
deviceId = clientContext.deviceId,
|
||||
guestId = clientContext.guestId,
|
||||
languageCode = clientContext.languageCode,
|
||||
countryCode = clientContext.countryCode,
|
||||
userAgent = clientContext.userAgent.flatMap(UserAgent(_)),
|
||||
isVerified = None,
|
||||
isTwoffice = None,
|
||||
tooClient = None,
|
||||
highWaterMark = None
|
||||
)
|
||||
|
||||
recipient.withCustomFields(
|
||||
(ParamsBuilder.ProductCustomField, product.toString),
|
||||
(ParamsBuilder.UserStateCustomField, userState.toString)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
object ParamsBuilder {
|
||||
private val ProductCustomField = "product_id"
|
||||
private val UserStateCustomField = "user_state"
|
||||
}
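// A minimal call sketch (not part of the original file): building request Params from a
// ClientContext with no overrides. The object and parameter names are hypothetical; Home and
// EnumUnknownUserState(100) are just example arguments.
object ParamsBuilderUsageExample {
  import com.twitter.core_workflows.user_model.thriftscala.UserState
  import com.twitter.cr_mixer.{thriftscala => t}
  import com.twitter.product_mixer.core.thriftscala.ClientContext
  import com.twitter.timelines.configapi.Params

  def exampleParams(paramsBuilder: ParamsBuilder, clientContext: ClientContext): Params =
    paramsBuilder.buildFromClientContext(
      clientContext = clientContext,
      product = t.Product.Home,
      userState = UserState.EnumUnknownUserState(100)
    )
}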
|
@ -0,0 +1,22 @@
|
||||
package com.twitter.cr_mixer.featureswitch
|
||||
|
||||
import com.twitter.finagle.Filter
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
import scala.collection.concurrent.TrieMap
|
||||
import com.twitter.abdecider.Bucket
|
||||
import com.twitter.finagle.Service
|
||||
|
||||
@Singleton
|
||||
class SetImpressedBucketsLocalContextFilter @Inject() () extends Filter.TypeAgnostic {
|
||||
override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] =
|
||||
(request: Req, service: Service[Req, Rep]) => {
|
||||
|
||||
val concurrentTrieMap = TrieMap
|
||||
.empty[Bucket, Boolean] // Trie map has no locks and O(1) inserts
|
||||
CrMixerImpressedBuckets.localImpressedBucketsMap.let(concurrentTrieMap) {
|
||||
service(request)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"configapi/configapi-core",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"finagle/finagle-core/src/main",
|
||||
"frigate/frigate-common:util",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,22 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.util.Future
|
||||
|
||||
trait FilterBase {
|
||||
def name: String
|
||||
|
||||
type ConfigType
|
||||
|
||||
def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: ConfigType
|
||||
): Future[Seq[Seq[InitialCandidate]]]
|
||||
|
||||
/**
* Build the config params here. Passing param() into the filter is strongly discouraged
* because param() can be slow when called many times.
*/
|
||||
def requestToConfig[CGQueryType <: CandidateGeneratorQuery](request: CGQueryType): ConfigType
|
||||
}
|
@ -0,0 +1,63 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class ImpressedTweetlistFilter() extends FilterBase {
|
||||
import ImpressedTweetlistFilter._
|
||||
|
||||
override val name: String = this.getClass.getCanonicalName
|
||||
|
||||
override type ConfigType = FilterConfig
|
||||
|
||||
/*
|
||||
Filtering removes some candidates based on configurable criteria.
|
||||
*/
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: FilterConfig
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
// Remove candidates which match a source tweet, or which are passed in impressedTweetList
|
||||
val sourceTweetsMatch = candidates
|
||||
.flatMap {
|
||||
|
||||
/***
* Within a Seq[Seq[InitialCandidate]], all candidates within an inner Seq
* are guaranteed to have the same sourceInfo. Hence, we can pick .headOption
* to represent the whole list when filtering by the internalId of the sourceInfoOpt.
* The similarityEngineInfo, of course, can still differ across candidates.
*/
|
||||
_.headOption.flatMap { candidate =>
|
||||
candidate.candidateGenerationInfo.sourceInfoOpt.map(_.internalId)
|
||||
}
|
||||
}.collect {
|
||||
case InternalId.TweetId(id) => id
|
||||
}
|
||||
|
||||
val impressedTweetList: Set[TweetId] =
|
||||
config.impressedTweetList ++ sourceTweetsMatch
|
||||
|
||||
val filteredCandidateMap: Seq[Seq[InitialCandidate]] =
|
||||
candidates.map {
|
||||
_.filterNot { candidate =>
|
||||
impressedTweetList.contains(candidate.tweetId)
|
||||
}
|
||||
}
|
||||
Future.value(filteredCandidateMap)
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType
|
||||
): FilterConfig = {
|
||||
FilterConfig(request.impressedTweetList)
|
||||
}
|
||||
}
|
||||
|
||||
object ImpressedTweetlistFilter {
|
||||
case class FilterConfig(impressedTweetList: Set[TweetId])
|
||||
}
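// An illustration (not part of the original file) of the filtering semantics above, using plain
// tweet ids instead of InitialCandidate: the effective blocklist is the union of the request's
// impressedTweetList and the source tweet ids of the candidate groups. All values are hypothetical.
object ImpressedTweetlistFilterSemanticsExample {
  val impressedTweetList: Set[Long] = Set(1L, 2L)
  val sourceTweetIds: Set[Long] = Set(3L)
  val candidateTweetIds: Seq[Seq[Long]] = Seq(Seq(1L, 4L), Seq(3L, 5L))

  val blocklist: Set[Long] = impressedTweetList ++ sourceTweetIds
  val filtered: Seq[Seq[Long]] = candidateTweetIds.map(_.filterNot(blocklist.contains))
  // filtered == Seq(Seq(4L), Seq(5L))
}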
|
@ -0,0 +1,80 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/***
|
||||
* Filters in-network tweets
|
||||
*/
|
||||
@Singleton
|
||||
case class InNetworkFilter @Inject() (
|
||||
@Named(ModuleNames.RealGraphInStore) realGraphStoreMh: ReadableStore[UserId, CandidateSeq],
|
||||
globalStats: StatsReceiver)
|
||||
extends FilterBase {
|
||||
override val name: String = this.getClass.getCanonicalName
|
||||
import InNetworkFilter._
|
||||
|
||||
override type ConfigType = FilterConfig
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val filterCandidatesStats = stats.scope("filter_candidates")
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filterConfig: FilterConfig,
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
StatsUtil.trackItemsStats(filterCandidatesStats) {
|
||||
filterCandidates(candidates, filterConfig)
|
||||
}
|
||||
}
|
||||
|
||||
private def filterCandidates(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filterConfig: FilterConfig,
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
if (!filterConfig.enableInNetworkFilter) {
|
||||
Future.value(candidates)
|
||||
} else {
|
||||
filterConfig.userIdOpt match {
|
||||
case Some(userId) =>
|
||||
realGraphStoreMh
|
||||
.get(userId).map(_.map(_.candidates.map(_.userId)).getOrElse(Seq.empty).toSet).map {
|
||||
realGraphInNetworkAuthorsSet =>
|
||||
candidates.map(_.filterNot { candidate =>
|
||||
realGraphInNetworkAuthorsSet.contains(candidate.tweetInfo.authorId)
|
||||
})
|
||||
}
|
||||
case None => Future.value(candidates)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType
|
||||
): FilterConfig = {
|
||||
request match {
|
||||
case UtegTweetCandidateGeneratorQuery(userId, _, _, _, _, params, _) =>
|
||||
FilterConfig(Some(userId), params(UtegTweetGlobalParams.EnableInNetworkFilterParam))
|
||||
case _ => FilterConfig(None, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
object InNetworkFilter {
|
||||
case class FilterConfig(
|
||||
userIdOpt: Option[UserId],
|
||||
enableInNetworkFilter: Boolean)
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class PostRankFilterRunner @Inject() (
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
private val beforeCount = scopedStats.stat("candidate_count", "before")
|
||||
private val afterCount = scopedStats.stat("candidate_count", "after")
|
||||
|
||||
def run(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
|
||||
beforeCount.add(candidates.size)
|
||||
|
||||
Future(
|
||||
removeBadRecentNotificationCandidates(candidates)
|
||||
).map { results =>
|
||||
afterCount.add(results.size)
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Remove "bad" quality candidates generated by recent notifications.
* A candidate is bad when it is generated by a single RecentNotification
* SourceKey.
* e.g.:
* tweetA {recent notification1} -> bad
* tweetB {recent notification1, recent notification2} -> good
* tweetC {recent notification1, recent follow1} -> bad
* SD-19397
*/
|
||||
private[filter] def removeBadRecentNotificationCandidates(
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Seq[RankedCandidate] = {
|
||||
candidates.filterNot {
|
||||
isBadQualityRecentNotificationCandidate
|
||||
}
|
||||
}
|
||||
|
||||
private def isBadQualityRecentNotificationCandidate(candidate: RankedCandidate): Boolean = {
|
||||
candidate.potentialReasons.size == 1 &&
|
||||
candidate.potentialReasons.head.sourceInfoOpt.nonEmpty &&
|
||||
candidate.potentialReasons.head.sourceInfoOpt.get.sourceType == SourceType.NotificationClick
|
||||
}
|
||||
|
||||
}
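// An illustration (not part of the original file) of the rule implemented above, using plain
// source-type labels instead of RankedCandidate: a candidate is dropped only when it has exactly
// one potential reason and that reason is a NotificationClick source. Names and values are hypothetical.
object BadRecentNotificationRuleExample {
  def isBad(potentialReasonSourceTypes: Seq[Option[String]]): Boolean =
    potentialReasonSourceTypes.size == 1 &&
      potentialReasonSourceTypes.head.contains("NotificationClick")

  val dropped: Boolean = isBad(Seq(Some("NotificationClick"))) // true
  val kept: Boolean = isBad(Seq(Some("NotificationClick"), Some("UserFollow"))) // false
}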
|
@ -0,0 +1,99 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class PreRankFilterRunner @Inject() (
|
||||
impressedTweetListFilter: ImpressedTweetlistFilter,
|
||||
tweetAgeFilter: TweetAgeFilter,
|
||||
videoTweetFilter: VideoTweetFilter,
|
||||
tweetReplyFilter: ReplyFilter,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
/***
* The order of the filters does not matter as long as we do not apply .take(N) truncation
* across all filters. In other words, it is fine to run tweetAgeFilter first and then
* impressedTweetListFilter, or the other way around.
* The same idea applies to the signal-based filters - it is fine to apply signal-based filters
* before impressedTweetListFilter.
*
* We move all signal-based filters before tweetAgeFilter and impressedTweetListFilter
* as a set of early filters.
*/
|
||||
val orderedFilters = Seq(
|
||||
tweetAgeFilter,
|
||||
impressedTweetListFilter,
|
||||
videoTweetFilter,
|
||||
tweetReplyFilter
|
||||
)
|
||||
|
||||
def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
PreRankFilterRunner.runSequentialFilters(
|
||||
request,
|
||||
candidates,
|
||||
orderedFilters,
|
||||
scopedStats
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object PreRankFilterRunner {
|
||||
private def recordCandidateStatsBeforeFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "before").incr(
|
||||
candidates.count { _.isEmpty }
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "before").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def recordCandidateStatsAfterFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "after").incr(
|
||||
candidates.count { _.isEmpty }
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "after").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Helper function for running some candidates through a sequence of filters
|
||||
*/
|
||||
private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filters: Seq[FilterBase],
|
||||
statsReceiver: StatsReceiver
|
||||
): Future[Seq[Seq[InitialCandidate]]] =
|
||||
filters.foldLeft(Future.value(candidates)) {
|
||||
case (candsFut, filter) =>
|
||||
candsFut.flatMap { cands =>
|
||||
recordCandidateStatsBeforeFilter(cands, statsReceiver.scope(filter.name))
|
||||
filter
|
||||
.filter(cands, filter.requestToConfig(request))
|
||||
.map { filteredCands =>
|
||||
recordCandidateStatsAfterFilter(filteredCands, statsReceiver.scope(filter.name))
|
||||
filteredCands
|
||||
}
|
||||
}
|
||||
}
|
||||
}
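// A minimal sketch (not part of the original file) of the foldLeft chaining used by
// runSequentialFilters above, with plain Int => Future[Int] stages standing in for FilterBase.
// All names and values are hypothetical.
object SequentialFoldExample {
  import com.twitter.util.Future

  def runAll(initial: Int, stages: Seq[Int => Future[Int]]): Future[Int] =
    stages.foldLeft(Future.value(initial)) {
      case (accFut, stage) => accFut.flatMap(stage)
    }
  // runAll(10, Seq(x => Future.value(x - 1), x => Future.value(x * 2))) yields Future.value(18)
}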
|
@ -0,0 +1,40 @@
package com.twitter.cr_mixer.filter

import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future

import javax.inject.Inject
import javax.inject.Singleton

/***
 * Filters candidates that are replies
 */
@Singleton
case class ReplyFilter @Inject() () extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = Boolean

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    if (config) {
      Future.value(
        candidates.map { candidateSeq =>
          candidateSeq.filterNot { candidate =>
            candidate.tweetInfo.isReply.getOrElse(false)
          }
        }
      )
    } else {
      Future.value(candidates)
    }
  }

  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType = {
    true
  }
}
@ -0,0 +1,41 @@
package com.twitter.cr_mixer.filter

import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.util.Future

import javax.inject.Inject
import javax.inject.Singleton

/***
 * Filters candidates that are retweets
 */
@Singleton
case class RetweetFilter @Inject() () extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = Boolean

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    if (config) {
      Future.value(
        candidates.map { candidateSeq =>
          candidateSeq.filterNot { candidate =>
            candidate.tweetInfo.isRetweet.getOrElse(false)
          }
        }
      )
    } else {
      Future.value(candidates)
    }
  }

  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType = {
    query.params(UtegTweetGlobalParams.EnableRetweetFilterParam)
  }
}
@ -0,0 +1,39 @@
package com.twitter.cr_mixer.filter

import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Singleton
import com.twitter.conversions.DurationOps._

@Singleton
case class TweetAgeFilter() extends FilterBase {
  override val name: String = this.getClass.getCanonicalName

  override type ConfigType = Duration

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    maxTweetAge: Duration
  ): Future[Seq[Seq[InitialCandidate]]] = {
    if (maxTweetAge >= 720.hours) {
      Future.value(candidates)
    } else {
      // Tweet IDs are approximately chronological (see http://go/snowflake),
      // so we compute the earliest acceptable tweet ID once and use it to
      // filter the candidates from each CandidateGenerationModel.
      val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
      Future.value(candidates.map(_.filter(_.tweetId >= earliestTweetId)))
    }
  }

  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): Duration = {
    query.params(GlobalParams.MaxTweetAgeHoursParam)
  }
}
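The Snowflake-based cutoff above can be reproduced outside the service. A self-contained sketch of the same idea, assuming the publicly documented Snowflake layout (41 timestamp bits above 22 low bits, relative to the Twitter epoch); the constants are illustrative and not taken from SnowflakeId:

object SnowflakeAgeCutoffSketch {
  private val TwitterEpochMs = 1288834974657L // assumed Snowflake epoch
  private val TimestampShift = 22 // worker-id + sequence bits sit below the timestamp

  // Smallest possible tweet ID created at or after timestampMs.
  def firstIdFor(timestampMs: Long): Long =
    (timestampMs - TwitterEpochMs) << TimestampShift

  def main(args: Array[String]): Unit = {
    val maxAgeMs = 48L * 3600 * 1000 // e.g. a 48-hour MaxTweetAgeHoursParam
    val earliestTweetId = firstIdFor(System.currentTimeMillis() - maxAgeMs)
    val candidateIds = Seq(1234567890123456789L, 987654321L)
    // Keep only candidates at least as new as the cutoff, mirroring TweetAgeFilter.
    println(candidateIds.filter(_ >= earliestTweetId))
  }
}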
@ -0,0 +1,39 @@
package com.twitter.cr_mixer.filter

import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.HealthThreshold
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
import javax.inject.Singleton

@Singleton
trait TweetInfoHealthFilterBase extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = HealthThreshold.Enum.Value
  def thresholdToPropertyMap: Map[HealthThreshold.Enum.Value, TweetInfo => Option[Boolean]]
  def getFilterParamFn: CandidateGeneratorQuery => HealthThreshold.Enum.Value

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: HealthThreshold.Enum.Value
  ): Future[Seq[Seq[InitialCandidate]]] = {
    Future.value(candidates.map { seq =>
      seq.filter(p => thresholdToPropertyMap(config)(p.tweetInfo).getOrElse(true))
    })
  }

  /**
   * Build the config params here. Passing param() into the filter is strongly discouraged
   * because param() can be slow when called many times.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): HealthThreshold.Enum.Value = {
    query match {
      case q: CrCandidateGeneratorQuery => getFilterParamFn(q)
      case _ => HealthThreshold.Enum.Off
    }
  }
}
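A concrete subclass only has to supply the threshold-to-predicate map and the param lookup. The self-contained sketch below mirrors that contract with stand-in types (HealthLevel and the pass* fields are illustrative, not cr-mixer's real HealthThreshold or TweetInfo):

object HealthFilterSketch {
  object HealthLevel extends Enumeration { val Off, Moderate, Strict = Value }
  final case class Info(passModerate: Option[Boolean], passStrict: Option[Boolean])

  // Each threshold maps to the property that must hold; Off keeps everything.
  val thresholdToPropertyMap: Map[HealthLevel.Value, Info => Option[Boolean]] = Map(
    HealthLevel.Off -> (_ => Some(true)),
    HealthLevel.Moderate -> (_.passModerate),
    HealthLevel.Strict -> (_.passStrict)
  )

  // Missing scores default to "keep", matching getOrElse(true) in the trait.
  def filter(infos: Seq[Info], config: HealthLevel.Value): Seq[Info] =
    infos.filter(info => thresholdToPropertyMap(config)(info).getOrElse(true))

  def main(args: Array[String]): Unit = {
    val infos = Seq(Info(Some(true), Some(false)), Info(None, None))
    println(filter(infos, HealthLevel.Strict)) // keeps only the unknown-score item
  }
}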
@ -0,0 +1,96 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/***
|
||||
*
|
||||
* Run filters sequentially for UTEG candidate generator. The structure is copied from PreRankFilterRunner.
|
||||
*/
|
||||
@Singleton
|
||||
class UtegFilterRunner @Inject() (
|
||||
inNetworkFilter: InNetworkFilter,
|
||||
utegHealthFilter: UtegHealthFilter,
|
||||
retweetFilter: RetweetFilter,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
val orderedFilters: Seq[FilterBase] = Seq(
|
||||
inNetworkFilter,
|
||||
utegHealthFilter,
|
||||
retweetFilter
|
||||
)
|
||||
|
||||
def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
UtegFilterRunner.runSequentialFilters(
|
||||
request,
|
||||
candidates,
|
||||
orderedFilters,
|
||||
scopedStats
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object UtegFilterRunner {
|
||||
private def recordCandidateStatsBeforeFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "before").incr(
|
||||
candidates.count {
|
||||
_.isEmpty
|
||||
}
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "before").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def recordCandidateStatsAfterFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "after").incr(
|
||||
candidates.count {
|
||||
_.isEmpty
|
||||
}
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "after").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Helper function for running some candidates through a sequence of filters
|
||||
*/
|
||||
private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filters: Seq[FilterBase],
|
||||
statsReceiver: StatsReceiver
|
||||
): Future[Seq[Seq[InitialCandidate]]] =
|
||||
filters.foldLeft(Future.value(candidates)) {
|
||||
case (candsFut, filter) =>
|
||||
candsFut.flatMap { cands =>
|
||||
recordCandidateStatsBeforeFilter(cands, statsReceiver.scope(filter.name))
|
||||
filter
|
||||
.filter(cands, filter.requestToConfig(request))
|
||||
.map { filteredCands =>
|
||||
recordCandidateStatsAfterFilter(filteredCands, statsReceiver.scope(filter.name))
|
||||
filteredCands
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,51 @@
package com.twitter.cr_mixer.filter

import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.util.Future

import javax.inject.Inject
import javax.inject.Singleton

/**
 * Removes unhealthy candidates.
 * Currently Timeline Ranker applies a check on the following three scores:
 * - toxicityScore
 * - pBlockScore
 * - pReportedTweetScore
 *
 * whereas isPassTweetHealthFilterStrict checks two additional scores with the same threshold:
 * - pSpammyTweetScore
 * - spammyTweetContentScore
 *
 * We've verified that both filters behave very similarly.
 */
@Singleton
case class UtegHealthFilter @Inject() () extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = Boolean

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    if (config) {
      Future.value(
        candidates.map { candidateSeq =>
          candidateSeq.filter { candidate =>
            candidate.tweetInfo.isPassTweetHealthFilterStrict.getOrElse(false)
          }
        }
      )
    } else {
      Future.value(candidates)
    }
  }

  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType = {
    query.params(UtegTweetGlobalParams.EnableTLRHealthFilterParam)
  }
}
@ -0,0 +1,81 @@
package com.twitter.cr_mixer.filter

import com.twitter.cr_mixer.filter.VideoTweetFilter.FilterConfig
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.VideoTweetFilterParams
import com.twitter.util.Future
import javax.inject.Singleton

@Singleton
case class VideoTweetFilter() extends FilterBase {
  override val name: String = this.getClass.getCanonicalName

  override type ConfigType = FilterConfig

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    Future.value(candidates.map {
      _.flatMap { candidate =>
        if (!config.enableVideoTweetFilter) {
          Some(candidate)
        } else {
          // If hasVideo is true, hasImage and hasGif should be false.
          val hasVideo = checkTweetInfoAttribute(candidate.tweetInfo.hasVideo)
          val isHighMediaResolution =
            checkTweetInfoAttribute(candidate.tweetInfo.isHighMediaResolution)
          val isQuoteTweet = checkTweetInfoAttribute(candidate.tweetInfo.isQuoteTweet)
          val isReply = checkTweetInfoAttribute(candidate.tweetInfo.isReply)
          val hasMultipleMedia = checkTweetInfoAttribute(candidate.tweetInfo.hasMultipleMedia)
          val hasUrl = checkTweetInfoAttribute(candidate.tweetInfo.hasUrl)

          if (hasVideo && isHighMediaResolution && !isQuoteTweet &&
            !isReply && !hasMultipleMedia && !hasUrl) {
            Some(candidate)
          } else {
            None
          }
        }
      }
    })
  }

  def checkTweetInfoAttribute(attributeOpt: => Option[Boolean]): Boolean = {
    if (attributeOpt.isDefined)
      attributeOpt.get
    else {
      // Take a Quoted Tweet (TweetInfo.isQuoteTweet) as an example: if attributeOpt is None,
      // we default to treating it as not a quoted tweet. Similarly, if TweetInfo.hasVideo
      // is None, we treat the tweet as not having video.
      false
    }
  }

  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): FilterConfig = {
    val enableVideoTweetFilter = query match {
      case _: CrCandidateGeneratorQuery | _: RelatedTweetCandidateGeneratorQuery |
          _: RelatedVideoTweetCandidateGeneratorQuery =>
        query.params(VideoTweetFilterParams.EnableVideoTweetFilterParam)
      case _ => false // e.g., GetRelatedTweets()
    }
    FilterConfig(
      enableVideoTweetFilter = enableVideoTweetFilter
    )
  }
}

object VideoTweetFilter {
  // Extend the FilterConfig to add more flags if needed.
  // For now they are hardcoded according to the prod setting.
  case class FilterConfig(
    enableVideoTweetFilter: Boolean)
}
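One detail worth noting: checkTweetInfoAttribute defaults a missing attribute to false, so a tweet with an unknown hasVideo cannot pass the enabled video filter, whereas the health filters above default a missing score to "keep". A tiny stand-alone sketch of that difference (illustrative only):

object MissingAttributeDefaultsSketch {
  // Video-filter style: an unknown attribute fails the check.
  def videoStyle(attr: Option[Boolean]): Boolean = attr.getOrElse(false)
  // Health-filter style: an unknown score keeps the candidate.
  def healthStyle(attr: Option[Boolean]): Boolean = attr.getOrElse(true)

  def main(args: Array[String]): Unit = {
    val unknown: Option[Boolean] = None
    println(videoStyle(unknown))  // false
    println(healthStyle(unknown)) // true
  }
}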
@ -0,0 +1,139 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialAdsCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRequest
|
||||
import com.twitter.cr_mixer.thriftscala.AdsResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class AdsRecommendationsScribeLogger @Inject() (
|
||||
@Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger,
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName)
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
|
||||
/*
|
||||
* Scribe first step results after fetching initial ads candidate
|
||||
* */
|
||||
def scribeInitialAdsCandidates(
|
||||
query: AdsCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]],
|
||||
enableScribe: Boolean // controlled by feature switch so that we can scribe for certain DDG
|
||||
): Future[Seq[Seq[InitialAdsCandidate]]] = {
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertFetchCandidatesResult(input, scribeMetadata.userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
if (enableScribe && decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.adsRecommendationsPerExperimentScribeRate)) {
|
||||
upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Scribe top level API results
|
||||
* */
|
||||
def scribeGetAdsRecommendations(
|
||||
request: AdsRequest,
|
||||
startTime: Long,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[AdsResponse],
|
||||
enableScribe: Boolean
|
||||
): Future[AdsResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = AdsRecommendationsResult.AdsRecommendationTopLevelApiResult(
|
||||
AdsRecommendationTopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
if (enableScribe && decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.adsRecommendationsPerExperimentScribeRate)) {
|
||||
topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidatesSeq: Seq[Seq[InitialAdsCandidate]],
|
||||
requestUserId: UserId
|
||||
): AdsRecommendationsResult = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None // not populated yet
|
||||
)
|
||||
}
|
||||
}
|
||||
AdsRecommendationsResult.FetchCandidatesResult(
|
||||
FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
result: AdsRecommendationsResult,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetAdsRecommendationsScribe = {
|
||||
GetAdsRecommendationsScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
result = result,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetAdsRecommendationsScribe
|
||||
): Unit = {
|
||||
publish(
|
||||
logger = adsRecommendationsScribeLogger,
|
||||
codec = GetAdsRecommendationsScribe,
|
||||
message = scribeMsg)
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,34 @@
scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/javax/inject:javax.inject",
        "abdecider/src/main/scala",
        "content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "decider/src/main/scala",
        "featureswitches/featureswitches-core/src/main/scala:experimentation-settings",
        "finagle/finagle-core/src/main",
        "frigate/frigate-common:base",
        "frigate/frigate-common:util",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
        "kafka/finagle-kafka/finatra-kafka/src/main/scala",
        "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
        "scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
        "scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
        "scrooge/scrooge-serializer/src/main/scala",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/thrift/com/twitter/ml/api:data-scala",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
        "timelines/src/main/scala/com/twitter/timelines/clientevent",
        "util-internal/scribe/src/main/scala/com/twitter/logging",
    ],
)
@ -0,0 +1,489 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.google.common.base.CaseFormat
|
||||
import com.twitter.abdecider.ScribingABDeciderUtil
|
||||
import com.twitter.scribelib.marshallers.ClientDataProvider
|
||||
import com.twitter.scribelib.marshallers.ScribeSerialization
|
||||
import com.twitter.timelines.clientevent.MinimalClientDataProvider
|
||||
import com.twitter.cr_mixer.model.BlendedCandidate
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.scribe.ScribeCategories
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.InterleaveResult
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.cr_mixer.thriftscala.RankResult
|
||||
import com.twitter.cr_mixer.thriftscala.Result
|
||||
import com.twitter.cr_mixer.thriftscala.SourceSignal
|
||||
import com.twitter.cr_mixer.thriftscala.TopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe
|
||||
import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.cr_mixer.util.MetricTagUtil
|
||||
import com.twitter.decider.SimpleRecipient
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.finatra.kafka.producers.KafkaProducerBase
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import com.twitter.util.Time
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
import scala.util.Random
|
||||
|
||||
@Singleton
|
||||
case class CrMixerScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger,
|
||||
@Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger,
|
||||
@Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger,
|
||||
kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) {
|
||||
|
||||
import CrMixerScribeLogger._
|
||||
|
||||
private val scopedStats = statsReceiver.scope("CrMixerScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val kafkaMessagesStats = scopedStats.scope("KafkaMessages")
|
||||
private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics")
|
||||
private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates")
|
||||
|
||||
private val serialization = new ScribeSerialization {}
|
||||
|
||||
def scribeSignalSources(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])]
|
||||
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchSignalSourcesResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribeInitialCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchCandidatesResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribePreRankFilterCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertPreRankFilterResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribeInterleaveCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[BlendedCandidate]]
|
||||
): Future[Seq[BlendedCandidate]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertInterleaveResult,
|
||||
enableKafkaScribe = true
|
||||
)
|
||||
}
|
||||
|
||||
def scribeRankedCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[RankedCandidate]]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertRankResult
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Top Level API Request / Response and performance metrics
|
||||
* for the getTweetRecommendations() endpoint.
|
||||
*/
|
||||
def scribeGetTweetRecommendations(
|
||||
request: CrMixerTweetRequest,
|
||||
startTime: Long,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[CrMixerTweetResponse]
|
||||
): Future[CrMixerTweetResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertTopLevelAPIResult(request, response, startTime)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
// We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.topLevelApiDdgMetricsScribeRate)) {
|
||||
topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
val topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request)
|
||||
publishTopLevelDdgMetrics(
|
||||
logger = ddgMetricsLogger,
|
||||
topLevelDdgMetricsMetadata = topLevelDdgMetricsMetadata,
|
||||
latencyMs = latencyMs,
|
||||
candidateSize = response.tweets.length)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe all of the Blue Verified tweets that are candidates from cr-mixer
|
||||
* from the getTweetRecommendations() endpoint for stats tracking/debugging purposes.
|
||||
*/
|
||||
def scribeGetTweetRecommendationsForBlueVerified(
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[Seq[RankedCandidate]]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
getResultFn.onSuccess { rankedCandidates =>
|
||||
if (decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)) {
|
||||
blueVerifiedTweetCandidatesStats.counter("process_request").incr()
|
||||
|
||||
val blueVerifiedTweetCandidates = rankedCandidates.filter { tweet =>
|
||||
tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
|
||||
}
|
||||
|
||||
val impressedBuckets = getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil)
|
||||
|
||||
val blueVerifiedCandidateScribes = blueVerifiedTweetCandidates.map { candidate =>
|
||||
blueVerifiedTweetCandidatesStats
|
||||
.scope(scribeMetadata.product.name).counter(
|
||||
candidate.tweetInfo.authorId.toString).incr()
|
||||
VITTweetCandidateScribe(
|
||||
tweetId = candidate.tweetId,
|
||||
authorId = candidate.tweetInfo.authorId,
|
||||
score = candidate.predictionScore,
|
||||
metricTags = MetricTagUtil.buildMetricTags(candidate)
|
||||
)
|
||||
}
|
||||
|
||||
val blueVerifiedScribe =
|
||||
VITTweetCandidatesScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
candidates = blueVerifiedCandidateScribes,
|
||||
product = scribeMetadata.product,
|
||||
impressedBuckets = impressedBuckets
|
||||
)
|
||||
|
||||
publish(
|
||||
logger = blueVerifiedTweetRecsScribeLogger,
|
||||
codec = VITTweetCandidatesScribe,
|
||||
message = blueVerifiedScribe)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Per-step intermediate results and performance metrics
|
||||
* for each step: fetch signals, fetch candidates, filters, ranker, etc
|
||||
*/
|
||||
private[logging] def scribeResultsAndPerformanceMetrics[T](
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[T],
|
||||
convertToResultFn: (T, UserId) => Result,
|
||||
enableKafkaScribe: Boolean = false
|
||||
): Future[T] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertToResultFn(input, scribeMetadata.userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
|
||||
// forks the scribe as a Kafka message for async feature hydration
|
||||
if (enableKafkaScribe && shouldScribeKafkaMessage(
|
||||
scribeMetadata.userId,
|
||||
scribeMetadata.product)) {
|
||||
kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr()
|
||||
|
||||
val batchedKafkaMessages = downsampleKafkaMessage(scribeMsg)
|
||||
batchedKafkaMessages.foreach { kafkaMessage =>
|
||||
kafkaProducer.send(
|
||||
topic = ScribeCategories.TweetsRecs.scribeCategory,
|
||||
key = traceId.toString,
|
||||
value = kafkaMessage,
|
||||
timestamp = Time.now.inMilliseconds
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertTopLevelAPIResult(
|
||||
request: CrMixerTweetRequest,
|
||||
response: CrMixerTweetResponse,
|
||||
startTime: Long
|
||||
): Result = {
|
||||
Result.TopLevelApiResult(
|
||||
TopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
}
|
||||
|
||||
private def convertFetchSignalSourcesResult(
|
||||
sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]),
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val sourceSignals = sourceInfoSetTuple._1.map { sourceInfo =>
|
||||
SourceSignal(id = Some(sourceInfo.internalId))
|
||||
}
|
||||
// For source graphs, we pass in requestUserId as a placeholder
|
||||
val sourceGraphs = sourceInfoSetTuple._2.map {
|
||||
case (_, _) =>
|
||||
SourceSignal(id = Some(InternalId.UserId(requestUserId)))
|
||||
}
|
||||
Result.FetchSignalSourcesResult(
|
||||
FetchSignalSourcesResult(
|
||||
signals = Some(sourceSignals ++ sourceGraphs)
|
||||
))
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None // not populated yet
|
||||
)
|
||||
}
|
||||
}
|
||||
Result.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def convertPreRankFilterResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None // not populated yet
|
||||
)
|
||||
}
|
||||
}
|
||||
Result.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
// We take InterleaveResult for Unconstrained dataset ML ranker training
|
||||
private def convertInterleaveResult(
|
||||
blendedCandidates: Seq[BlendedCandidate],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = blendedCandidates.map { blendedCandidate =>
|
||||
val candidateGenerationKey =
|
||||
CandidateGenerationKeyUtil.toThrift(blendedCandidate.reasonChosen, requestUserId)
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = blendedCandidate.tweetId,
|
||||
candidateGenerationKey = Some(candidateGenerationKey),
|
||||
authorId = Some(blendedCandidate.tweetInfo.authorId), // for ML pipeline training
|
||||
score = Some(blendedCandidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = Some(blendedCandidate.potentialReasons.size)
|
||||
) // hydrate fields for light ranking training data
|
||||
}
|
||||
Result.InterleaveResult(InterleaveResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def convertRankResult(
|
||||
rankedCandidates: Seq[RankedCandidate],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = rankedCandidates.map { rankedCandidate =>
|
||||
val candidateGenerationKey =
|
||||
CandidateGenerationKeyUtil.toThrift(rankedCandidate.reasonChosen, requestUserId)
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = rankedCandidate.tweetId,
|
||||
candidateGenerationKey = Some(candidateGenerationKey),
|
||||
score = Some(rankedCandidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = Some(rankedCandidate.potentialReasons.size)
|
||||
)
|
||||
}
|
||||
Result.RankResult(RankResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
result: Result,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetTweetsRecommendationsScribe = {
|
||||
GetTweetsRecommendationsScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
result = result,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetTweetsRecommendationsScribe
|
||||
): Unit = {
|
||||
publish(
|
||||
logger = tweetRecsScribeLogger,
|
||||
codec = GetTweetsRecommendationsScribe,
|
||||
message = scribeMsg)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gate for producing messages to Kafka for async feature hydration
|
||||
*/
|
||||
private def shouldScribeKafkaMessage(
|
||||
userId: UserId,
|
||||
product: Product
|
||||
): Boolean = {
|
||||
val isEligibleUser = decider.isAvailable(
|
||||
DeciderConstants.kafkaMessageScribeSampleRate,
|
||||
Some(SimpleRecipient(userId)))
|
||||
val isHomeProduct = (product == Product.Home)
|
||||
isEligibleUser && isHomeProduct
|
||||
}
|
||||
|
||||
/**
|
||||
* Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled
|
||||
*/
|
||||
private[logging] def downsampleKafkaMessage(
|
||||
scribeMsg: GetTweetsRecommendationsScribe
|
||||
): Seq[GetTweetsRecommendationsScribe] = {
|
||||
val sampledResultSeq: Seq[Result] = scribeMsg.result match {
|
||||
case Result.InterleaveResult(interleaveResult) =>
|
||||
val sampledTweetsSeq = interleaveResult.tweets
|
||||
.map { tweets =>
|
||||
Random
|
||||
.shuffle(tweets).take(KafkaMaxTweetsPerMessage)
|
||||
.grouped(BatchSize).toSeq
|
||||
}.getOrElse(Seq.empty)
|
||||
|
||||
sampledTweetsSeq.map { sampledTweets =>
|
||||
Result.InterleaveResult(InterleaveResult(Some(sampledTweets)))
|
||||
}
|
||||
|
||||
// if it's an unrecognized type, err on the side of sending no candidates
|
||||
case _ =>
|
||||
kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr()
|
||||
Seq(Result.InterleaveResult(InterleaveResult(None)))
|
||||
}
|
||||
|
||||
sampledResultSeq.map { sampledResult =>
|
||||
GetTweetsRecommendationsScribe(
|
||||
uuid = scribeMsg.uuid,
|
||||
userId = scribeMsg.userId,
|
||||
result = sampledResult,
|
||||
traceId = scribeMsg.traceId,
|
||||
performanceMetrics = None,
|
||||
impressedBuckets = None
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles client_event serialization to log data into DDG metrics
|
||||
*/
|
||||
private[logging] def publishTopLevelDdgMetrics(
|
||||
logger: Logger,
|
||||
topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
|
||||
candidateSize: Long,
|
||||
latencyMs: Long,
|
||||
): Unit = {
|
||||
val data = Map[Any, Any](
|
||||
"latency_ms" -> latencyMs,
|
||||
"event_value" -> candidateSize
|
||||
)
|
||||
val label: (String, String) = ("tweetrec", "")
|
||||
val namespace = getNamespace(topLevelDdgMetricsMetadata, label) + ("action" -> "candidates")
|
||||
val message =
|
||||
serialization
|
||||
.serializeClientEvent(namespace, getClientData(topLevelDdgMetricsMetadata), data)
|
||||
logger.info(message)
|
||||
}
|
||||
|
||||
private def getClientData(
|
||||
topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata
|
||||
): ClientDataProvider =
|
||||
MinimalClientDataProvider(
|
||||
userId = topLevelDdgMetricsMetadata.userId,
|
||||
guestId = None,
|
||||
clientApplicationId = topLevelDdgMetricsMetadata.clientApplicationId,
|
||||
countryCode = topLevelDdgMetricsMetadata.countryCode
|
||||
)
|
||||
|
||||
private def getNamespace(
|
||||
topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
|
||||
label: (String, String)
|
||||
): Map[String, String] = {
|
||||
val productName =
|
||||
CaseFormat.UPPER_CAMEL
|
||||
.to(CaseFormat.LOWER_UNDERSCORE, topLevelDdgMetricsMetadata.product.originalName)
|
||||
|
||||
Map(
|
||||
"client" -> ScribingABDeciderUtil.clientForAppId(
|
||||
topLevelDdgMetricsMetadata.clientApplicationId),
|
||||
"page" -> "cr-mixer",
|
||||
"section" -> productName,
|
||||
"component" -> label._1,
|
||||
"element" -> label._2
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
object CrMixerScribeLogger {
|
||||
val KafkaMaxTweetsPerMessage: Int = 200
|
||||
val BatchSize: Int = 20
|
||||
}
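The downsampling above (shuffle, cap at KafkaMaxTweetsPerMessage, then split into BatchSize groups so each Kafka message stays under the Strato size limit) can be exercised in isolation; the sketch below mirrors the shuffle/take/grouped logic with stand-in data:

import scala.util.Random

object DownsampleSketch {
  val KafkaMaxTweetsPerMessage = 200
  val BatchSize = 20

  // Cap the candidate list and split it into message-sized batches.
  def downsample[T](tweets: Seq[T]): Seq[Seq[T]] =
    Random.shuffle(tweets).take(KafkaMaxTweetsPerMessage).grouped(BatchSize).toSeq

  def main(args: Array[String]): Unit = {
    val batches = downsample(1 to 1000)
    println(batches.size)      // 10 batches of 20 drawn from the 200 sampled tweets
    println(batches.head.size) // 20
  }
}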
|
@ -0,0 +1,193 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResult
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class RelatedTweetScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId")
|
||||
|
||||
def scribeInitialCandidates(
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
RelatedTweetScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchCandidatesResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribePreRankFilterCandidates(
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
RelatedTweetScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertPreRankFilterResult
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Top Level API Request / Response and performance metrics
|
||||
* for the getRelatedTweets endpoint.
|
||||
*/
|
||||
def scribeGetRelatedTweets(
|
||||
request: RelatedTweetRequest,
|
||||
startTime: Long,
|
||||
relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
|
||||
getResultFn: => Future[RelatedTweetResponse]
|
||||
): Future[RelatedTweetResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
relatedTweetScribeMetadata.clientContext.userId match {
|
||||
case Some(userId) =>
|
||||
if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
topLevelApiStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertTopLevelAPIResult(request, response, startTime)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId)
|
||||
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
case _ =>
|
||||
topLevelApiNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Per-step intermediate results and performance metrics
|
||||
* for each step: fetch candidates, filters.
|
||||
*/
|
||||
private def scribeResultsAndPerformanceMetrics[T](
|
||||
relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
|
||||
getResultFn: => Future[T],
|
||||
convertToResultFn: (T, UserId) => RelatedTweetResult
|
||||
): Future[T] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
relatedTweetScribeMetadata.clientContext.userId match {
|
||||
case Some(userId) =>
|
||||
if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
upperFunnelsStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertToResultFn(input, userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId)
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
case _ =>
|
||||
upperFunnelsNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertTopLevelAPIResult(
|
||||
request: RelatedTweetRequest,
|
||||
response: RelatedTweetResponse,
|
||||
startTime: Long
|
||||
): RelatedTweetResult = {
|
||||
RelatedTweetResult.RelatedTweetTopLevelApiResult(
|
||||
RelatedTweetTopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): RelatedTweetResult = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = None
|
||||
) // do not hydrate candidateGenerationKey to save cost
|
||||
}
|
||||
}
|
||||
RelatedTweetResult.FetchCandidatesResult(
|
||||
FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def convertPreRankFilterResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): RelatedTweetResult = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
val candidateGenerationKey =
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(candidateGenerationKey),
|
||||
authorId = Some(candidate.tweetInfo.authorId),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None
|
||||
)
|
||||
}
|
||||
}
|
||||
RelatedTweetResult.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
relatedTweetResult: RelatedTweetResult,
|
||||
relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetRelatedTweetsScribe = {
|
||||
GetRelatedTweetsScribe(
|
||||
uuid = relatedTweetScribeMetadata.requestUUID,
|
||||
internalId = relatedTweetScribeMetadata.internalId,
|
||||
relatedTweetResult = relatedTweetResult,
|
||||
requesterId = relatedTweetScribeMetadata.clientContext.userId,
|
||||
guestId = relatedTweetScribeMetadata.clientContext.guestId,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetRelatedTweetsScribe
|
||||
): Unit = {
|
||||
publish(logger = relatedTweetsScribeLogger, codec = GetRelatedTweetsScribe, message = scribeMsg)
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
package com.twitter.cr_mixer.logging

import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.logging.Logger
import com.twitter.scrooge.BinaryThriftStructSerializer
import com.twitter.scrooge.ThriftStruct
import com.twitter.scrooge.ThriftStructCodec

object ScribeLoggerUtils {

  /**
   * Handles base64-encoding, serialization, and publish.
   */
  private[logging] def publish[T <: ThriftStruct](
    logger: Logger,
    codec: ThriftStructCodec[T],
    message: T
  ): Unit = {
    logger.info(BinaryThriftStructSerializer(codec).toString(message))
  }

  private[logging] def getImpressedBuckets(
    scopedStats: StatsReceiver
  ): Option[List[ImpressesedBucketInfo]] = {
    StatsUtil.trackNonFutureBlockStats(scopedStats.scope("getImpressedBuckets")) {
      CrMixerImpressedBuckets.getAllImpressedBuckets.map { listBuckets =>
        val listBucketsSet = listBuckets.toSet
        scopedStats.stat("impressed_buckets").add(listBucketsSet.size)
        listBucketsSet.map { bucket =>
          ImpressesedBucketInfo(
            experimentId = bucket.experiment.settings.experimentId.getOrElse(-1L),
            bucketName = bucket.name,
            version = bucket.experiment.settings.version,
          )
        }.toList
      }
    }
  }

}
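The publish helper delegates to BinaryThriftStructSerializer.toString, which (per the comment above) emits the struct as a base64-encoded line suitable for a scribe category. The same shape with plain JDK pieces and no Thrift dependency, purely as an illustration:

import java.nio.charset.StandardCharsets
import java.util.Base64

object ScribePublishSketch {
  // Serialize a payload to bytes, base64-encode it, and emit it as one log line.
  def publish(logLine: String => Unit, payloadBytes: Array[Byte]): Unit =
    logLine(Base64.getEncoder.encodeToString(payloadBytes))

  def main(args: Array[String]): Unit =
    publish(println, "pretend-thrift-struct".getBytes(StandardCharsets.UTF_8))
}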
@ -0,0 +1,45 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
|
||||
case class ScribeMetadata(
|
||||
requestUUID: Long,
|
||||
userId: UserId,
|
||||
product: Product)
|
||||
|
||||
object ScribeMetadata {
|
||||
def from(query: CrCandidateGeneratorQuery): ScribeMetadata = {
|
||||
ScribeMetadata(query.requestUUID, query.userId, query.product)
|
||||
}
|
||||
|
||||
def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata = {
|
||||
ScribeMetadata(query.requestUUID, query.userId, query.product)
|
||||
}
|
||||
|
||||
def from(query: AdsCandidateGeneratorQuery): ScribeMetadata = {
|
||||
ScribeMetadata(query.requestUUID, query.userId, query.product)
|
||||
}
|
||||
}
|
||||
|
||||
case class RelatedTweetScribeMetadata(
|
||||
requestUUID: Long,
|
||||
internalId: InternalId,
|
||||
clientContext: ClientContext,
|
||||
product: Product)
|
||||
|
||||
object RelatedTweetScribeMetadata {
|
||||
def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata = {
|
||||
RelatedTweetScribeMetadata(
|
||||
query.requestUUID,
|
||||
query.internalId,
|
||||
query.clientContext,
|
||||
query.product)
|
||||
}
|
||||
}
|
@ -0,0 +1,22 @@
package com.twitter.cr_mixer
package logging

import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.Product

case class TopLevelDdgMetricsMetadata(
  userId: Option[Long],
  product: Product,
  clientApplicationId: Option[Long],
  countryCode: Option[String])

object TopLevelDdgMetricsMetadata {
  def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = {
    TopLevelDdgMetricsMetadata(
      userId = request.clientContext.userId,
      product = request.product,
      clientApplicationId = request.clientContext.appId,
      countryCode = request.clientContext.countryCode
    )
  }
}
@ -0,0 +1,147 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResult
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class UtegTweetScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
|
||||
def scribeInitialCandidates(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]]
|
||||
): Future[Seq[TweetWithScoreAndSocialProof]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchCandidatesResult
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Top Level API Request / Response and performance metrics
|
||||
* for the GetUtegTweetRecommendations() endpoint.
|
||||
*/
|
||||
def scribeGetUtegTweetRecommendations(
|
||||
request: UtegTweetRequest,
|
||||
startTime: Long,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[UtegTweetResponse]
|
||||
): Future[UtegTweetResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertTopLevelAPIResult(request, response, startTime)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertTopLevelAPIResult(
|
||||
request: UtegTweetRequest,
|
||||
response: UtegTweetResponse,
|
||||
startTime: Long
|
||||
): UtegTweetResult = {
|
||||
UtegTweetResult.UtegTweetTopLevelApiResult(
|
||||
UtegTweetTopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
utegTweetResult: UtegTweetResult,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetUtegTweetsScribe = {
|
||||
GetUtegTweetsScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
utegTweetResult = utegTweetResult,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetUtegTweetsScribe
|
||||
): Unit = {
|
||||
publish(logger = utegTweetScribeLogger, codec = GetUtegTweetsScribe, message = scribeMsg)
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidates: Seq[TweetWithScoreAndSocialProof],
|
||||
requestUserId: UserId
|
||||
): UtegTweetResult = {
|
||||
val tweetCandidatesWithMetadata = candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = None
|
||||
) // do not hydrate candidateGenerationKey to save cost
|
||||
}
|
||||
UtegTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Per-step intermediate results and performance metrics
|
||||
* for each step: fetch candidates, filters.
|
||||
*/
|
||||
private def scribeResultsAndPerformanceMetrics[T](
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[T],
|
||||
convertToResultFn: (T, UserId) => UtegTweetResult
|
||||
): Future[T] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertToResultFn(input, scribeMetadata.userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
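For context, UtegTweetScribeLogger above follows a sample-then-scribe pattern: the per-user decider check decides whether this request gets logged at all, so the stats counter, latency, and scribe message are only computed inside the guarded branch. Below is a minimal, framework-free sketch of that pattern; isAvailableForId and the println stand in for CrMixerDecider and the Logger publish call, and the hashing scheme is an assumption for illustration, not the production decider logic.

// Minimal sketch of decider-gated scribing (illustrative only, not cr-mixer code).
object DeciderGatedScribeSketch {

  // Stand-in for CrMixerDecider#isAvailableForId: map the id into [0, 10000)
  // and compare it against the decider's availability value (0 = off, 10000 = 100%).
  def isAvailableForId(userId: Long, availability: Int): Boolean =
    math.abs(userId.hashCode % 10000) < availability

  // Only build and publish the (potentially expensive) message when sampled in.
  def scribeIfSampled(userId: Long, availability: Int)(buildMsg: => String): Unit =
    if (isAvailableForId(userId, availability)) {
      val msg = buildMsg
      println(s"scribe: $msg") // stand-in for Logger#info / publish
    }
}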
@ -0,0 +1,16 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"configapi/configapi-core",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/recos:recos-common-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,200 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
|
||||
sealed trait Candidate {
|
||||
val tweetId: TweetId
|
||||
|
||||
override def hashCode: Int = tweetId.toInt
|
||||
}
|
||||
|
||||
case class TweetWithCandidateGenerationInfo(
|
||||
tweetId: TweetId,
|
||||
candidateGenerationInfo: CandidateGenerationInfo)
|
||||
extends Candidate {
|
||||
|
||||
def getSimilarityScore: Double =
|
||||
candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)
|
||||
}
|
||||
|
||||
case class InitialCandidate(
|
||||
tweetId: TweetId,
|
||||
tweetInfo: TweetInfo,
|
||||
candidateGenerationInfo: CandidateGenerationInfo)
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
/**
|
||||
* The same candidate can be generated by multiple algorithms.
|
||||
* During blending, candidate deduping happens. In order to retain the candidateGenerationInfo
|
||||
* from different algorithms, we attach them to a list of potentialReasons.
|
||||
*/
|
||||
def toBlendedCandidate(
|
||||
potentialReasons: Seq[CandidateGenerationInfo],
|
||||
): BlendedCandidate = {
|
||||
BlendedCandidate(
|
||||
tweetId,
|
||||
tweetInfo,
|
||||
candidateGenerationInfo,
|
||||
potentialReasons,
|
||||
)
|
||||
}
|
||||
|
||||
// for experimental purposes only when bypassing interleave / ranking
|
||||
def toRankedCandidate(): RankedCandidate = {
|
||||
RankedCandidate(
|
||||
tweetId,
|
||||
tweetInfo,
|
||||
0.0, // prediction score defaults to 0.0 to signal that this is a no-op
|
||||
candidateGenerationInfo,
|
||||
Seq(candidateGenerationInfo)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class InitialAdsCandidate(
|
||||
tweetId: TweetId,
|
||||
lineItemInfo: Seq[LineItemInfo],
|
||||
candidateGenerationInfo: CandidateGenerationInfo)
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
/**
|
||||
* The same candidate can be generated by multiple algorithms.
|
||||
* During blending, candidate deduping happens. In order to retain the candidateGenerationInfo
|
||||
* from different algorithms, we attach them to a list of potentialReasons.
|
||||
*/
|
||||
def toBlendedAdsCandidate(
|
||||
potentialReasons: Seq[CandidateGenerationInfo],
|
||||
): BlendedAdsCandidate = {
|
||||
BlendedAdsCandidate(
|
||||
tweetId,
|
||||
lineItemInfo,
|
||||
candidateGenerationInfo,
|
||||
potentialReasons,
|
||||
)
|
||||
}
|
||||
|
||||
// for experimental purposes only when bypassing interleave / ranking
|
||||
def toRankedAdsCandidate(): RankedAdsCandidate = {
|
||||
RankedAdsCandidate(
|
||||
tweetId,
|
||||
lineItemInfo,
|
||||
0.0, // prediction score defaults to 0.0 to signal that this is a no-op
|
||||
candidateGenerationInfo,
|
||||
Seq(candidateGenerationInfo)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class BlendedCandidate(
|
||||
tweetId: TweetId,
|
||||
tweetInfo: TweetInfo,
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Gets the similarity score of a tweet from its candidate generation (CG) info. For instance,
* if it comes from a UnifiedTweetBasedSimilarityEngine, the score is the weighted combined score,
* and if it comes from a SimClustersANNSimilarityEngine, the score is the SANN score.
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
|
||||
def toRankedCandidate(predictionScore: Double): RankedCandidate = {
|
||||
RankedCandidate(
|
||||
tweetId,
|
||||
tweetInfo,
|
||||
predictionScore,
|
||||
reasonChosen,
|
||||
potentialReasons
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class BlendedAdsCandidate(
|
||||
tweetId: TweetId,
|
||||
lineItemInfo: Seq[LineItemInfo],
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Gets the similarity score of a tweet from its candidate generation (CG) info. For instance,
* if it comes from a UnifiedTweetBasedSimilarityEngine, the score is the weighted combined score,
* and if it comes from a SimClustersANNSimilarityEngine, the score is the SANN score.
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
|
||||
def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate = {
|
||||
RankedAdsCandidate(
|
||||
tweetId,
|
||||
lineItemInfo,
|
||||
predictionScore,
|
||||
reasonChosen,
|
||||
potentialReasons
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class RankedCandidate(
|
||||
tweetId: TweetId,
|
||||
tweetInfo: TweetInfo,
|
||||
predictionScore: Double,
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Gets the similarity score of a tweet from its candidate generation (CG) info. For instance,
* if it comes from a UnifiedTweetBasedSimilarityEngine, the score is the weighted combined score,
* and if it comes from a SimClustersANNSimilarityEngine, the score is the SANN score.
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
}
|
||||
|
||||
case class RankedAdsCandidate(
|
||||
tweetId: TweetId,
|
||||
lineItemInfo: Seq[LineItemInfo],
|
||||
predictionScore: Double,
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Gets the similarity score of a tweet from its candidate generation (CG) info. For instance,
* if it comes from a UnifiedTweetBasedSimilarityEngine, the score is the weighted combined score,
* and if it comes from a SimClustersANNSimilarityEngine, the score is the SANN score.
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
}
|
||||
|
||||
case class TripTweetWithScore(tweetId: TweetId, score: Double) extends Candidate
|
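The comment on toBlendedCandidate above describes blending-time dedup: when the same tweet is produced by several algorithms, the duplicates are collapsed into one candidate while every algorithm's generation info is retained in potentialReasons. A simplified, self-contained sketch of that idea follows; SimpleCandidate and SimpleBlended are illustrative stand-ins for InitialCandidate and BlendedCandidate, and choosing the first occurrence as reasonChosen is an assumption, not the actual blending policy.

// Illustrative dedup-by-tweetId sketch (not the production blender).
case class SimpleCandidate(tweetId: Long, generationInfo: String)
case class SimpleBlended(tweetId: Long, reasonChosen: String, potentialReasons: Seq[String])

def blend(candidates: Seq[SimpleCandidate]): Seq[SimpleBlended] =
  candidates
    .groupBy(_.tweetId)
    .map {
      case (tweetId, duplicates) =>
        // Keep the first occurrence as the chosen reason and retain all of them.
        SimpleBlended(tweetId, duplicates.head.generationInfo, duplicates.map(_.generationInfo))
    }
    .toSeq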
@ -0,0 +1,67 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Time
|
||||
|
||||
/***
|
||||
* Tweet-level attributes. Represents the source used in candidate generation
|
||||
* For legacy reasons, SourceType used to represent both the source type and the SimilarityEngineType.
* Moving forward, SourceType will be used for the source type ONLY, e.g., TweetFavorite, UserFollow, TwiceUserId.
* At the same time, we introduce a new SimilarityEngineType to separate the two, e.g., SimClustersANN.
|
||||
*
|
||||
* Currently, one special case is that we have TwiceUserId as a source, which is not necessarily a "signal"
|
||||
* @param sourceType, e.g., SourceType.TweetFavorite, SourceType.UserFollow, SourceType.TwiceUserId
|
||||
* @param internalId, e.g., UserId(0L), TweetId(0L)
|
||||
*/
|
||||
case class SourceInfo(
|
||||
sourceType: SourceType,
|
||||
internalId: InternalId,
|
||||
sourceEventTime: Option[Time])
|
||||
|
||||
/***
|
||||
* Tweet-level attributes. Represents the source User Graph used in candidate generation
|
||||
* It is an intermediate product, and will not be stored, unlike SourceInfo.
|
||||
* Essentially, CrMixer queries a graph, and the graph returns a list of users to be used as sources.
|
||||
* For instance, RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity engines such as
|
||||
* UTG or UTEG will leverage these sources to build candidates.
|
||||
*
|
||||
* We extended the definition of SourceType to cover both "Source Signal" and "Source Graph"
|
||||
* See [CrMixer] Graph Based Source Fetcher Abstraction Proposal:
|
||||
*
|
||||
* Consider making both SourceInfo and GraphSourceInfo extend the same trait to
* provide a unified interface.
|
||||
*/
|
||||
case class GraphSourceInfo(
|
||||
sourceType: SourceType,
|
||||
seedWithScores: Map[UserId, Double])
|
||||
|
||||
/***
|
||||
* Tweet-level attributes. Represents the similarity engine (the algorithm) used for
|
||||
* candidate generation along with their metadata.
|
||||
* @param similarityEngineType, e.g., SimClustersANN, UserTweetGraph
|
||||
* @param modelId, e.g., UserTweetGraphConsumerEmbedding_ALL_20210708
|
||||
* @param score - a score generated by this sim engine
|
||||
*/
|
||||
case class SimilarityEngineInfo(
|
||||
similarityEngineType: SimilarityEngineType,
|
||||
modelId: Option[String], // modelId can be None, e.g., UTEG, UnifiedTweetBasedSE, etc.
|
||||
score: Option[Double])
|
||||
|
||||
/****
|
||||
* Tweet-level attributes. A combination of both SourceInfo and SimilarityEngineInfo.
* A SimilarityEngine is a composition and can be composed of many leaf similarity engines.
|
||||
* For instance, the TweetBasedUnified SE could be a composition of both UserTweetGraph SE, SimClustersANN SE.
|
||||
* Note that a SimilarityEngine (Composite) may call other SimilarityEngines (Atomic, Contributing)
|
||||
* to contribute to its final candidate list. We track these Contributing SEs in the contributingSimilarityEngines list
|
||||
*
|
||||
* @param sourceInfoOpt - this is optional as many consumer-based CGs do not have a source
|
||||
* @param similarityEngineInfo - the similarity engine used in candidate generation (e.g., TweetBasedUnifiedSE). It can be an atomic SE or a composite SE
|
||||
* @param contributingSimilarityEngines - only a composite SE will have it (e.g., SANN, UTG); otherwise it is an empty Seq. All contributing SEs must be atomic
|
||||
*/
|
||||
case class CandidateGenerationInfo(
|
||||
sourceInfoOpt: Option[SourceInfo],
|
||||
similarityEngineInfo: SimilarityEngineInfo,
|
||||
contributingSimilarityEngines: Seq[SimilarityEngineInfo])
|
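To illustrate the composite vs. contributing relationship described above, here is a hedged sketch of a CandidateGenerationInfo for a tweet-based unified engine built from two atomic engines. The SimilarityEngineType values are passed in as parameters because their exact enum members are not shown here, and all scores are placeholders; only the field names come from the case classes above.

// Illustrative only: assumes three SimilarityEngineType values are supplied by the caller.
def exampleCandidateGenerationInfo(
  unifiedEngine: SimilarityEngineType,
  sannEngine: SimilarityEngineType,
  utgEngine: SimilarityEngineType
): CandidateGenerationInfo =
  CandidateGenerationInfo(
    sourceInfoOpt = None, // many consumer-based CGs have no source signal
    similarityEngineInfo = SimilarityEngineInfo(
      similarityEngineType = unifiedEngine,
      modelId = None, // composite engines such as UnifiedTweetBasedSE may omit a modelId
      score = Some(0.87) // the composite's combined score (placeholder)
    ),
    contributingSimilarityEngines = Seq(
      SimilarityEngineInfo(sannEngine, modelId = None, score = Some(0.9)),
      SimilarityEngineInfo(
        utgEngine,
        modelId = Some("UserTweetGraphConsumerEmbedding_ALL_20210708"),
        score = Some(0.8))
    )
  )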
@ -0,0 +1,96 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.timelines.configapi.Params
|
||||
|
||||
sealed trait CandidateGeneratorQuery {
|
||||
val product: Product
|
||||
val maxNumResults: Int
|
||||
val impressedTweetList: Set[TweetId]
|
||||
val params: Params
|
||||
val requestUUID: Long
|
||||
}
|
||||
|
||||
sealed trait HasUserId {
|
||||
val userId: UserId
|
||||
}
|
||||
|
||||
case class CrCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
userState: UserState,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long,
|
||||
languageCode: Option[String] = None)
|
||||
extends CandidateGeneratorQuery
|
||||
with HasUserId
|
||||
|
||||
case class UtegTweetCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
userState: UserState,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
with HasUserId
|
||||
|
||||
case class RelatedTweetCandidateGeneratorQuery(
|
||||
internalId: InternalId,
|
||||
clientContext: ClientContext, // To scribe LogIn/LogOut requests
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
|
||||
case class RelatedVideoTweetCandidateGeneratorQuery(
|
||||
internalId: InternalId,
|
||||
clientContext: ClientContext, // To scribe LogIn/LogOut requests
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
|
||||
case class FrsTweetCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedUserList: Set[UserId],
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
languageCodeOpt: Option[String] = None,
|
||||
countryCodeOpt: Option[String] = None,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
|
||||
case class AdsCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
userState: UserState,
|
||||
maxNumResults: Int,
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
|
||||
case class TopicTweetCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
topicIds: Set[TopicId],
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long,
|
||||
isVideoOnly: Boolean)
|
||||
extends CandidateGeneratorQuery
|
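The HasUserId marker trait above lets downstream code treat user-scoped and non-user-scoped queries uniformly, e.g., when deciding whether a request can be sampled by userId. A small hedged sketch of such a helper follows; the function itself is illustrative, not part of cr-mixer.

// Illustrative only: extract a userId when the query carries one.
def userIdIfPresent(query: CandidateGeneratorQuery): Option[UserId] =
  query match {
    case q: CandidateGeneratorQuery with HasUserId => Some(q.userId)
    case _ => None
  }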
@ -0,0 +1,6 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
sealed trait EarlybirdSimilarityEngineType
|
||||
object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType
|
||||
object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType
|
||||
object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType
|
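Since EarlybirdSimilarityEngineType is a sealed trait with plain singleton objects rather than a thrift enum, callers typically dispatch on it with a pattern match. A minimal hedged sketch follows; the returned stat labels are made up for illustration.

// Illustrative only: map each Earlybird engine flavor to a stats scope name.
def earlybirdEngineLabel(engineType: EarlybirdSimilarityEngineType): String =
  engineType match {
    case EarlybirdSimilarityEngineType_RecencyBased => "recency_based"
    case EarlybirdSimilarityEngineType_ModelBased => "model_based"
    case EarlybirdSimilarityEngineType_TensorflowBased => "tensorflow_based"
  }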
@ -0,0 +1,11 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
object HealthThreshold {
|
||||
object Enum extends Enumeration {
|
||||
val Off: Value = Value(1)
|
||||
val Moderate: Value = Value(2)
|
||||
val Strict: Value = Value(3)
|
||||
val Stricter: Value = Value(4)
|
||||
val StricterPlus: Value = Value(5)
|
||||
}
|
||||
}
|
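HealthThreshold.Enum above is a plain scala.Enumeration, so it can be used directly to gate candidates on health-model scores. The sketch below is illustrative; the per-threshold toxicity cutoffs are assumptions, not the values cr-mixer actually uses.

import com.twitter.cr_mixer.model.HealthThreshold

// Illustrative only: map each threshold to a maximum allowed (hypothetical) toxicity score.
def maxAllowedToxicity(threshold: HealthThreshold.Enum.Value): Double =
  threshold match {
    case HealthThreshold.Enum.Off => 1.0
    case HealthThreshold.Enum.Moderate => 0.9
    case HealthThreshold.Enum.Strict => 0.7
    case HealthThreshold.Enum.Stricter => 0.5
    case HealthThreshold.Enum.StricterPlus => 0.3
  }

def passesHealthCheck(toxicityScore: Double, threshold: HealthThreshold.Enum.Value): Boolean =
  toxicityScore <= maxAllowedToxicity(threshold)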
@ -0,0 +1,77 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/**
|
||||
* A Configuration class for all Model Based Candidate Sources.
|
||||
*
|
||||
* The model name guideline: please name your modelId as "Algorithm_Product_Date".
* If your model is used for multiple product surfaces, name the product part "ALL".
* Don't name your algorithm "MBCG"; all the algorithms here are model-based candidate generation.
*
* Don't forget to add your new models to the allHnswANNSimilarityEngineModelIds list.
|
||||
*/
|
||||
object ModelConfig {
|
||||
// Offline SimClusters CG Experiment related Model Ids
|
||||
val OfflineInterestedInFromKnownFor2020: String = "OfflineIIKF_ALL_20220414"
|
||||
val OfflineInterestedInFromKnownFor2020Hl0El15: String = "OfflineIIKF_ALL_20220414_Hl0_El15"
|
||||
val OfflineInterestedInFromKnownFor2020Hl2El15: String = "OfflineIIKF_ALL_20220414_Hl2_El15"
|
||||
val OfflineInterestedInFromKnownFor2020Hl2El50: String = "OfflineIIKF_ALL_20220414_Hl2_El50"
|
||||
val OfflineInterestedInFromKnownFor2020Hl8El50: String = "OfflineIIKF_ALL_20220414_Hl8_El50"
|
||||
val OfflineMTSConsumerEmbeddingsFav90P20M: String =
|
||||
"OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414"
|
||||
|
||||
// Twhin Model Ids
|
||||
val ConsumerBasedTwHINRegularUpdateAll20221024: String =
|
||||
"ConsumerBasedTwHINRegularUpdate_All_20221024"
|
||||
|
||||
// Averaged Twhin Model Ids
|
||||
val TweetBasedTwHINRegularUpdateAll20221024: String =
|
||||
"TweetBasedTwHINRegularUpdate_All_20221024"
|
||||
|
||||
// Collaborative Filtering Twhin Model Ids
|
||||
val TwhinCollabFilterForFollow: String =
|
||||
"TwhinCollabFilterForFollow"
|
||||
val TwhinCollabFilterForEngagement: String =
|
||||
"TwhinCollabFilterForEngagement"
|
||||
val TwhinMultiClusterForFollow: String =
|
||||
"TwhinMultiClusterForFollow"
|
||||
val TwhinMultiClusterForEngagement: String =
|
||||
"TwhinMultiClusterForEngagement"
|
||||
|
||||
// Two Tower model Ids
|
||||
val TwoTowerFavALL20220808: String =
|
||||
"TwoTowerFav_ALL_20220808"
|
||||
|
||||
// Debugger Demo-Only Model Ids
|
||||
val DebuggerDemo: String = "DebuggerDemo"
|
||||
|
||||
// ColdStartLookalike - this is not really a model name; it is a placeholder to
// indicate the ColdStartLookalike candidate source, which is currently being plugged into
// CustomizedRetrievalCandidateGeneration temporarily.
|
||||
val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707"
|
||||
|
||||
// consumersBasedUTG-RealGraphOon Model Id
|
||||
val ConsumersBasedUtgRealGraphOon20220705: String = "ConsumersBasedUtgRealGraphOon_All_20220705"
|
||||
// consumersBasedUAG-RealGraphOon Model Id
|
||||
val ConsumersBasedUagRealGraphOon20221205: String = "ConsumersBasedUagRealGraphOon_All_20221205"
|
||||
|
||||
// FTR
|
||||
val OfflineFavDecayedSum: String = "OfflineFavDecayedSum"
|
||||
val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11"
|
||||
val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11"
|
||||
|
||||
// All Model Ids of HnswANNSimilarityEngines
|
||||
val allHnswANNSimilarityEngineModelIds = Seq(
|
||||
ConsumerBasedTwHINRegularUpdateAll20221024,
|
||||
TwoTowerFavALL20220808,
|
||||
DebuggerDemo
|
||||
)
|
||||
|
||||
val ConsumerLogFavBasedInterestedInEmbedding: String =
|
||||
"ConsumerLogFavBasedInterestedIn_ALL_20221228"
|
||||
val ConsumerFollowBasedInterestedInEmbedding: String =
|
||||
"ConsumerFollowBasedInterestedIn_ALL_20221228"
|
||||
|
||||
val RetweetBasedDiffusion: String =
|
||||
"RetweetBasedDiffusion"
|
||||
|
||||
}
|
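Following the naming guideline at the top of ModelConfig, here is a hedged sketch of a small guard that checks a model id against allHnswANNSimilarityEngineModelIds before it is used; the helper is illustrative and not part of ModelConfig.

import com.twitter.cr_mixer.model.ModelConfig

// Illustrative only: fail fast on a model id that was never registered.
def validatedHnswModelId(modelId: String): String = {
  require(
    ModelConfig.allHnswANNSimilarityEngineModelIds.contains(modelId),
    s"Unknown HnswANN model id: $modelId. Add it to ModelConfig.allHnswANNSimilarityEngineModelIds.")
  modelId
}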
@ -0,0 +1,122 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/**
|
||||
* Define name annotated module names here
|
||||
*/
|
||||
object ModuleNames {
|
||||
|
||||
final val FrsStore = "FrsStore"
|
||||
final val UssStore = "UssStore"
|
||||
final val UssStratoColumn = "UssStratoColumn"
|
||||
final val RsxStore = "RsxStore"
|
||||
final val RmsTweetLogFavLongestL2EmbeddingStore = "RmsTweetLogFavLongestL2EmbeddingStore"
|
||||
final val RmsUserFavBasedProducerEmbeddingStore = "RmsUserFavBasedProducerEmbeddingStore"
|
||||
final val RmsUserLogFavInterestedInEmbeddingStore = "RmsUserLogFavInterestedInEmbeddingStore"
|
||||
final val RmsUserFollowInterestedInEmbeddingStore = "RmsUserFollowInterestedInEmbeddingStore"
|
||||
final val StpStore = "StpStore"
|
||||
final val TwiceClustersMembersStore = "TwiceClustersMembersStore"
|
||||
final val TripCandidateStore = "TripCandidateStore"
|
||||
|
||||
final val ConsumerEmbeddingBasedTripSimilarityEngine =
|
||||
"ConsumerEmbeddingBasedTripSimilarityEngine"
|
||||
final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine =
|
||||
"ConsumerEmbeddingBasedTwHINANNSimilarityEngine"
|
||||
final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine =
|
||||
"ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine"
|
||||
final val ConsumersBasedUserAdGraphSimilarityEngine =
|
||||
"ConsumersBasedUserAdGraphSimilarityEngine"
|
||||
final val ConsumersBasedUserVideoGraphSimilarityEngine =
|
||||
"ConsumersBasedUserVideoGraphSimilarityEngine"
|
||||
|
||||
final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine"
|
||||
|
||||
final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine"
|
||||
|
||||
final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine"
|
||||
|
||||
final val ProdSimClustersANNServiceClientName = "ProdSimClustersANNServiceClient"
|
||||
final val ExperimentalSimClustersANNServiceClientName = "ExperimentalSimClustersANNServiceClient"
|
||||
final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1"
|
||||
final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2"
|
||||
final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3"
|
||||
final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5"
|
||||
final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4"
|
||||
final val UnifiedCache = "unifiedCache"
|
||||
final val MLScoreCache = "mlScoreCache"
|
||||
final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache"
|
||||
final val EarlybirdTweetsCache = "earlybirdTweetsCache"
|
||||
final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache =
|
||||
"earlybirdTweetsWithoutRetweetsRepliesCacheStore"
|
||||
final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache =
|
||||
"earlybirdTweetsWithRetweetsRepliesCacheStore"
|
||||
|
||||
final val AbDeciderLogger = "abDeciderLogger"
|
||||
final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger"
|
||||
final val TweetRecsLogger = "tweetRecsLogger"
|
||||
final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger"
|
||||
final val RelatedTweetsLogger = "relatedTweetsLogger"
|
||||
final val UtegTweetsLogger = "utegTweetsLogger"
|
||||
final val AdsRecommendationsLogger = "adsRecommendationLogger"
|
||||
|
||||
final val OfflineSimClustersANNInterestedInSimilarityEngine =
|
||||
"OfflineSimClustersANNInterestedInSimilarityEngine"
|
||||
|
||||
final val RealGraphOonStore = "RealGraphOonStore"
|
||||
final val RealGraphInStore = "RealGraphInStore"
|
||||
|
||||
final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore"
|
||||
final val OfflineTweet2020Hl0El15CandidateStore = "OfflineTweet2020Hl0El15CandidateStore"
|
||||
final val OfflineTweet2020Hl2El15CandidateStore = "OfflineTweet2020Hl2El15CandidateStore"
|
||||
final val OfflineTweet2020Hl2El50CandidateStore = "OfflineTweet2020Hl2El50CandidateStore"
|
||||
final val OfflineTweet2020Hl8El50CandidateStore = "OfflineTweet2020Hl8El50CandidateStore"
|
||||
final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore"
|
||||
|
||||
final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore"
|
||||
final val OfflineFtrAt5Pop1000RankDecay11CandidateStore =
|
||||
"OfflineFtrAt5Pop1000RankDecay11CandidateStore"
|
||||
final val OfflineFtrAt5Pop10000RankDecay11CandidateStore =
|
||||
"OfflineFtrAt5Pop10000RankDecay11CandidateStore"
|
||||
|
||||
final val TwhinCollabFilterStratoStoreForFollow = "TwhinCollabFilterStratoStoreForFollow"
|
||||
final val TwhinCollabFilterStratoStoreForEngagement = "TwhinCollabFilterStratoStoreForEngagement"
|
||||
final val TwhinMultiClusterStratoStoreForFollow = "TwhinMultiClusterStratoStoreForFollow"
|
||||
final val TwhinMultiClusterStratoStoreForEngagement = "TwhinMultiClusterStratoStoreForEngagement"
|
||||
|
||||
final val ProducerBasedUserAdGraphSimilarityEngine =
|
||||
"ProducerBasedUserAdGraphSimilarityEngine"
|
||||
final val ProducerBasedUserTweetGraphSimilarityEngine =
|
||||
"ProducerBasedUserTweetGraphSimilarityEngine"
|
||||
final val ProducerBasedUnifiedSimilarityEngine = "ProducerBasedUnifiedSimilarityEngine"
|
||||
|
||||
final val TweetBasedUserAdGraphSimilarityEngine = "TweetBasedUserAdGraphSimilarityEngine"
|
||||
final val TweetBasedUserTweetGraphSimilarityEngine = "TweetBasedUserTweetGraphSimilarityEngine"
|
||||
final val TweetBasedUserVideoGraphSimilarityEngine = "TweetBasedUserVideoGraphSimilarityEngine"
|
||||
final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine"
|
||||
final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine"
|
||||
|
||||
final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine"
|
||||
|
||||
final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore"
|
||||
final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore"
|
||||
final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore"
|
||||
|
||||
final val UserTweetEntityGraphSimilarityEngine =
|
||||
"UserTweetEntityGraphSimilarityEngine"
|
||||
|
||||
final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine"
|
||||
final val CertoStratoStoreName = "CertoStratoStore"
|
||||
|
||||
final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine"
|
||||
final val SkitHighPrecisionTopicTweetSimilarityEngine =
|
||||
"SkitHighPrecisionTopicTweetSimilarityEngine"
|
||||
final val SkitStratoStoreName = "SkitStratoStore"
|
||||
|
||||
final val HomeNaviGRPCClient = "HomeNaviGRPCClient"
|
||||
final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient"
|
||||
final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient"
|
||||
|
||||
final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore"
|
||||
final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine"
|
||||
|
||||
final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore"
|
||||
}
|
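These constants are used as @Named binding keys so Guice can distinguish multiple bindings of the same type (the UnifiedCache memcached client, the various loggers, and so on). Below is a minimal hedged sketch of how such a key is consumed at an injection site; the class here is illustrative, not actual cr-mixer code, but the annotation usage mirrors the modules later in this change.

import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.memcached.{Client => MemcachedClient}

// Illustrative only: inject the memcached client bound under ModuleNames.UnifiedCache.
@Singleton
class ExampleCacheUser @Inject() (
  @Named(ModuleNames.UnifiedCache) unifiedCacheClient: MemcachedClient)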
@ -0,0 +1,13 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
|
||||
/***
|
||||
* Binds a tweetId with a raw score generated from a single similarity engine.
|
||||
* @param similarityEngineType, which underlying topic source the topic tweet is from
|
||||
*/
|
||||
case class TopicTweetWithScore(
|
||||
tweetId: TweetId,
|
||||
score: Double,
|
||||
similarityEngineType: SimilarityEngineType)
|
@ -0,0 +1,6 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
|
||||
case class TweetWithAuthor(tweetId: TweetId, authorId: UserId)
|
@ -0,0 +1,8 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
|
||||
/***
|
||||
* Binds a tweetId with a raw score generated from a single similarity engine.
|
||||
*/
|
||||
case class TweetWithScore(tweetId: TweetId, score: Double)
|
@ -0,0 +1,12 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.recos.recos_common.thriftscala.SocialProofType
|
||||
|
||||
/***
|
||||
* Bind a tweetId with a raw score and social proofs by type
|
||||
*/
|
||||
case class TweetWithScoreAndSocialProof(
|
||||
tweetId: TweetId,
|
||||
score: Double,
|
||||
socialProofByType: Map[SocialProofType, Seq[Long]])
|
@ -0,0 +1,135 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.bijection.thrift.CompactThriftCodec
|
||||
import com.twitter.ads.entities.db.thriftscala.LineItemObjective
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.DataType
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.GeneralTensor
|
||||
import com.twitter.ml.api.RichDataRecord
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.manhattan.Revenue
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object ActivePromotedTweetStoreModule extends TwitterModule {
|
||||
|
||||
case class ActivePromotedTweetStore(
|
||||
activePromotedTweetMHStore: ReadableStore[String, DataRecord],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends ReadableStore[TweetId, Seq[LineItemInfo]] {
|
||||
override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] = {
|
||||
activePromotedTweetMHStore.get(tweetId.toString).map {
|
||||
_.map { dataRecord =>
|
||||
val richDataRecord = new RichDataRecord(dataRecord)
|
||||
val lineItemIdsFeature: Feature[GeneralTensor] =
|
||||
new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64)
|
||||
|
||||
val lineItemObjectivesFeature: Feature[GeneralTensor] =
|
||||
new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64)
|
||||
|
||||
val lineItemIdsTensor: GeneralTensor = richDataRecord.getFeatureValue(lineItemIdsFeature)
|
||||
val lineItemObjectivesTensor: GeneralTensor =
|
||||
richDataRecord.getFeatureValue(lineItemObjectivesFeature)
|
||||
|
||||
val lineItemIds: Seq[Long] =
|
||||
if (lineItemIdsTensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && lineItemIdsTensor.getInt64Tensor.isSetLongs) {
|
||||
lineItemIdsTensor.getInt64Tensor.getLongs.asScala.map(_.toLong)
|
||||
} else Seq.empty
|
||||
|
||||
val lineItemObjectives: Seq[LineItemObjective] =
|
||||
if (lineItemObjectivesTensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && lineItemObjectivesTensor.getInt64Tensor.isSetLongs) {
|
||||
lineItemObjectivesTensor.getInt64Tensor.getLongs.asScala.map(objective =>
|
||||
LineItemObjective(objective.toInt))
|
||||
} else Seq.empty
|
||||
|
||||
val lineItemInfo =
|
||||
if (lineItemIds.size == lineItemObjectives.size) {
|
||||
lineItemIds.zipWithIndex.map {
|
||||
case (lineItemId, index) =>
|
||||
LineItemInfo(
|
||||
lineItemId = lineItemId,
|
||||
lineItemObjective = lineItemObjectives(index)
|
||||
)
|
||||
}
|
||||
} else Seq.empty
|
||||
|
||||
lineItemInfo
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
def providesActivePromotedTweetStore(
|
||||
manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
|
||||
@Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
|
||||
crMixerStatsReceiver: StatsReceiver
|
||||
): ReadableStore[TweetId, Seq[LineItemInfo]] = {
|
||||
|
||||
val mhConfig = new ManhattanROConfig {
|
||||
val hdfsPath = HDFSPath("")
|
||||
val applicationID = ApplicationID("ads_bigquery_features")
|
||||
val datasetName = DatasetName("active_promoted_tweets")
|
||||
val cluster = Revenue
|
||||
|
||||
override def statsReceiver: StatsReceiver =
|
||||
crMixerStatsReceiver.scope("active_promoted_tweets_mh")
|
||||
}
|
||||
val mhStore: ReadableStore[String, DataRecord] =
|
||||
ManhattanRO
|
||||
.getReadableStoreWithMtls[String, DataRecord](
|
||||
mhConfig,
|
||||
manhattanKVClientMtlsParams
|
||||
)(
|
||||
implicitly[Injection[String, Array[Byte]]],
|
||||
CompactThriftCodec[DataRecord]
|
||||
)
|
||||
|
||||
val underlyingStore =
|
||||
ActivePromotedTweetStore(mhStore, crMixerStatsReceiver.scope("ActivePromotedTweetStore"))
|
||||
val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
|
||||
backingStore = underlyingStore,
|
||||
cacheClient = crMixerUnifiedCacheClient,
|
||||
ttl = 60.minutes,
|
||||
asyncUpdate = false
|
||||
)(
|
||||
valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()),
|
||||
statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"),
|
||||
keyToString = { k: TweetId => s"apt/$k" }
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from(
|
||||
memcachedStore,
|
||||
ttl = 30.minutes,
|
||||
maxKeys = 250000, // the number of active promoted tweets is around 200,000
|
||||
windowSize = 10000L,
|
||||
cacheName = "active_promoted_tweet_cache",
|
||||
maxMultiGetSize = 20
|
||||
)(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore"))
|
||||
|
||||
}
|
||||
|
||||
}
|
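The provider above stacks three read layers: the Manhattan-backed ActivePromotedTweetStore, a memcached layer with a 60-minute TTL, and an in-memory cache with a 30-minute TTL. The framework-free sketch below shows the same read-through layering idea; SimpleCache is an illustrative stand-in for ObservedMemcachedReadableStore/ObservedCachedReadableStore and ignores TTLs and eviction entirely.

import scala.collection.mutable

// Illustrative only: a read-through layer over a slower lookup function.
final class SimpleCache[K, V](lookup: K => Option[V]) {
  private val cache = mutable.Map.empty[K, V]
  def get(key: K): Option[V] =
    cache.get(key).orElse {
      val value = lookup(key)
      value.foreach(v => cache.update(key, v)) // populate on miss
      value
    }
}

// Mirrors the provider above: in-memory cache in front of memcached in front of Manhattan.
def layeredLookup(manhattanLookup: Long => Option[Seq[Long]]): SimpleCache[Long, Seq[Long]] = {
  val memcachedLayer = new SimpleCache[Long, Seq[Long]](manhattanLookup)
  new SimpleCache[Long, Seq[Long]](memcachedLayer.get)
}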
@ -0,0 +1,130 @@
|
||||
scala_library(
|
||||
sources = [
|
||||
"*.scala",
|
||||
"core/*.scala",
|
||||
"grpc_client/*.scala",
|
||||
"similarity_engine/*.scala",
|
||||
"source_signal/*.scala",
|
||||
"thrift_client/*.scala",
|
||||
],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/bijection:core",
|
||||
"3rdparty/jvm/com/twitter/bijection:scrooge",
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/jvm/com/twitter/storehaus:memcache",
|
||||
"3rdparty/jvm/io/grpc:grpc-api",
|
||||
"3rdparty/jvm/io/grpc:grpc-auth",
|
||||
"3rdparty/jvm/io/grpc:grpc-core",
|
||||
"3rdparty/jvm/io/grpc:grpc-netty",
|
||||
"3rdparty/jvm/io/grpc:grpc-protobuf",
|
||||
"3rdparty/jvm/io/grpc:grpc-stub",
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"3rdparty/jvm/org/scalanlp:breeze",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"abdecider/src/main/scala",
|
||||
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
|
||||
"configapi/configapi-abdecider",
|
||||
"configapi/configapi-core",
|
||||
"configapi/configapi-featureswitches:v2",
|
||||
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
|
||||
"featureswitches/featureswitches-core",
|
||||
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
|
||||
"finagle-internal/finagle-grpc/src/main/scala",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
|
||||
"finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers",
|
||||
"finatra-internal/mtls-thriftmux/src/main/scala",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"finatra/inject/inject-modules/src/main/scala",
|
||||
"finatra/inject/inject-thrift-client",
|
||||
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
|
||||
"frigate/frigate-common:util",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"hydra/partition/thrift/src/main/thrift:thrift-scala",
|
||||
"hydra/root/thrift/src/main/thrift:thrift-scala",
|
||||
"mediaservices/commons/src/main/scala:futuretracker",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"qig-ranker/thrift/src/main/thrift:thrift-scala",
|
||||
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store",
|
||||
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
|
||||
"relevance-platform/thrift/src/main/thrift:thrift-scala",
|
||||
"representation-manager/client/src/main/scala/com/twitter/representation_manager",
|
||||
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"representation-scorer/server/src/main/thrift:thrift-scala",
|
||||
"servo/decider",
|
||||
"servo/util/src/main/scala",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
|
||||
"src/java/com/twitter/search/queryparser/query/search:search-query-nodes",
|
||||
"src/scala/com/twitter/algebird_internal/injection",
|
||||
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
|
||||
"src/scala/com/twitter/ml/api/embedding",
|
||||
"src/scala/com/twitter/ml/featurestore/lib",
|
||||
"src/scala/com/twitter/scalding_internal/multiformat/format",
|
||||
"src/scala/com/twitter/simclusters_v2/candidate_source",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/scala/com/twitter/storehaus_internal/manhattan",
|
||||
"src/scala/com/twitter/storehaus_internal/manhattan/config",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache/config",
|
||||
"src/scala/com/twitter/storehaus_internal/offline",
|
||||
"src/scala/com/twitter/storehaus_internal/util",
|
||||
"src/scala/com/twitter/topic_recos/stores",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
|
||||
"src/thrift/com/twitter/frigate:frigate-thrift-scala",
|
||||
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
|
||||
"src/thrift/com/twitter/hermit/stp:hermit-stp-scala",
|
||||
"src/thrift/com/twitter/ml/api:data-java",
|
||||
"src/thrift/com/twitter/ml/api:embedding-scala",
|
||||
"src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala",
|
||||
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
|
||||
"src/thrift/com/twitter/recos:recos-common-scala",
|
||||
"src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
|
||||
"src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala",
|
||||
"src/thrift/com/twitter/search:earlybird-scala",
|
||||
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala",
|
||||
"src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:service-scala",
|
||||
"src/thrift/com/twitter/twistly:twistly-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
"stitch/stitch-storehaus",
|
||||
"stitch/stitch-tweetypie/src/main/scala",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"user-signal-service/thrift/src/main/thrift:thrift-scala",
|
||||
"util-internal/scribe/src/main/scala/com/twitter/logging",
|
||||
"util/util-hashing",
|
||||
],
|
||||
)
|
@ -0,0 +1,52 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Athena
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
|
||||
object BlueVerifiedAnnotationStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.BlueVerifiedAnnotationStore)
|
||||
def providesBlueVerifiedAnnotationStore(
|
||||
statsReceiver: StatsReceiver,
|
||||
manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
|
||||
): ReadableStore[String, BlueVerifiedAnnotationsV2] = {
|
||||
|
||||
implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2)
|
||||
|
||||
val underlyingStore = ManhattanRO
|
||||
.getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2](
|
||||
ManhattanROConfig(
|
||||
HDFSPath(""),
|
||||
ApplicationID("content_recommender_athena"),
|
||||
DatasetName("blue_verified_annotations"),
|
||||
Athena),
|
||||
manhattanKVClientMtlsParams
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from(
|
||||
underlyingStore,
|
||||
ttl = 24.hours,
|
||||
maxKeys = 100000,
|
||||
windowSize = 10000L,
|
||||
cacheName = "blue_verified_annotation_cache"
|
||||
)(statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore"))
|
||||
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.Client
|
||||
import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore
|
||||
import com.twitter.topic_recos.thriftscala.TweetWithScores
|
||||
|
||||
object CertoStratoStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.CertoStratoStoreName)
|
||||
def providesCertoStratoStore(
|
||||
@Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
|
||||
stratoClient: Client,
|
||||
statsReceiver: StatsReceiver
|
||||
): ReadableStore[TopicId, Seq[TweetWithScores]] = {
|
||||
val certoStore = ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))(
|
||||
statsReceiver.scope(ModuleNames.CertoStratoStoreName)).mapValues { topKTweetsWithScores =>
|
||||
topKTweetsWithScores.topTweetsByFollowerL2NormalizedCosineSimilarityScore
|
||||
}
|
||||
|
||||
val memCachedStore = ObservedMemcachedReadableStore
|
||||
.fromCacheClient(
|
||||
backingStore = certoStore,
|
||||
cacheClient = crMixerUnifiedCacheClient,
|
||||
ttl = 10.minutes
|
||||
)(
|
||||
valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()),
|
||||
statsReceiver = statsReceiver.scope("memcached_certo_store"),
|
||||
keyToString = { k => s"certo:${keyHasher.hashKey(k.toString.getBytes)}" }
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]](
|
||||
memCachedStore,
|
||||
ttl = 5.minutes,
|
||||
maxKeys = 100000, // ~150MB max
|
||||
cacheName = "certo_in_memory_cache",
|
||||
windowSize = 10000L
|
||||
)(statsReceiver.scope("certo_in_memory_cache"))
|
||||
}
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest
|
||||
import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse
|
||||
import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserAdGraphStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.ConsumerBasedUserAdGraphStore)
|
||||
def providesConsumerBasedUserAdGraphStore(
|
||||
userAdGraphService: UserAdGraph.MethodPerEndpoint
|
||||
): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] = {
|
||||
new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] {
|
||||
override def get(
|
||||
k: ConsumersBasedRelatedAdRequest
|
||||
): Future[Option[RelatedAdResponse]] = {
|
||||
userAdGraphService.consumersBasedRelatedAds(k).map(Some(_))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
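ConsumersBasedUserAdGraphStoreModule above (and the two nearly identical modules that follow) wrap a thrift MethodPerEndpoint client in a ReadableStore so callers get the usual Future[Option[_]] get interface. Here is a hedged sketch of the same adapter pattern, generalized over any request-to-Future call; the helper is illustrative and not part of these modules.

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future

// Illustrative only: adapt any Req => Future[Resp] call into a ReadableStore.
def storeFromService[Req, Resp](call: Req => Future[Resp]): ReadableStore[Req, Resp] =
  new ReadableStore[Req, Resp] {
    override def get(k: Req): Future[Option[Resp]] = call(k).map(Some(_))
  }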
@ -0,0 +1,30 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.ConsumerBasedUserTweetGraphStore)
|
||||
def providesConsumerBasedUserTweetGraphStore(
|
||||
userTweetGraphService: UserTweetGraph.MethodPerEndpoint
|
||||
): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] = {
|
||||
new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
|
||||
override def get(
|
||||
k: ConsumersBasedRelatedTweetRequest
|
||||
): Future[Option[RelatedTweetResponse]] = {
|
||||
userTweetGraphService.consumersBasedRelatedTweets(k).map(Some(_))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest
|
||||
import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.ConsumerBasedUserVideoGraphStore)
|
||||
def providesConsumerBasedUserVideoGraphStore(
|
||||
userVideoGraphService: UserVideoGraph.MethodPerEndpoint
|
||||
): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] = {
|
||||
new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
|
||||
override def get(
|
||||
k: ConsumersBasedRelatedTweetRequest
|
||||
): Future[Option[RelatedTweetResponse]] = {
|
||||
userVideoGraphService.consumersBasedRelatedTweets(k).map(Some(_))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.timelines.configapi.Config
|
||||
import com.twitter.cr_mixer.param.CrMixerParamConfig
|
||||
import com.twitter.inject.TwitterModule
|
||||
import javax.inject.Singleton
|
||||
|
||||
object CrMixerParamConfigModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
def provideConfig(): Config = {
|
||||
CrMixerParamConfig.config
|
||||
}
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.thriftscala.TweetsWithScore
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object DiffusionStoreModule extends TwitterModule {
|
||||
type UserId = Long
|
||||
implicit val longCodec = implicitly[Injection[Long, Array[Byte]]]
|
||||
implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] =
|
||||
BinaryScalaCodec(TweetsWithScore)
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.RetweetBasedDiffusionRecsMhStore)
|
||||
def retweetBasedDiffusionRecsMhStore(
|
||||
serviceIdentifier: ServiceIdentifier
|
||||
): ReadableStore[Long, TweetsWithScore] = {
|
||||
val manhattanROConfig = ManhattanROConfig(
|
||||
HDFSPath(""), // not needed
|
||||
ApplicationID("cr_mixer_apollo"),
|
||||
DatasetName("diffusion_retweet_tweet_recs"),
|
||||
Apollo
|
||||
)
|
||||
|
||||
buildTweetRecsStore(serviceIdentifier, manhattanROConfig)
|
||||
}
|
||||
|
||||
private def buildTweetRecsStore(
|
||||
serviceIdentifier: ServiceIdentifier,
|
||||
manhattanROConfig: ManhattanROConfig
|
||||
): ReadableStore[Long, TweetsWithScore] = {
|
||||
|
||||
ManhattanRO
|
||||
.getReadableStoreWithMtls[Long, TweetsWithScore](
|
||||
manhattanROConfig,
|
||||
ManhattanKVClientMtlsParams(serviceIdentifier)
|
||||
)(longCodec, tweetRecsInjection)
|
||||
}
|
||||
}
|
@ -0,0 +1,189 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams
import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode
import com.twitter.search.earlybird.thriftscala.EarlybirdService
import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery
import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import javax.inject.Named

object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        // Home based EB filters out retweets and replies
        val earlybirdRequest =
          buildEarlybirdRequest(
            userId,
            FilterOutRetweetsAndReplies,
            DefaultMaxNumTweetPerUser,
            timeoutConfig.earlybirdServerTimeout)
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"),
      keyToString = { k =>
        f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe
      }
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        val earlybirdRequest = buildEarlybirdRequest(
          userId,
          // Notifications based EB keeps retweets and replies
          NotFilterOutRetweetsAndReplies,
          DefaultMaxNumTweetPerUser,
          processingTimeout = timeoutConfig.earlybirdServerTimeout
        )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"),
      keyToString = { k =>
        f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications
      }
    )
  }

  private val keyHasher: KeyHasher = KeyHasher.FNV1A_64

  /**
   * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry.
   * If the value changes, it will increase the size of the memcache.
   */
  private val DefaultMaxNumTweetPerUser: Int = 100
  private val FilterOutRetweetsAndReplies = true
  private val NotFilterOutRetweetsAndReplies = false
  private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15)

  private def buildEarlybirdRequest(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): EarlybirdRequest =
    EarlybirdRequest(
      searchQuery = getThriftSearchQuery(
        seedUserId = seedUserId,
        filterOutRetweetsAndReplies = filterOutRetweetsAndReplies,
        maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser,
        processingTimeout = processingTimeout
      ),
      clientId = Some(EarlybirdClientId),
      timeoutMs = processingTimeout.inMilliseconds.intValue(),
      getOlderResults = Some(false),
      adjustedProtectedRequestParams = None,
      adjustedFullArchiveRequestParams = None,
      getProtectedTweetsOnly = Some(false),
      skipVeryRecentTweets = true,
    )

  private def getThriftSearchQuery(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): ThriftSearchQuery = ThriftSearchQuery(
    serializedQuery = GetEarlybirdQuery(
      None,
      None,
      Set.empty,
      filterOutRetweetsAndReplies
    ).map(_.serialize),
    fromUserIDFilter64 = Some(Seq(seedUserId)),
    numResults = maxNumTweetsPerSeedUser,
    rankingMode = ThriftSearchRankingMode.Recency,
    collectorParams = Some(
      CollectorParams(
        // numResultsToReturn defines how many results each EB shard will return to search root
        numResultsToReturn = maxNumTweetsPerSeedUser,
        // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching.
        terminationParams =
          GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout)
      )),
    facetFieldNames = Some(FacetsToFetch),
    resultMetadataOptions = Some(MetadataOptions),
    searchStatusIds = None
  )

  private def getEarlybirdSearchResult(
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    request: EarlybirdRequest,
    statsReceiver: StatsReceiver
  ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient
    .search(request)
    .map { response =>
      response.responseCode match {
        case EarlybirdResponseCode.Success =>
          val earlybirdSearchResult =
            response.searchResults
              .map {
                _.results
                  .map(searchResult => searchResult.id)
              }
          statsReceiver.scope("result").stat("size").add(earlybirdSearchResult.size)
          earlybirdSearchResult
        case e =>
          statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr()
          Some(Seq.empty)
      }
    }

}
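Editor's note: both providers above wrap the raw Earlybird query in ObservedMemcachedReadableStore.fromCacheClient, so repeated lookups for the same user within the 15 minute TTL are served from memcache under a hashed, prefixed key. The toy wrapper below mirrors that read-through idea in plain storehaus terms; it is a sketch only, not the internal class (it omits TTL handling, async updates, stats, and the "uEBRBHM:"/"uEBRBN:" key prefixes).

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import scala.collection.concurrent.TrieMap

// Minimal read-through cache over any ReadableStore: serve hits from the map,
// fall through to the backing store on a miss and remember the result.
class ReadThroughStore[K, V](backing: ReadableStore[K, V]) extends ReadableStore[K, V] {
  private val cache = TrieMap.empty[K, V]

  override def get(k: K): Future[Option[V]] =
    cache.get(k) match {
      case some @ Some(_) => Future.value(some) // cache hit
      case None =>
        backing.get(k).onSuccess {
          case Some(v) => cache.put(k, v) // populate on a successful miss
          case None => ()
        }
    }
}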
@ -0,0 +1,195 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.{thriftscala => api}
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton

object EmbeddingStoreModule extends TwitterModule {
  type UserId = Long
  implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
    CompactScalaCodec(api.Embedding)
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore"
  @Provides
  @Singleton
  @Named(TwHINEmbeddingRegularUpdateMhStoreName)
  def twHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[TweetId, api.Embedding](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("cr_mixer_apollo"),
          DatasetName("twhin_regular_update_tweet_embedding_apollo"),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.TweetId(tweetId) =>
          tweetId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName =
    "ConsumerBasedTwHINEmbeddingRegularUpdateMhStore"
  @Provides
  @Singleton
  @Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName)
  def consumerBasedTwHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("cr_mixer_apollo"),
          DatasetName("twhin_user_embedding_regular_update_apollo"),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore"
  @Provides
  @Singleton
  @Named(TwoTowerFavConsumerEmbeddingMhStoreName)
  def twoTowerFavConsumerEmbeddingMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("cr_mixer_apollo"),
          DatasetName("two_tower_fav_user_embedding_apollo"),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName"
  @Provides
  @Singleton
  @Named(DebuggerDemoUserEmbeddingMhStoreName)
  def debuggerDemoUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    val manhattanROConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName("experimental_user_embedding"),
      Apollo
    )
    buildUserEmbeddingStore(serviceIdentifier, manhattanROConfig)
  }

  final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore"
  @Provides
  @Singleton
  @Named(DebuggerDemoTweetEmbeddingMhStoreName)
  def debuggerDemoTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    val manhattanROConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName("experimental_tweet_embedding"),
      Apollo
    )
    buildTweetEmbeddingStore(serviceIdentifier, manhattanROConfig)
  }

  private def buildUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]
    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  private def buildTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[TweetId, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.TweetId(tweetId) =>
          tweetId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }
}
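Editor's note: every provider in this module stores embeddings under a raw Long key in Manhattan but exposes them keyed by the thrift InternalId union, using the composeKeyMapping enrichment to adapt the key type. The sketch below isolates that one idea with an in-memory store; KeyMappingSketch and the String value type are illustrative, not part of the module.

import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore

object KeyMappingSketch {
  // A store keyed by raw tweet id (Long), as the underlying dataset actually is.
  val byLong: ReadableStore[Long, String] =
    ReadableStore.fromMap(Map(123L -> "embedding-bytes"))

  // Adapt it to the InternalId union key that callers use; unsupported variants
  // surface as an exception, exactly as in the providers above.
  val byInternalId: ReadableStore[InternalId, String] =
    byLong.composeKeyMapping[InternalId] {
      case InternalId.TweetId(tweetId) => tweetId
      case _ => throw new UnsupportedOperationException("Invalid Internal Id")
    }
}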
@ -0,0 +1,29 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.storehaus.ReadableStore
import javax.inject.Named

object FrsStoreModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.FrsStore)
  def providesFrsStore(
    frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
    statsReceiver: StatsReceiver,
    decider: CrMixerDecider
  ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = {
    ObservedReadableStore(FrsStore(frsClient, statsReceiver, decider))(
      statsReceiver.scope("follow_recommendations_store"))
  }
}
@ -0,0 +1,17 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import javax.inject.Singleton

object MHMtlsParamsModule extends TwitterModule {
  @Singleton
  @Provides
  def providesManhattanMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams = {
    ManhattanKVClientMtlsParams(serviceIdentifier)
  }
}
@ -0,0 +1,150 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton

object OfflineCandidateStoreModule extends TwitterModule {
  type UserId = Long
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020CandidateStore)
  def offlineTweet2020CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore)
  def offlineTweet2020Hl0El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore)
  def offlineTweet2020Hl2El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore)
  def offlineTweet2020Hl2El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore)
  def offlineTweet2020Hl8El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweetMTSCandidateStore)
  def offlineTweetMTSCandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_mts_consumer_embeddings"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFavDecayedSumCandidateStore)
  def offlineFavDecayedSumCandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_decayed_sum"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore)
  def offlineFtrAt5Pop1000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore)
  def offlineFtrAt5Pop10000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1"
    )
  }

  private def buildOfflineCandidateStore(
    serviceIdentifier: ServiceIdentifier,
    datasetName: String
  ): ReadableStore[UserId, CandidateTweetsList] = {
    ManhattanRO
      .getReadableStoreWithMtls[Long, CandidateTweetsList](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("multi_type_simclusters"),
          DatasetName(datasetName),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )
  }

}
@ -0,0 +1,39 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
import javax.inject.Singleton
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.wtf.candidate.thriftscala.CandidateSeq

object RealGraphOonStoreModule extends TwitterModule {

  private val userRealGraphOonColumnPath: Flag[String] = flag[String](
    name = "crMixer.userRealGraphOonColumnPath",
    default = "recommendations/twistly/userRealgraphOon",
    help = "Strato column path for user real graph OON Store"
  )

  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphOonStore)
  def providesRealGraphOonStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): ReadableStore[UserId, CandidateSeq] = {
    val realGraphOonStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, CandidateSeq](stratoClient, userRealGraphOonColumnPath())

    ObservedReadableStore(
      realGraphOonStratoFetchableStore
    )(statsReceiver.scope("user_real_graph_oon_store"))
  }
}
@ -0,0 +1,67 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.wtf.candidate.thriftscala.CandidateSeq

object RealGraphStoreMhModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphInStore)
  def providesRealGraphStoreMh(
    decider: CrMixerDecider,
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
  ): ReadableStore[UserId, CandidateSeq] = {

    implicit val valueCodec = new BinaryScalaCodec(CandidateSeq)
    val underlyingStore = ManhattanRO
      .getReadableStoreWithMtls[UserId, CandidateSeq](
        ManhattanROConfig(
          HDFSPath(""),
          ApplicationID("cr_mixer_apollo"),
          DatasetName("real_graph_scores_apollo"),
          Apollo),
        manhattanKVClientMtlsParams
      )

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = underlyingStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 24.hours,
      )(
        valueInjection = valueCodec,
        statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"),
        keyToString = { k: UserId => s"uRGraph/$k" }
      )

    DeciderableReadableStore(
      memCachedStore,
      decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey),
      statsReceiver.scope("RealGraphMh")
    )
  }
}
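Editor's note: the final wrapper here is DeciderableReadableStore, which puts the memcached real-graph store behind the enable_real_graph_mh_store decider (default availability 0 in the decider.yml earlier in this commit, so the store is effectively off until ramped). The toy class below sketches that gating idea in plain storehaus terms; it mirrors the intent, not the internal implementation, and GatedStore is a hypothetical name.

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future

// When the gate is closed the store answers None (an empty result) instead of
// touching the backend at all; when open it delegates to the underlying store.
class GatedStore[K, V](underlying: ReadableStore[K, V], gate: () => Boolean)
    extends ReadableStore[K, V] {
  override def get(k: K): Future[Option[V]] =
    if (gate()) underlying.get(k) else Future.None
}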
@ -0,0 +1,107 @@
package com.twitter.cr_mixer.module

import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.google.inject.Provides
import com.google.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}

object RepresentationManagerModule extends TwitterModule {
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"

  @Provides
  @Singleton
  @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore)
  def providesRepresentationManagerTweetStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersTweetColPath,
          SimClustersEmbeddingView(
            EmbeddingType.LogFavLongestL2EmbeddingTweet,
            ModelVersion.Model20m145k2020))
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_tweet_log_fav_longest_l2_store"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore)
  def providesRepresentationManagerUserFavBasedProducerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersUserColPath,
          SimClustersEmbeddingView(
            EmbeddingType.FavBasedProducer,
            ModelVersion.Model20m145k2020
          )
        )
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_user_fav_based_producer_store"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore)
  def providesRepresentationManagerUserLogFavConsumerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersUserColPath,
          SimClustersEmbeddingView(
            EmbeddingType.LogFavBasedUserInterestedIn,
            ModelVersion.Model20m145k2020
          )
        )
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_user_log_fav_interestedin_store"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore)
  def providesRepresentationManagerUserFollowInterestedInEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersUserColPath,
          SimClustersEmbeddingView(
            EmbeddingType.FollowBasedUserInterestedIn,
            ModelVersion.Model20m145k2020
          )
        )
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_user_follow_interestedin_store"))
  }
}
@ -0,0 +1,56 @@
package com.twitter.cr_mixer.module

import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.storehaus.ReadableStore
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.hermit.store.common.ObservedReadableStore
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.representationscorer.thriftscala.ListScoreId

object RepresentationScorerModule extends TwitterModule {

  private val rsxColumnPath = "recommendations/representation_scorer/listScore"

  private final val SimClusterModelVersion = ModelVersion.Model20m145k2020
  private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet

  @Provides
  @Singleton
  @Named(ModuleNames.RsxStore)
  def providesRepresentationScorerStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[(UserId, TweetId), Double] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath).composeKeyMapping[(
          UserId,
          TweetId
        )] { key =>
          representationScorerStoreKeyMapping(key._1, key._2)
        }
    )(statsReceiver.scope("rsx_store"))
  }

  private def representationScorerStoreKeyMapping(t1: TweetId, t2: TweetId): ListScoreId = {
    ListScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity,
      modelVersion = SimClusterModelVersion,
      targetEmbeddingType = TweetEmbeddingType,
      targetId = InternalId.TweetId(t1),
      candidateEmbeddingType = TweetEmbeddingType,
      candidateIds = Seq(InternalId.TweetId(t2))
    )
  }
}
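Editor's note: this store scores a (UserId, TweetId) pair by translating it into a representation-scorer ListScoreId request. Note that representationScorerStoreKeyMapping declares both parameters as TweetId even though the first argument it receives is the user id of the pair; both aliases are Long, so this compiles, but the parameter names are misleading. The sketch below shows the caller-side view only, with an in-memory stand-in for the injected binding; RepresentationScorerUsageSketch and the hard-coded ids are hypothetical.

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Await

object RepresentationScorerUsageSketch {
  // Stand-in for the ModuleNames.RsxStore binding; the real store issues one
  // Strato listScore request per (userId, tweetId) pair.
  val rsxStore: ReadableStore[(Long, Long), Double] =
    ReadableStore.fromMap(Map((12L, 34L) -> 0.42))

  // A missing score comes back as None rather than 0.0, so callers can
  // distinguish "unscored" from "scored low".
  val score: Option[Double] = Await.result(rsxStore.get((12L, 34L)))
}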
@ -0,0 +1,90 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton

/**
 * In this example we build a [[StandardSimilarityEngine]] to wrap a dummy store
 */
object SimpleSimilarityEngineModule extends TwitterModule {
  @Provides
  @Singleton
  def providesSimpleSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): StandardSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyStore = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
        3L -> Seq((300L, 3.0), (301L, 3.0))
      ))

    new StandardSimilarityEngine[UserId, (TweetId, Double)](
      implementingStore = dummyStore,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997),
      globalStats = globalStats,
      engineConfig = SimilarityEngineConfig(
        timeout = timeoutConfig.similarityEngineTimeout,
        gatingConfig = GatingConfig(
          deciderConfig = None,
          enableFeatureSwitch = None
        )
      )
    )
  }
}

/**
 * In this example we build a [[LookupSimilarityEngine]] to wrap a dummy store with 2 versions
 */
object LookupSimilarityEngineModule extends TwitterModule {
  @Provides
  @Singleton
  def providesLookupSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): LookupSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyStoreV1 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
      ))

    val dummyStoreV2 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
      ))

    new LookupSimilarityEngine[UserId, (TweetId, Double)](
      versionedStoreMap = Map(
        "V1" -> dummyStoreV1,
        "V2" -> dummyStoreV2
      ),
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998),
      globalStats = globalStats,
      engineConfig = SimilarityEngineConfig(
        timeout = timeoutConfig.similarityEngineTimeout,
        gatingConfig = GatingConfig(
          deciderConfig = None,
          enableFeatureSwitch = None
        )
      )
    )
  }

}
@ -0,0 +1,33 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import javax.inject.Named

object SimClustersANNServiceNameToClientMapper extends TwitterModule {

  @Provides
  @Singleton
  def providesSimClustersANNServiceNameToClientMapping(
    @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint
  ): Map[String, SimClustersANNService.MethodPerEndpoint] = {
    Map[String, SimClustersANNService.MethodPerEndpoint](
      "simclusters-ann" -> simClustersANNServiceProd,
      "simclusters-ann-experimental" -> simClustersANNServiceExperimental,
      "simclusters-ann-1" -> simClustersANNService1,
      "simclusters-ann-2" -> simClustersANNService2,
      "simclusters-ann-3" -> simClustersANNService3,
      "simclusters-ann-5" -> simClustersANNService5,
      "simclusters-ann-4" -> simClustersANNService4
    )
  }
}
@ -0,0 +1,65 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.thriftscala.TopicTopTweets
import com.twitter.topic_recos.thriftscala.TopicTweet
import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey

/**
 * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job
 */
object SkitStratoStoreModule extends TwitterModule {

  val column = "recommendations/topic_recos/topicTopTweets"

  @Provides
  @Singleton
  @Named(ModuleNames.SkitStratoStoreName)
  def providesSkitStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = {
    val skitStore = ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column))(
      statsReceiver.scope(ModuleNames.SkitStratoStoreName)).mapValues { topicTopTweets =>
      topicTopTweets.topTweets
    }

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = skitStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()),
        statsReceiver = statsReceiver.scope("memcached_skit_store"),
        keyToString = { k => s"skit:${keyHasher.hashKey(k.toString.getBytes)}" }
      )

    ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]](
      memCachedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "skit_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("skit_in_memory_cache"))
  }
}
@ -0,0 +1,39 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.hermit.stp.thriftscala.STPResult
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Named

object StrongTiePredictionStoreModule extends TwitterModule {

  private val strongTiePredictionColumnPath: Flag[String] = flag[String](
    name = "crMixer.strongTiePredictionColumnPath",
    default = "onboarding/userrecs/strong_tie_prediction_big",
    help = "Strato column path for StrongTiePredictionStore"
  )

  @Provides
  @Singleton
  @Named(ModuleNames.StpStore)
  def providesStrongTiePredictionStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, STPResult] = {
    val strongTiePredictionStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, STPResult](stratoClient, strongTiePredictionColumnPath())

    ObservedReadableStore(
      strongTiePredictionStratoFetchableStore
    )(statsReceiver.scope("strong_tie_prediction_big_store"))
  }
}
@ -0,0 +1,34 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
import javax.inject.Named

object TripCandidateStoreModule extends TwitterModule {
  private val stratoColumn = "trends/trip/tripTweetsDataflowProd"

  @Provides
  @Named(ModuleNames.TripCandidateStore)
  def providesSimClustersTripCandidateStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient
  ): ReadableStore[TripDomain, Seq[TripTweet]] = {
    val tripCandidateStratoFetchableStore =
      StratoFetchableStore
        .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn)
        .mapValues(_.tweets)

    ObservedReadableStore(
      tripCandidateStratoFetchableStore
    )(statsReceiver.scope("simclusters_trip_candidate_store"))
  }
}
@ -0,0 +1,205 @@
package com.twitter.cr_mixer.module

import com.google.inject.Module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.frigate.common.store.health.TweetHealthModelStore
import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig
import com.twitter.frigate.common.store.health.UserHealthModelStore
import com.twitter.frigate.thriftscala.TweetHealthScores
import com.twitter.frigate.thriftscala.UserAgathaScores
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.contentrecommender.store.TweetInfoStore
import com.twitter.contentrecommender.store.TweetyPieFieldsStore
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus
import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores
import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore
import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore
import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores
import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.tweetypie.thriftscala.TweetService
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer

import javax.inject.Named

object TweetInfoStoreModule extends TwitterModule {
  implicit val timer: Timer = new JavaTimer(true)
  override def modules: Seq[Module] = Seq(UnifiedCacheClient)

  @Provides
  @Singleton
  def providesTweetInfoStore(
    statsReceiver: StatsReceiver,
    serviceIdentifier: ServiceIdentifier,
    stratoClient: StratoClient,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    tweetyPieService: TweetService.MethodPerEndpoint,
    userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint,
    @Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[
      String,
      BlueVerifiedAnnotationsV2
    ],
    decider: CrMixerDecider
  ): ReadableStore[TweetId, TweetInfo] = {

    val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] = {
      val underlyingStore =
        ObservedReadableStore(new ReadableStore[TweetId, TweetEngagementScores] {
          override def get(
            k: TweetId
          ): Future[Option[TweetEngagementScores]] = {
            userTweetGraphPlusService.tweetEngagementScore(k).map {
              Some(_)
            }
          }
        })(statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore"))

      DeciderableReadableStore(
        underlyingStore,
        decider.deciderGateBuilder.idGate(
          DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey),
        statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")
      )

    }

    val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] = {
      val underlyingStore = TweetHealthModelStore.buildReadableStore(
        stratoClient,
        Some(
          TweetHealthModelStoreConfig(
            enablePBlock = true,
            enableToxicity = true,
            enablePSpammy = true,
            enablePReported = true,
            enableSpammyTweetContent = true,
            enablePNegMultimodal = true,
          ))
      )(statsReceiver.scope("UnderlyingTweetHealthModelStore"))

      DeciderableReadableStore(
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = underlyingStore,
          cacheClient = crMixerUnifiedCacheClient,
          ttl = 2.hours
        )(
          valueInjection = BinaryScalaCodec(TweetHealthScores),
          statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"),
          keyToString = { k: TweetId => s"tHMS/$k" }
        ),
        decider.deciderGateBuilder.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey),
        statsReceiver.scope("TweetHealthModelStore")
      ) // use s"tHMS/$k" instead of s"tweetHealthModelStore/$k" to differentiate from CR cache
    }

    val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] = {
      val underlyingStore = UserHealthModelStore.buildReadableStore(stratoClient)(
        statsReceiver.scope("UnderlyingUserHealthModelStore"))
      DeciderableReadableStore(
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = underlyingStore,
          cacheClient = crMixerUnifiedCacheClient,
          ttl = 18.hours
        )(
          valueInjection = BinaryScalaCodec(UserAgathaScores),
          statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"),
          keyToString = { k: UserId => s"uHMS/$k" }
        ),
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserAgathaScoreDeciderKey),
        statsReceiver.scope("UserHealthModelStore")
      )
    }

    val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] = {
      val underlyingStore =
        UserMediaRepresentationHealthStore.buildReadableStore(
          manhattanKVClientMtlsParams,
          statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore")
        )
      DeciderableReadableStore(
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = underlyingStore,
          cacheClient = crMixerUnifiedCacheClient,
          ttl = 12.hours
        )(
          valueInjection = BinaryScalaCodec(UserMediaRepresentationScores),
          statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"),
          keyToString = { k: UserId => s"uMRHS/$k" }
        ),
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey),
        statsReceiver.scope("UserMediaRepresentationHealthStore")
      )
    }

    val magicRecsRealTimeAggregatesStore: ReadableStore[
      TweetId,
      MagicRecsRealTimeAggregatesScores
    ] = {
      val underlyingStore =
        MagicRecsRealTimeAggregatesStore.buildReadableStore(
          serviceIdentifier,
          statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores")
        )
      DeciderableReadableStore(
        underlyingStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore),
        statsReceiver.scope("MagicRecsRealTimeAggregatesStore")
      )
    }

    val tweetInfoStore: ReadableStore[TweetId, TweetInfo] = {
      val underlyingStore = TweetInfoStore(
        TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService),
        userMediaRepresentationHealthStore,
        magicRecsRealTimeAggregatesStore,
        tweetEngagementScoreStore,
        blueVerifiedAnnotationStore
      )(statsReceiver.scope("tweetInfoStore"))

      val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = underlyingStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 15.minutes,
        // Hydrating tweetInfo is now a required step for all candidates,
        // hence we needed to tune these thresholds.
        asyncUpdate = serviceIdentifier.environment == "prod"
      )(
        valueInjection = BinaryScalaCodec(TweetInfo),
        statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"),
        keyToString = { k: TweetId => s"tIS/$k" }
      )

      ObservedCachedReadableStore.from(
        memcachedStore,
        ttl = 15.minutes,
        maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB
        windowSize = 10000L,
        cacheName = "tweet_info_cache",
        maxMultiGetSize = 20
      )(statsReceiver.scope("inMemoryCachedTweetInfoStore"))
    }
    tweetInfoStore
  }
}
@ -0,0 +1,42 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers

object TweetRecentEngagedUserStoreModule extends TwitterModule {

  private val tweetRecentEngagedUsersStoreDefaultVersion =
    0 // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion)
  private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String](
    name = "crMixer.tweetRecentEngagedUsersColumnPath",
    default = "recommendations/twistly/tweetRecentEngagedUsers",
    help = "Strato column path for TweetRecentEngagedUsersStore"
  )
  private type Version = Long

  @Provides
  @Singleton
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, TweetRecentEngagedUsers] = {
    val tweetRecentEngagedUsersStratoFetchableStore = StratoFetchableStore
      .withUnitView[(TweetId, Version), TweetRecentEngagedUsers](
        stratoClient,
        tweetRecentEngagedUsersColumnPath()).composeKeyMapping[TweetId](tweetId =>
        (tweetId, tweetRecentEngagedUsersStoreDefaultVersion))

    ObservedReadableStore(
      tweetRecentEngagedUsersStratoFetchableStore
    )(statsReceiver.scope("tweet_recent_engaged_users_store"))
  }
}
@ -0,0 +1,32 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore
import com.twitter.simclusters_v2.common.UserId
import javax.inject.Named

object TweetRecommendationResultsStoreModule extends TwitterModule {
  @Provides
  @Singleton
  def providesTweetRecommendationResultsStore(
    @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient,
    statsReceiver: StatsReceiver
  ): ReadableWritableStore[UserId, CrMixerTweetResponse] = {
    ObservedReadableWritableMemcacheStore.fromCacheClient(
      cacheClient = tweetRecommendationResultsCacheClient,
      ttl = 24.hours)(
      valueInjection = BinaryScalaCodec(CrMixerTweetResponse),
      statsReceiver = statsReceiver.scope("TweetRecommendationResultsMemcacheStore"),
      keyToString = { k: UserId => k.toString }
    )
  }
}
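Editor's note: unlike the read-only stores elsewhere in this commit, this module appears to bind a read-write cache of final recommendation responses, keyed by user id with a 24-hour TTL. The sketch below shows the write-then-read flow using the plain storehaus Store abstraction and an in-memory JMapStore as a stand-in for memcache; it illustrates the caller-side pattern, not the hermit ReadableWritableStore API itself.

import com.twitter.storehaus.{JMapStore, Store}
import com.twitter.util.Await

object ResultsCacheUsageSketch {
  def main(args: Array[String]): Unit = {
    // In-memory stand-in; the real binding stores CrMixerTweetResponse per UserId.
    val store: Store[Long, String] = new JMapStore[Long, String]

    Await.result(store.put((42L, Some("cached-response")))) // write path
    val cached = Await.result(store.get(42L))               // read path: Some("cached-response")
    Await.result(store.put((42L, None)))                    // deletion is a put of None
    println(cached)
  }
}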
@ -0,0 +1,67 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named

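/**
 * Provides Strato-backed stores that fetch TwHIN collaborative-filtering tweet candidates for a
 * user. All four providers read the same column but request different views: follow vs.
 * engagement signals, and the original collab-filter clusters vs. the multi-cluster variants.
 */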
object TwhinCollabFilterStratoStoreModule extends TwitterModule {

  val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User"

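  // The view strings below appear to name specific candidate-set snapshots (for example, a
  // follow-based snapshot dated 2022-03-10); each view is passed to Strato together with the
  // Long (user) key.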
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  def providesTwhinCollabFilterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("follow_2022_03_10_c_500K")
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement)
  def providesTwhinCollabFilterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("engagement_2022_04_10_c_500K"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow)
  def providesTwhinMultiClusterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("multiclusterFollow20220921")
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement)
  def providesTwhinMultiClusterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("multiclusterEng20220921"))
  }
}
@ -0,0 +1,42 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.thriftscala.OrderedClustersAndMembers
import javax.inject.Named

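/**
 * Provides a Strato-backed store that fetches a user's TWICE clusters and their members
 * (OrderedClustersAndMembers) from a SimClusters v2 embedding column.
 */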
object TwiceClustersMembersStoreModule extends TwitterModule {

  private val twiceClustersMembersColumnPath: Flag[String] = flag[String](
    name = "crMixer.twiceClustersMembersColumnPath",
    default =
      "recommendations/simclusters_v2/embeddings/TwiceClustersMembersLargestDimApeSimilarity",
    help = "Strato column path for TwiceClustersMembersStore"
  )

  @Provides
  @Singleton
  @Named(ModuleNames.TwiceClustersMembersStore)
  def providesTwiceClustersMembersStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, OrderedClustersAndMembers] = {
    val twiceClustersMembersStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, OrderedClustersAndMembers](
        stratoClient,
        twiceClustersMembersColumnPath())

    ObservedReadableStore(
      twiceClustersMembersStratoFetchableStore
    )(statsReceiver.scope("twice_clusters_members_largestDimApe_similarity_store"))
  }
}
@ -0,0 +1,83 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.storehaus_internal.memcache.MemcacheStore
import com.twitter.storehaus_internal.util.ClientName
import com.twitter.storehaus_internal.util.ZkEndPoint
import javax.inject.Named

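/**
 * Provides the Memcached clients used by CR-Mixer: the unified Content Recommender cache, the
 * getTweetRecommendations() results cache, and the Earlybird recency-based similarity engine
 * cache. Each client is exposed as a named binding so callers can inject the one they need.
 */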
object UnifiedCacheClient extends TwitterModule {

  private val TIME_OUT = 20.milliseconds

  val crMixerUnifiedCacheDest: Flag[String] = flag[String](
    name = "crMixer.unifiedCacheDest",
    default = "/s/cache/content_recommender_unified_v2",
    help = "Wily path to Content Recommender unified cache"
  )

  val tweetRecommendationResultsCacheDest: Flag[String] = flag[String](
    name = "tweetRecommendationResults.CacheDest",
    default = "/s/cache/tweet_recommendation_results",
    help = "Wily path to CrMixer getTweetRecommendations() results cache"
  )

  val earlybirdTweetsCacheDest: Flag[String] = flag[String](
    name = "earlybirdTweets.CacheDest",
    default = "/s/cache/crmixer_earlybird_tweets",
    help = "Wily path to CrMixer Earlybird Recency Based Similarity Engine result cache"
  )

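  // The three providers below differ only in client name and Wily destination; they share the
  // same 20ms timeout and report metrics under the "cache_client" stats scope.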
  @Provides
  @Singleton
  @Named(ModuleNames.UnifiedCache)
  def provideUnifiedCacheClient(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    MemcacheStore.memcachedClient(
      name = ClientName("memcache-content-recommender-unified"),
      dest = ZkEndPoint(crMixerUnifiedCacheDest()),
      statsReceiver = statsReceiver.scope("cache_client"),
      serviceIdentifier = serviceIdentifier,
      timeout = TIME_OUT
    )

  @Provides
  @Singleton
  @Named(ModuleNames.TweetRecommendationResultsCache)
  def providesTweetRecommendationResultsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    MemcacheStore.memcachedClient(
      name = ClientName("memcache-tweet-recommendation-results"),
      dest = ZkEndPoint(tweetRecommendationResultsCacheDest()),
      statsReceiver = statsReceiver.scope("cache_client"),
      serviceIdentifier = serviceIdentifier,
      timeout = TIME_OUT
    )

  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdTweetsCache)
  def providesEarlybirdTweetsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    MemcacheStore.memcachedClient(
      name = ClientName("memcache-crmixer-earlybird-tweets"),
      dest = ZkEndPoint(earlybirdTweetsCacheDest()),
      statsReceiver = statsReceiver.scope("cache_client"),
      serviceIdentifier = serviceIdentifier,
      timeout = TIME_OUT
    )
}
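// A minimal wiring sketch, assuming a Finatra-style server (the server object name and the
// module list shown here are hypothetical, not part of this diff):
//
//   object CrMixerServerMain extends ThriftServer {
//     override val modules = Seq(
//       UnifiedCacheClient,
//       TweetRecommendationResultsStoreModule,
//       TweetRecentEngagedUserStoreModule,
//       TwhinCollabFilterStratoStoreModule,
//       TwiceClustersMembersStoreModule)
//     // ...
//   }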