Twitter Recommendation Algorithm

Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
This commit is contained in:
twitter-team
2023-03-31 17:36:31 -05:00
commit ef4c5eb65e
5364 changed files with 460239 additions and 0 deletions

View File

@ -0,0 +1,8 @@
# Resources target for this directory: picks up the XML and YAML files that sit here,
# plus the YAML files under config/.
resources(
sources = [
"*.xml",
"*.yml",
"config/*.yml",
],
tags = ["bazel-compatible"],
)

View File

@ -0,0 +1,146 @@
# The keys in this file correspond to the DeciderValues defined in
# https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala
dark_traffic_filter:
comment: Proportion of the requests that are forwarded as dark traffic to the proxy
default_availability: 0
enable_tweet_recommendations_home_product:
comment: Proportion of requests where we return an actual response for TweetRecommendations Home product
default_availability: 10000
enable_tweet_health_score:
comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute TweetHealthModelScore"
default_availability: 10000
enable_user_agatha_score:
comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute UserHealthModelScore"
default_availability: 10000
enable_user_tweet_entity_graph_traffic:
comment: "Enable the traffic to user tweet entity graph to fetch liked-by tweets candidates"
default_availability: 10000
enable_user_tweet_graph_traffic:
comment: "Enable the traffic to user tweet graph to fetch similar tweets candidates"
default_availability: 10000
enable_user_video_graph_traffic:
comment: "Enable the traffic to user video graph to fetch similar tweets candidates"
default_availability: 10000
enable_user_ad_graph_traffic:
comment: "Enable the traffic to user ad graph to fetch similar tweets candidates"
default_availability: 10000
enable_qig_similar_tweets_traffic:
comment: "Enable the traffic to QIG to fetch similar tweet candidates"
default_availability: 0
enable_frs_traffic:
comment: "Enable the traffic to FRS to fetch user follow recommendations"
default_availability: 0
enable_hydra_dark_traffic:
comment: "Enable dark traffic to hydra"
default_availability: 0
enable_real_graph_mh_store:
comment: "Enable traffic for the real graph manhattan based store"
default_availability: 0
enable_simclusters_ann_experimental_dark_traffic:
comment: "Enable dark traffic to simclusters-ann-experimental"
default_availability: 0
enable_simclusters_ann_2_dark_traffic:
comment: "Enable dark traffic to prod SimClustersANN2"
default_availability: 0
enable_user_state_store:
comment: "Enable traffic to user state store to hydrate user state"
default_availability: 0
upper_funnel_per_step_scribe_rate:
comment: "Enable Upper Funnel Event Scribe Sampling (fetch, pre-rank, interleave etc.) for getTweetsRecommendations() endpoint"
default_availability: 0
kafka_message_scribe_sample_rate:
comment: "Gates the production of forked scribe messages to kafka for the async feature hydrator"
default_availability: 0
top_level_api_ddg_metrics_scribe_rate:
comment: "Enable Top Level API DDG Metrics Scribe Sampling for getTweetsRecommendations() endpoint"
default_availability: 0
ads_recommendations_per_experiment_scribe_rate:
comment: "Percentage of DDG traffic to Scribe for getAdsRecommendations() endpoint"
default_availability: 0
enable_loadshedding_getTweetRecommendations:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getTweetRecommendations_Home:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getTweetRecommendations_Notifications:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getTweetRecommendations_Email:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryTweet:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryTweet_Home:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryTweet_MoreTweetsModule:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryAuthor:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryAuthor_MoreTweetsModule:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getFrsBasedTweetRecommendations_Home:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getFrsBasedTweetRecommendations_Notifications:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_user_media_representation_store:
comment: "Enable fetching user nudity rate signal from Media Understanding"
default_availability: 0
enable_magic_recs_real_time_aggregates_store:
comment: "Enable fetching real time aggregates features from Magic Recs memcache"
default_availability: 0
enable_utg_realtime_tweet_engagement_score:
comment: "Enable fetching real time tweet engagement score from utg-plus"
default_availability: 0
get_tweet_recommendations_cache_rate:
comment: "Proportion of users where getTweetRecommendations() request and responses will be cached"
default_availability: 1000
enable_earlybird_traffic:
comment: "Enable fetching tweet candidates from Earlybird"
default_availability: 0
enable_scribe_for_blue_verified_tweet_candidates:
comment: "Enable scribing for tweet candidates from Blue Verified users"
default_availability: 0

View File

@ -0,0 +1,168 @@
<configuration>
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
<!-- ===================================================== -->
<!-- Service Config -->
<!-- ===================================================== -->
<property name="DEFAULT_SERVICE_PATTERN"
value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
<property name="DEFAULT_ACCESS_PATTERN"
value="%msg"/>
<!-- ===================================================== -->
<!-- Common Config -->
<!-- ===================================================== -->
<!-- JUL/JDK14 to Logback bridge -->
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
<resetJUL>true</resetJUL>
</contextListener>
<!-- ====================================================================================== -->
<!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
<!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
<!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
<!-- ====================================================================================== -->
<!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
<appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.service.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
<!-- keep 21 days' worth of history -->
<maxHistory>21</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- Access Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
<appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.access.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
<!-- keep 21 days' worth of history -->
<maxHistory>21</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
</encoder>
</appender>
<!--LogLens -->
<appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/service</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- LogLens Access -->
<appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/access</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- Pipeline Execution Logs -->
<appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>allow_listed_pipeline_executions.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
<!-- keep 7 days' worth of history -->
<maxHistory>7</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- ===================================================== -->
<!-- Primary Async Appenders -->
<!-- ===================================================== -->
<property name="async_queue_size" value="${queue.size:-50000}"/>
<property name="async_max_flush_time" value="${max.flush.time:-0}"/>
<appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="SERVICE"/>
</appender>
<appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ACCESS"/>
</appender>
<appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
</appender>
<appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS"/>
</appender>
<appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS-ACCESS"/>
</appender>
<!-- ===================================================== -->
<!-- Package Config -->
<!-- ===================================================== -->
<!-- Per-Package Config -->
<logger name="com.twitter" level="info"/>
<logger name="com.twitter.wilyns" level="warn"/>
<logger name="com.twitter.configbus.client.file" level="off"/>
<logger name="com.twitter.finagle.mux" level="warn"/>
<logger name="com.twitter.finagle.serverset2" level="warn"/>
<logger name="com.twitter.logging.ScribeHandler" level="off"/>
<logger name="com.twitter.zookeeper.client.internal" level="warn"/>
<logger name="io.netty.handler.ssl.SslHandler" level="OFF"/>
<!-- Root Config -->
<root level="${log_level:-INFO}">
<appender-ref ref="ASYNC-SERVICE"/>
<appender-ref ref="ASYNC-LOGLENS"/>
</root>
<!-- Access Logging -->
<logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter"
level="info"
additivity="false">
<appender-ref ref="ASYNC-ACCESS"/>
<appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
</logger>
<!-- Pipeline Executions Log -->
<logger name="com.twitter.product_mixer.core.service.pipeline_execution_logger"
level="info"
additivity="false">
<appender-ref ref="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" />
</logger>
</configuration>

View File

@ -0,0 +1,48 @@
# Scala library built from every *.scala file in this directory.
# Compiler warnings are fatal ("fatal_warnings") and strict_deps is on, so every
# dependency used directly by these sources has to be listed explicitly below.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"cr-mixer/server/src/main/resources",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"finagle/finagle-core/src/main",
"finagle/finagle-http/src/main/scala",
"finagle/finagle-thriftmux/src/main/scala",
"finatra-internal/mtls-http/src/main/scala",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra/http-core/src/main/java/com/twitter/finatra/http",
"finatra/inject/inject-app/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"finatra/inject/inject-server/src/main/scala",
"finatra/inject/inject-utils/src/main/scala",
"finatra/utils/src/main/java/com/twitter/finatra/annotations",
"hydra/common/libraries/src/main/scala/com/twitter/hydra/common/model_config",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/controllers",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters",
"src/thrift/com/twitter/timelines/render:thrift-scala",
"thrift-web-forms/src/main/scala/com/twitter/thriftwebforms",
"thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view",
"timelines/src/main/scala/com/twitter/timelines/features/app",
"twitter-server-internal",
"twitter-server/server/src/main/scala",
"util/util-app/src/main/scala",
"util/util-core:scala",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -0,0 +1,18 @@
package com.twitter.cr_mixer
import com.twitter.finatra.http.routing.HttpWarmup
import com.twitter.finatra.httpclient.RequestBuilder._
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Warm-up handler for the HTTP side of the server.
 *
 * Issues a single GET against the admin route `/admin/cr-mixer/product-pipelines`. Any
 * exception captured by the surrounding `Try` is logged at error level and not rethrown.
 */
@Singleton
class CrMixerHttpServerWarmupHandler @Inject() (warmup: HttpWarmup) extends Handler with Logging {

  override def handle(): Unit = {
    // The request is built inside the Try so that a failure while constructing it is
    // captured and logged just like a failure while sending it.
    val outcome = Try {
      warmup.send(get("/admin/cr-mixer/product-pipelines"), admin = true)()
    }
    outcome.onFailure { failure =>
      error(failure.getMessage, failure)
    }
  }
}

View File

@ -0,0 +1,229 @@
package com.twitter.cr_mixer
import com.google.inject.Module
import com.twitter.cr_mixer.controller.CrMixerThriftController
import com.twitter.cr_mixer.featureswitch.SetImpressedBucketsLocalContextFilter
import com.twitter.cr_mixer.module.ActivePromotedTweetStoreModule
import com.twitter.cr_mixer.module.CertoStratoStoreModule
import com.twitter.cr_mixer.module.CrMixerParamConfigModule
import com.twitter.cr_mixer.module.EmbeddingStoreModule
import com.twitter.cr_mixer.module.FrsStoreModule
import com.twitter.cr_mixer.module.MHMtlsParamsModule
import com.twitter.cr_mixer.module.OfflineCandidateStoreModule
import com.twitter.cr_mixer.module.RealGraphStoreMhModule
import com.twitter.cr_mixer.module.RealGraphOonStoreModule
import com.twitter.cr_mixer.module.RepresentationManagerModule
import com.twitter.cr_mixer.module.RepresentationScorerModule
import com.twitter.cr_mixer.module.TweetInfoStoreModule
import com.twitter.cr_mixer.module.TweetRecentEngagedUserStoreModule
import com.twitter.cr_mixer.module.TweetRecommendationResultsStoreModule
import com.twitter.cr_mixer.module.TripCandidateStoreModule
import com.twitter.cr_mixer.module.TwhinCollabFilterStratoStoreModule
import com.twitter.cr_mixer.module.UserSignalServiceColumnModule
import com.twitter.cr_mixer.module.UserSignalServiceStoreModule
import com.twitter.cr_mixer.module.UserStateStoreModule
import com.twitter.cr_mixer.module.core.ABDeciderModule
import com.twitter.cr_mixer.module.core.CrMixerFlagModule
import com.twitter.cr_mixer.module.core.CrMixerLoggingABDeciderModule
import com.twitter.cr_mixer.module.core.FeatureContextBuilderModule
import com.twitter.cr_mixer.module.core.FeatureSwitchesModule
import com.twitter.cr_mixer.module.core.KafkaProducerModule
import com.twitter.cr_mixer.module.core.LoggerFactoryModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SimClustersANNSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedQigSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedTwHINSimlarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TwhinCollabFilterLookupSimilarityEngineModule
import com.twitter.cr_mixer.module.ConsumersBasedUserAdGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserTweetGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserVideoGraphStoreModule
import com.twitter.cr_mixer.module.DiffusionStoreModule
import com.twitter.cr_mixer.module.EarlybirdRecencyBasedCandidateStoreModule
import com.twitter.cr_mixer.module.TwiceClustersMembersStoreModule
import com.twitter.cr_mixer.module.StrongTiePredictionStoreModule
import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.EarlybirdSearchClientModule
import com.twitter.cr_mixer.module.thrift_client.FrsClientModule
import com.twitter.cr_mixer.module.thrift_client.QigServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.SimClustersAnnServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.TweetyPieClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphPlusClientModule
import com.twitter.cr_mixer.module.thrift_client.UserVideoGraphClientModule
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.finagle.Filter
import com.twitter.finatra.annotations.DarkTrafficFilterType
import com.twitter.finatra.decider.modules.DeciderModule
import com.twitter.finatra.http.HttpServer
import com.twitter.finatra.http.routing.HttpRouter
import com.twitter.finatra.jackson.modules.ScalaObjectMapperModule
import com.twitter.finatra.mtls.http.{Mtls => HttpMtls}
import com.twitter.finatra.mtls.thriftmux.Mtls
import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule
import com.twitter.finatra.thrift.ThriftServer
import com.twitter.finatra.thrift.filters._
import com.twitter.finatra.thrift.routing.ThriftRouter
import com.twitter.hydra.common.model_config.{ConfigModule => HydraConfigModule}
import com.twitter.inject.thrift.modules.ThriftClientIdModule
import com.twitter.product_mixer.core.module.LoggingThrowableExceptionMapper
import com.twitter.product_mixer.core.module.StratoClientModule
import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule
import com.twitter.relevance_platform.common.filters.ClientStatsFilter
import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule
import com.twitter.cr_mixer.module.SimClustersANNServiceNameToClientMapper
import com.twitter.cr_mixer.module.SkitStratoStoreModule
import com.twitter.cr_mixer.module.BlueVerifiedAnnotationStoreModule
import com.twitter.cr_mixer.module.core.TimeoutConfigModule
import com.twitter.cr_mixer.module.grpc_client.NaviGRPCClientModule
import com.twitter.cr_mixer.module.similarity_engine.CertoTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerBasedWalsSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.DiffusionBasedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.EarlybirdSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SkitTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.UserTweetEntityGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.thrift_client.HydraPartitionClientModule
import com.twitter.cr_mixer.module.thrift_client.HydraRootClientModule
import com.twitter.cr_mixer.module.thrift_client.UserAdGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetEntityGraphClientModule
import com.twitter.thriftwebforms.MethodOptions
/**
 * Singleton instance of [[CrMixerServer]].
 * NOTE(review): presumably the JVM main class for the service -- confirm against the deploy config.
 */
object CrMixerServerMain extends CrMixerServer
/**
 * The cr-mixer server: mixes in both the Thrift server and the HTTP server traits together
 * with their respective Mtls traits, installs the Guice modules listed in `modules`, wires
 * the Thrift filter chain and controller in `configureThrift`, and runs the Thrift and HTTP
 * warm-up handlers in `warmup`.
 */
class CrMixerServer extends ThriftServer with Mtls with HttpServer with HttpMtls {
override val name = "cr-mixer-server"
// Core modules (deciders, flags, feature switches, logging, Kafka, object mapper, client id, ...).
private val coreModules = Seq(
ABDeciderModule,
CrMixerFlagModule,
CrMixerLoggingABDeciderModule,
CrMixerParamConfigModule,
new DarkTrafficFilterModule[st.CrMixer.ReqRepServicePerEndpoint](),
DeciderModule,
FeatureContextBuilderModule,
FeatureSwitchesModule,
KafkaProducerModule,
LoggerFactoryModule,
MHMtlsParamsModule,
ProductMixerFlagModule,
ScalaObjectMapperModule,
ThriftClientIdModule
)
// Modules from the `thrift_client` package, one per downstream Thrift service client.
private val thriftClientModules = Seq(
AnnQueryServiceClientModule,
EarlybirdSearchClientModule,
FrsClientModule,
HydraPartitionClientModule,
HydraRootClientModule,
QigServiceClientModule,
SimClustersAnnServiceClientModule,
TweetyPieClientModule,
UserAdGraphClientModule,
UserTweetEntityGraphClientModule,
UserTweetGraphClientModule,
UserTweetGraphPlusClientModule,
UserVideoGraphClientModule,
)
// Modules from the `grpc_client` package.
private val grpcClientModules = Seq(
NaviGRPCClientModule
)
// Full module list handed to the injector: the three groups above followed by the store and
// similarity-engine modules and the Thrift web forms module.
// NOTE(review): the list below is not strictly alphabetical (e.g. DiffusionBasedSimilarityEngineModule
// and BlueVerifiedAnnotationStoreModule sit at the end) -- re-sort or relax the comment below.
// Modules sorted alphabetically, please keep the order when adding a new module
override val modules: Seq[Module] =
coreModules ++ thriftClientModules ++ grpcClientModules ++
Seq(
ActivePromotedTweetStoreModule,
CertoStratoStoreModule,
CertoTopicTweetSimilarityEngineModule,
ConsumersBasedUserAdGraphSimilarityEngineModule,
ConsumersBasedUserTweetGraphStoreModule,
ConsumersBasedUserVideoGraphSimilarityEngineModule,
ConsumersBasedUserVideoGraphStoreModule,
ConsumerEmbeddingBasedTripSimilarityEngineModule,
ConsumerEmbeddingBasedTwHINSimilarityEngineModule,
ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule,
ConsumersBasedUserAdGraphStoreModule,
ConsumerBasedWalsSimilarityEngineModule,
DiffusionStoreModule,
EmbeddingStoreModule,
EarlybirdSimilarityEngineModule,
EarlybirdRecencyBasedCandidateStoreModule,
FrsStoreModule,
HydraConfigModule,
OfflineCandidateStoreModule,
ProducerBasedUnifiedSimilarityEngineModule,
ProducerBasedUserAdGraphSimilarityEngineModule,
ProducerBasedUserTweetGraphSimilarityEngineModule,
RealGraphOonStoreModule,
RealGraphStoreMhModule,
RepresentationManagerModule,
RepresentationScorerModule,
SimClustersANNServiceNameToClientMapper,
SimClustersANNSimilarityEngineModule,
SkitStratoStoreModule,
SkitTopicTweetSimilarityEngineModule,
StratoClientModule,
StrongTiePredictionStoreModule,
TimeoutConfigModule,
TripCandidateStoreModule,
TwiceClustersMembersStoreModule,
TweetBasedQigSimilarityEngineModule,
TweetBasedTwHINSimlarityEngineModule,
TweetBasedUnifiedSimilarityEngineModule,
TweetBasedUserAdGraphSimilarityEngineModule,
TweetBasedUserTweetGraphSimilarityEngineModule,
TweetBasedUserVideoGraphSimilarityEngineModule,
TweetInfoStoreModule,
TweetRecentEngagedUserStoreModule,
TweetRecommendationResultsStoreModule,
TwhinCollabFilterStratoStoreModule,
TwhinCollabFilterLookupSimilarityEngineModule,
UserSignalServiceColumnModule,
UserSignalServiceStoreModule,
UserStateStoreModule,
UserTweetEntityGraphSimilarityEngineModule,
DiffusionBasedSimilarityEngineModule,
BlueVerifiedAnnotationStoreModule,
// Thrift web forms for the CrMixer service; the default method access is restricted to the
// LDAP groups listed here.
new MtlsThriftWebFormsModule[st.CrMixer.MethodPerEndpoint](this) {
override protected def defaultMethodAccess: MethodOptions.Access = {
MethodOptions.Access.ByLdapGroup(
Seq(
"cr-mixer-admins",
"recosplat-sensitive-data-medium",
"recos-platform-admins",
))
}
}
)
/**
 * Registers the Thrift filters in the order listed, then the exception mapper, then the
 * controller that serves the CrMixer endpoints.
 *
 * The last filter is looked up by type `Filter.TypeAgnostic` with the `DarkTrafficFilterType`
 * annotation. NOTE(review): presumably bound by the DarkTrafficFilterModule installed in
 * `coreModules` -- confirm.
 */
def configureThrift(router: ThriftRouter): Unit = {
router
.filter[LoggingMDCFilter]
.filter[TraceIdMDCFilter]
.filter[ThriftMDCFilter]
.filter[ClientStatsFilter]
.filter[AccessLoggingFilter]
.filter[SetImpressedBucketsLocalContextFilter]
.filter[ExceptionMappingFilter]
.filter[Filter.TypeAgnostic, DarkTrafficFilterType]
.exceptionMapper[LoggingThrowableExceptionMapper]
.add[CrMixerThriftController]
}
// Runs the Thrift warm-up handler first, then the HTTP warm-up handler.
override protected def warmup(): Unit = {
handle[CrMixerThriftServerWarmupHandler]()
handle[CrMixerHttpServerWarmupHandler]()
}
}

View File

@ -0,0 +1,75 @@
package com.twitter.cr_mixer
import com.twitter.finagle.thrift.ClientId
import com.twitter.finatra.thrift.routing.ThriftWarmup
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.product_mixer.core.{thriftscala => pt}
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Warm-up handler for the Thrift side of the server.
 *
 * Sends one `GetTweetRecommendations` request per synthetic user id through [[ThriftWarmup]],
 * under the client id `thrift-warmup-client`. Warm-up is best-effort: non-fatal failures are
 * logged and never prevent start-up.
 */
@Singleton
class CrMixerThriftServerWarmupHandler @Inject() (warmup: ThriftWarmup)
    extends Handler
    with Logging {
  // Local import keeps this class self-contained without touching the file-level imports.
  import scala.util.control.NonFatal

  private val clientId = ClientId("thrift-warmup-client")

  /**
   * Sends the warm-up requests and logs "Warm-up done." afterwards, whether or not a
   * non-fatal error occurred along the way.
   */
  override def handle(): Unit = {
    // Synthetic user ids used only to build distinct warm-up queries.
    val testIds = Seq(1L, 2L, 3L)
    try {
      clientId.asCurrent {
        testIds.foreach { id =>
          val warmupReq = warmupQuery(id)
          info(s"Sending warm-up request to service with query: $warmupReq")
          warmup.sendRequest(
            method = st.CrMixer.GetTweetRecommendations,
            req = Request(st.CrMixer.GetTweetRecommendations.Args(warmupReq)))(assertWarmupResponse)
        }
      }
    } catch {
      // we don't want a warmup failure to prevent start-up, but fatal errors
      // (OutOfMemoryError, InterruptedException, LinkageError, ...) must still propagate
      // instead of being silently swallowed here.
      case NonFatal(e) =>
        error(e.getMessage, e)
    }
    info("Warm-up done.")
  }

  /**
   * Builds a Home-product tweet request for the given user id with a fixed, fake client
   * context (placeholder user agent and device id, country "US", language "en").
   */
  private def warmupQuery(userId: Long): st.CrMixerTweetRequest = {
    val clientContext = pt.ClientContext(
      userId = Some(userId),
      guestId = None,
      appId = Some(258901L),
      ipAddress = Some("0.0.0.0"),
      userAgent = Some("FAKE_USER_AGENT_FOR_WARMUPS"),
      countryCode = Some("US"),
      languageCode = Some("en"),
      isTwoffice = None,
      userRoles = None,
      deviceId = Some("FAKE_DEVICE_ID_FOR_WARMUPS")
    )
    st.CrMixerTweetRequest(
      clientContext = clientContext,
      product = st.Product.Home,
      productContext = Some(st.ProductContext.HomeContext(st.HomeContext())),
    )
  }

  /**
   * Response callback for a warm-up request: a successful result is ignored, a failed one is
   * logged (a warning line followed by the exception at error level). Never throws.
   */
  private def assertWarmupResponse(
    result: Try[Response[st.CrMixer.GetTweetRecommendations.SuccessType]]
  ): Unit = {
    // we collect and log any exceptions from the result.
    result match {
      case Return(_) => // ok
      case Throw(exception) =>
        warn("Error performing warm-up request.")
        error(exception.getMessage, exception)
    }
  }
}

View File

@ -0,0 +1,77 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
import scala.collection.mutable
/**
 * Blends several sequences of ads candidates into one interleaved sequence of
 * [[BlendedAdsCandidate]]s and records the resulting size in the "candidates" stat.
 */
@Singleton
case class AdsBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves candidates by iteratively choosing InterestedIn candidates and TWISTLY candidates
   * in turn. InterestedIn candidates have no source signal, whereas TWISTLY candidates do. TWISTLY
   * candidates themselves are interleaved by source before equal blending with InterestedIn
   * candidates.
   *
   * @param inputCandidates one sequence of candidates per source; empty sequences are ignored
   * @return an already-completed Future holding the blended candidates in interleaved order
   */
  def blend(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
  ): Future[Seq[BlendedAdsCandidate]] = {
    // Empty groups are dropped first, which is what makes `head` below safe.
    val nonEmptyGroups = inputCandidates.filter(_.nonEmpty)

    // A group is classified by its first candidate: no source info means InterestedIn.
    val (interestedInGroups, twistlyGroups) = nonEmptyGroups.partition { group =>
      group.head.candidateGenerationInfo.sourceInfoOpt.isEmpty
    }

    // Step 1: interleave the TWISTLY groups among themselves.
    val interleavedTwistly = InterleaveUtil.interleave(twistlyGroups)

    // Step 2: interleave all InterestedIn candidates (flattened) with the TWISTLY result.
    val interleaved =
      InterleaveUtil.interleave(Seq(interestedInGroups.flatten, interleavedTwistly))

    stats.stat("candidates").add(interleaved.size)

    Future.value(buildBlendedAdsCandidate(inputCandidates, interleaved))
  }

  /**
   * Turns each interleaved candidate into a blended candidate, passing along every
   * CandidateGenerationInfo recorded for its tweet id in `inputCandidates`.
   * The lookup throws if an interleaved tweet id never occurs in `inputCandidates`.
   */
  private def buildBlendedAdsCandidate(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
    interleavedCandidates: Seq[InitialAdsCandidate]
  ): Seq[BlendedAdsCandidate] = {
    val cgInfosByTweetId = buildCandidateToCGInfosMap(inputCandidates)
    for (candidate <- interleavedCandidates)
      yield candidate.toBlendedAdsCandidate(cgInfosByTweetId(candidate.tweetId))
  }

  /**
   * Groups the CandidateGenerationInfo of every candidate by tweet id, keeping the order in
   * which the candidates are encountered.
   */
  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialAdsCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val cgInfosByTweetId = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]()
    for {
      group <- candidateSeq
      candidate <- group
    } {
      val alreadySeen = cgInfosByTweetId.getOrElse(candidate.tweetId, Seq.empty)
      cgInfosByTweetId.update(candidate.tweetId, alreadySeen :+ candidate.candidateGenerationInfo)
    }
    cgInfosByTweetId.toMap
  }
}

View File

@ -0,0 +1,20 @@
# Scala library built from every *.scala file in this directory, with fatal compiler
# warnings and strict_deps enabled.
# NOTE(review): storehaus:core is listed under both 3rdparty/jvm and 3rdparty/src/jvm --
# confirm that both entries are required.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
],
)

View File

@ -0,0 +1,48 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.simclusters_v2.common.TweetId
import scala.collection.mutable
object BlendedCandidatesBuilder {

  /**
   * Converts interleaved candidates into BlendedCandidates, attaching to each one every
   * CandidateGenerationInfo that produced its tweet.
   *
   * The lookup uses `Map.apply`, so a tweet id in `interleavedCandidates` that does not occur
   * anywhere in `inputCandidates` raises a NoSuchElementException.
   *
   * @param inputCandidates input candidate prior to interleaving
   * @param interleavedCandidates after interleaving. These tweets are de-duplicated.
   */
  def build(
    inputCandidates: Seq[Seq[InitialCandidate]],
    interleavedCandidates: Seq[InitialCandidate]
  ): Seq[BlendedCandidate] = {
    val cgInfosByTweetId = buildCandidateToCGInfosMap(inputCandidates)
    for (candidate <- interleavedCandidates)
      yield candidate.toBlendedCandidate(cgInfosByTweetId(candidate.tweetId))
  }

  /**
   * The same tweet can be generated by different sources.
   * This function records, per tweet id, every CandidateGenerationInfo that generated it,
   * in encounter order (outer sequence first, then inner sequence).
   */
  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialCandidate]]
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val cgInfosByTweetId = mutable.HashMap.empty[TweetId, Seq[CandidateGenerationInfo]]
    for {
      candidates <- candidateSeq
      candidate <- candidates
    } {
      val alreadySeen = cgInfosByTweetId.getOrElse(candidate.tweetId, Seq.empty)
      cgInfosByTweetId.update(candidate.tweetId, alreadySeen :+ candidate.candidateGenerationInfo)
    }
    cgInfosByTweetId.toMap
  }
}

View File

@ -0,0 +1,121 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Exposes multiple types of sorting relying only on Content Based signals
   * Candidate Recency, Random, FavoriteCount and finally Standardized, which standardizes the scores
   * that come from the active SimilarityEngine and then sort on the standardized scores.
   *
   * Any other configured algorithm falls back to sorting by favorite count (descending).
   * After sorting, duplicate tweet ids are dropped (first occurrence wins) and the survivors are
   * converted with [[BlendedCandidatesBuilder.build]].
   *
   * @param params          request params; selects the sorting algorithm
   * @param inputCandidates candidate groups to blend; empty groups are ignored
   * @return an already-satisfied Future holding the blended candidates
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)

    val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency =>
        candidates.flatten.sortBy(c => getSnowflakeTimeStamp(c.tweetId)).reverse
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting =>
        // sortBy re-evaluates its key function on every comparison, so a random key yields an
        // inconsistent comparator (the underlying sort may throw, and the result is not a
        // uniform permutation). Shuffle explicitly instead.
        scala.util.Random.shuffle(candidates.flatten)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.FavoriteCount =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SimilarityToSignalSorting =>
        standardizeAndSortByScore(flattenAndGroupByEngineTypeOrFirstContribEngine(candidates))
      case _ =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
    }

    // Recorded before de-duplication, so this counts duplicates too.
    stats.stat("candidates").add(sortedCandidates.size)

    val blendedCandidates =
      BlendedCandidatesBuilder.build(inputCandidates, removeDuplicates(sortedCandidates))
    Future.value(blendedCandidates)
  }

  /** Keeps the first candidate seen for each tweet id, preserving the input order. */
  private def removeDuplicates(candidates: Seq[InitialCandidate]): Seq[InitialCandidate] = {
    val seen = collection.mutable.Set.empty[Long]
    // Set.add returns true only when the id was not already present.
    candidates.filter(c => seen.add(c.tweetId))
  }

  /**
   * Groups candidates by the type of their first contributing similarity engine, falling back
   * to the candidate's own similarity engine type when there are no contributing engines.
   */
  private def groupByEngineTypeOrFirstContribEngine(
    candidates: Seq[InitialCandidate]
  ): Map[SimilarityEngineType, Seq[InitialCandidate]] =
    candidates.groupBy { candidate =>
      val cgInfo = candidate.candidateGenerationInfo
      cgInfo.contributingSimilarityEngines.headOption
        .map(_.similarityEngineType)
        .getOrElse(cgInfo.similarityEngineInfo.similarityEngineType)
    }

  /** Flattens all groups and regroups them per engine type (see groupByEngineTypeOrFirstContribEngine). */
  private def flattenAndGroupByEngineTypeOrFirstContribEngine(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[Seq[InitialCandidate]] =
    groupByEngineTypeOrFirstContribEngine(candidates.flatten).values.toSeq

  /**
   * Standardizes scores within each inner group (z-score using the population standard
   * deviation) and returns all candidates sorted by standardized score, highest first.
   *
   * A missing score counts as 0.0 when computing mean and deviation, and the candidate itself
   * gets a standardized score of 0.0. When a group's deviation is 0, every candidate in it
   * gets 0.0.
   */
  private def standardizeAndSortByScore(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[InitialCandidate] = {
    candidates
      .flatMap { innerSeq =>
        val scores =
          innerSeq.map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0))
        val meanScore = scores.sum / innerSeq.length
        val stdDev = scala.math.sqrt(
          scores.map { s =>
            val deviation = s - meanScore
            deviation * deviation
          }.sum / innerSeq.length)

        innerSeq.map { c =>
          val standardizedScore = c.candidateGenerationInfo.similarityEngineInfo.score
            .map { score =>
              if (stdDev != 0) (score - meanScore) / stdDev
              else 0.0
            }
            .getOrElse(0.0)
          (c, standardizedScore)
        }
      }
      .sortBy { case (_, standardizedScore) => -standardizedScore }
      .map { case (candidate, _) => candidate }
  }

  /** Creation time encoded in a snowflake tweet id; epoch 0 for ids that are not snowflake ids. */
  private def getSnowflakeTimeStamp(tweetId: Long): Time = {
    if (SnowflakeId.isSnowflakeId(tweetId)) {
      SnowflakeId(tweetId).time
    } else {
      Time.fromMilliseconds(0L)
    }
  }
}

View File

@ -0,0 +1,90 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.util.CountWeightedInterleaveUtil
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
* A weighted round robin interleaving algorithm.
* The weight of each blending group is based on the count of candidates in that group.
* The more candidates under a blending group, the more candidates are selected from it during round
* robin, which in effect prioritizes this group.
*
* Weights sum up to 1. For example:
* total candidates = 8
* Group Weight
* [A1, A2, A3, A4] 4/8 = 0.5 // select 50% of results from group A
* [B1, B2] 2/8 = 0.25 // 25% from group B
* [C1, C2] 2/8 = 0.25 // 25% from group C
*
* Blended results = [A1, A2, B1, C1, A3, A4, B2, C2]
* See @linht's go/weighted-interleave
*/
@Singleton
case class CountWeightedInterleaveBlender @Inject() (globalStats: StatsReceiver) {
  import CountWeightedInterleaveBlender._

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Reads the weighted-interleave settings from `query.params` and blends `inputCandidates`
   * with them.
   */
  def blend(
    query: CrCandidateGeneratorQuery,
    inputCandidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[BlendedCandidate]] =
    countWeightedInterleave(paramToQuery(query.params), inputCandidates)

  /**
   * Pairs each candidate group with a weight, runs the weighted interleave, records the
   * resulting size under the "candidates" stat and converts the result to BlendedCandidates.
   */
  private[blender] def countWeightedInterleave(
    query: WeightedBlenderQuery,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    val groupsWithWeights: Seq[(Seq[InitialCandidate], Double)] =
      CountWeightedInterleaveUtil.buildInitialCandidatesWithWeightKeyByFeature(
        inputCandidates,
        query.rankerWeightShrinkage)

    val interleaved =
      InterleaveUtil.weightedInterleave(groupsWithWeights, query.maxWeightAdjustments)
    stats.stat("candidates").add(interleaved.size)

    Future.value(BlendedCandidatesBuilder.build(inputCandidates, interleaved))
  }
}
object CountWeightedInterleaveBlender {

  /**
   * We pass two parameters to the weighted interleaver:
   * @param rankerWeightShrinkage shrinkage parameter between [0, 1] that determines how close we
   *                              stay to uniform sampling. The bigger the shrinkage the
   *                              closer we are to uniform round robin
   * @param maxWeightAdjustments max number of weighted sampling to do prior to defaulting to
   *                             uniform. Set so that we avoid infinite loops (e.g. if weights are
   *                             0)
   */
  case class WeightedBlenderQuery(
    rankerWeightShrinkage: Double,
    maxWeightAdjustments: Int)

  /** Builds a WeightedBlenderQuery from the two RankingInterleave* blender params. */
  def paramToQuery(params: Params): WeightedBlenderQuery =
    WeightedBlenderQuery(
      rankerWeightShrinkage = params(BlenderParams.RankingInterleaveWeightShrinkageParam),
      maxWeightAdjustments = params(BlenderParams.RankingInterleaveMaxWeightAdjustments)
    )
}

View File

@ -0,0 +1,33 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
@Singleton
case class InterleaveBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves candidates, by taking 1 candidate from each Seq[Seq[InitialCandidate]] in sequence,
   * until we run out of candidates.
   *
   * The interleaved size is recorded under the "candidates" stat before the result is converted
   * to BlendedCandidates.
   */
  def blend(
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    val interleaved = InterleaveUtil.interleave(inputCandidates)
    stats.stat("candidates").add(interleaved.size)
    Future.value(BlendedCandidatesBuilder.build(inputCandidates, interleaved))
  }
}

View File

@ -0,0 +1,64 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypes
import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypesWithVideo
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Partition the candidates based on source type
   * Interleave the two partitions of candidates separately
   * Then append the back fill candidates to the end
   *
   * Which source types count as back fill depends on
   * BlenderParams.SourceTypeBackFillEnableVideoBackFill.
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Filter out empty candidate sequence
    val nonEmptyGroups = inputCandidates.filter(_.nonEmpty)

    val backFillSourceTypes =
      if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) BackFillSourceTypesWithVideo
      else BackFillSourceTypes

    // A group is classified by its first candidate; `.head` is safe because empty groups were
    // removed above. A group without source info is never a back fill group.
    def isBackFillGroup(group: Seq[InitialCandidate]): Boolean =
      group.head.candidateGenerationInfo.sourceInfoOpt
        .exists(sourceInfo => backFillSourceTypes.contains(sourceInfo.sourceType))

    val (backFillGroups, regularGroups) = nonEmptyGroups.partition(isBackFillGroup)

    val interleavedRegular = InterleaveUtil.interleave(regularGroups)
    val interleavedBackFill = InterleaveUtil.interleave(backFillGroups)
    stats.stat("backFillCandidates").add(interleavedBackFill.size)

    // Append interleaved backfill candidates to the end
    val interleaved = interleavedRegular ++ interleavedBackFill
    stats.stat("candidates").add(interleaved.size)

    Future.value(BlendedCandidatesBuilder.build(inputCandidates, interleaved))
  }
}
object ImplicitSignalBackFillBlender {

  /** Source types treated as back fill when video back fill is disabled. */
  final val BackFillSourceTypes: Set[SourceType] = Set(SourceType.UserRepeatedProfileVisit)

  /** Source types treated as back fill when video back fill is enabled. */
  final val BackFillSourceTypesWithVideo: Set[SourceType] =
    BackFillSourceTypes + SourceType.VideoTweetPlayback50 + SourceType.VideoTweetQualityView
}

View File

@ -0,0 +1,81 @@
package com.twitter.cr_mixer.blender
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.param.BlenderParams.BlendingAlgorithmEnum
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Singleton
@Singleton
case class SwitchBlender @Inject() (
  defaultBlender: InterleaveBlender,
  sourceTypeBackFillBlender: SourceTypeBackFillBlender,
  adsBlender: AdsBlender,
  contentSignalBlender: ContentSignalBlender,
  globalStats: StatsReceiver) {

  private val stats = globalStats.scope(this.getClass.getCanonicalName)

  /**
   * Orders the non-empty candidate groups and hands them to the blender selected by
   * BlenderParams.BlendingAlgorithmParam (round robin is the default).
   *
   * @param params          request params; select both the group ordering and the blender
   * @param userState       not read by this method
   * @param inputCandidates candidate groups; empty groups are dropped before blending
   */
  def blend(
    params: Params,
    userState: UserState,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Take out empty seq
    val nonEmptyCandidates = inputCandidates.filter(_.nonEmpty)
    // Counts all incoming groups, including the empty ones removed above.
    stats.stat("num_of_sequences").add(inputCandidates.size)

    // Sort the seqs in an order. Each group is represented by its first candidate
    // (`.head` is safe: empty groups were removed).
    val candidatesToBlend = params(BlenderParams.SignalTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SourceSignalRecency =>
        nonEmptyCandidates.sortBy(_.head)(SwitchBlender.TimestampOrder)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting =>
        // Sorting with an Ordering that draws a fresh random key on every comparison is an
        // inconsistent comparator (the underlying sort may throw, and the result is not a
        // uniform permutation). Shuffle explicitly instead.
        scala.util.Random.shuffle(nonEmptyCandidates)
      case _ =>
        nonEmptyCandidates.sortBy(_.head)(SwitchBlender.TimestampOrder)
    }

    // Blend based on specified blender rules
    params(BlenderParams.BlendingAlgorithmParam) match {
      case BlendingAlgorithmEnum.RoundRobin =>
        defaultBlender.blend(candidatesToBlend)
      case BlendingAlgorithmEnum.SourceTypeBackFill =>
        sourceTypeBackFillBlender.blend(params, candidatesToBlend)
      case BlendingAlgorithmEnum.SourceSignalSorting =>
        contentSignalBlender.blend(params, candidatesToBlend)
      case _ => defaultBlender.blend(candidatesToBlend)
    }
  }
}

object SwitchBlender {

  /**
   * Prefers candidates generated from sources with the latest timestamps.
   * The newer the source signal, the higher a candidate ranks.
   * This ordering biases against consumer-based candidates because their timestamp defaults to 0
   *
   * Within a Seq[Seq[Candidate]], all candidates within an inner Seq
   * are guaranteed to have the same sourceInfo because they are grouped by (sourceInfo, SE model).
   * Hence, we can pick .headOption to represent the whole list when filtering by the internalId of the sourceInfoOpt.
   * But of course the similarityEngine score in a CGInfo could be different.
   */
  val TimestampOrder: Ordering[InitialCandidate] =
    math.Ordering
      .by[InitialCandidate, Time](
        _.candidateGenerationInfo.sourceInfoOpt
          .flatMap(_.sourceEventTime)
          .getOrElse(Time.fromMilliseconds(0L)))
      .reverse
}

View File

@ -0,0 +1,140 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.blender.AdsBlender
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.RankedAdsCandidate
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.param.AdsParams
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
import com.twitter.cr_mixer.source_signal.UssSourceSignalFetcher
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
@Singleton
class AdsCandidateGenerator @Inject() (
  ussSourceSignalFetcher: UssSourceSignalFetcher,
  realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
  adsCandidateSourceRouter: AdsCandidateSourcesRouter,
  adsBlender: AdsBlender,
  scribeLogger: AdsRecommendationsScribeLogger,
  globalStats: StatsReceiver) {

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val fetchSourcesStats = stats.scope("fetchSources")
  private val fetchRealGraphSeedsStats = stats.scope("fetchRealGraphSeeds")
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val interleaveStats = stats.scope("interleave")
  private val rankStats = stats.scope("rank")

  /**
   * Runs the ads candidate pipeline (source signals, real graph seeds, candidate fetch, blend,
   * rank) and returns at most `query.maxNumResults` ranked candidates. The whole pipeline is
   * tracked under both the "all" and the per-product stats scopes.
   */
  def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = {
    val allStats = stats.scope("all")
    val perProductStats = stats.scope("perProduct", query.product.toString)

    StatsUtil.trackItemsStats(allStats) {
      StatsUtil.trackItemsStats(perProductStats) {
        generate(query)
      }
    }
  }

  /** The pipeline steps, run one after another; each step is tracked under its own scope. */
  private def generate(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] =
    for {
      // fetch source signals
      sourceSignals <- StatsUtil.trackBlockStats(fetchSourcesStats) {
        fetchSources(query)
      }
      realGraphSeeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) {
        fetchSeeds(query)
      }
      // get initial candidates from similarity engines
      // hydrate lineItemInfo and filter out non active ads
      initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
        fetchCandidates(query, sourceSignals, realGraphSeeds)
      }
      // blend candidates
      blendedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
        interleave(initialCandidates)
      }
      rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
        rank(
          blendedCandidates,
          query.params(AdsParams.EnableScoreBoost),
          query.params(AdsParams.AdsCandidateGenerationScoreBoostFactor),
          rankStats)
      }
    } yield rankedCandidates.take(query.maxNumResults)

  /** Fetches the user's source signals; a missing result becomes an empty set. */
  def fetchSources(
    query: AdsCandidateGeneratorQuery
  ): Future[Set[SourceInfo]] = {
    val fetcherQuery =
      FetcherQuery(query.userId, query.product, query.userState, query.params)
    ussSourceSignalFetcher.get(fetcherQuery).map { signalsOpt =>
      signalsOpt.getOrElse(Seq.empty).toSet
    }
  }

  /**
   * Fetches candidate groups from the router. The router call is passed through the scribe
   * logger together with the EnableScribe param; it is deliberately kept inline as the argument.
   */
  private def fetchCandidates(
    query: AdsCandidateGeneratorQuery,
    sourceSignals: Set[SourceInfo],
    realGraphSeeds: Map[UserId, Double]
  ): Future[Seq[Seq[InitialAdsCandidate]]] = {
    scribeLogger.scribeInitialAdsCandidates(
      query,
      adsCandidateSourceRouter
        .fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params),
      query.params(AdsParams.EnableScribe)
    )
  }

  /**
   * Fetches real graph seeds with their scores when the consumers-based user-ad-graph source
   * is enabled; otherwise (or when the fetcher returns nothing) yields an empty map.
   */
  private def fetchSeeds(
    query: AdsCandidateGeneratorQuery
  ): Future[Map[UserId, Double]] = {
    if (!query.params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
      Future.value(Map.empty[UserId, Double])
    } else {
      realGraphInSourceGraphFetcher
        .get(FetcherQuery(query.userId, query.product, query.userState, query.params))
        .map(_.map(_.seedWithScores).getOrElse(Map.empty))
    }
  }

  /** Blends the candidate groups with the ads blender. */
  private def interleave(
    candidates: Seq[Seq[InitialAdsCandidate]]
  ): Future[Seq[BlendedAdsCandidate]] =
    adsBlender.blend(candidates)

  /**
   * Assigns each candidate a position-based score: 0.5 + 0.5 * (size - index) / size, so the
   * first candidate gets 1.0 and later ones approach 0.5. When `enableScoreBoost` is set the
   * score is multiplied by `scoreBoostFactor`. The score (times 100) is recorded under the
   * "boostedScore" or "score" stat respectively.
   */
  private def rank(
    candidates: Seq[BlendedAdsCandidate],
    enableScoreBoost: Boolean,
    scoreBoostFactor: Double,
    statsReceiver: StatsReceiver,
  ): Future[Seq[RankedAdsCandidate]] = {
    val total = candidates.size

    val ranked = for ((candidate, position) <- candidates.zipWithIndex) yield {
      val positionScore = 0.5 + 0.5 * ((total - position).toDouble / total)
      val finalScore =
        if (enableScoreBoost) {
          statsReceiver
            .stat("boostedScore").add((100.0 * positionScore * scoreBoostFactor).toFloat)
          positionScore * scoreBoostFactor
        } else {
          statsReceiver.stat("score").add((100.0 * positionScore).toFloat)
          positionScore
        }
      candidate.toRankedAdsCandidate(finalScore)
    }

    Future.value(ranked)
  }
}

View File

@ -0,0 +1,516 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.ModelConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.param.InterestedInParams
import com.twitter.cr_mixer.param.ProducerBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.SimClustersANNParams
import com.twitter.cr_mixer.param.TweetBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.FilterUtil
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserAdGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine.Query
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TweetBasedUserAdGraphSimilarityEngine
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
case class AdsCandidateSourcesRouter @Inject() (
activePromotedTweetStore: ReadableStore[TweetId, Seq[LineItemInfo]],
decider: CrMixerDecider,
@Named(ModuleNames.SimClustersANNSimilarityEngine) simClustersANNSimilarityEngine: StandardSimilarityEngine[
Query,
TweetWithScore
],
@Named(ModuleNames.TweetBasedUserAdGraphSimilarityEngine)
tweetBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
TweetBasedUserAdGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.ConsumersBasedUserAdGraphSimilarityEngine)
consumersBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
ConsumersBasedUserAdGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.ProducerBasedUserAdGraphSimilarityEngine)
producerBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
ProducerBasedUserAdGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine)
tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) consumerTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
ConsumerBasedWalsSimilarityEngine.Query,
TweetWithScore
],
globalStats: StatsReceiver,
) {
import AdsCandidateSourcesRouter._
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
/**
 * Queries every enabled ads candidate source and returns the resulting candidate lists.
 *
 * Each source below is guarded by its own enable param and produces a
 * Future of a sequence of candidate lists (an empty sequence when disabled). Tweet-based and
 * producer-based sources issue one engine call per matching source signal; the consumer-based
 * sources (InterestedIn, consumer TwHIN, RealGraph UAG, WALS) issue a single call whose
 * result is wrapped in a one-element sequence.
 *
 * All lists are then converted through convertToInitialCandidates; lists that end up empty
 * are dropped before returning.
 *
 * @param requestUserId  user the request is made for
 * @param sourceSignals  source signals used by the tweet-based / producer-based / WALS sources
 * @param realGraphSeeds seed users with scores for the consumers-based user-ad-graph source
 * @param params         request params (enable flags, config ids, min scores)
 */
def fetchCandidates(
requestUserId: UserId,
sourceSignals: Set[SourceInfo],
realGraphSeeds: Map[UserId, Double],
params: configapi.Params
): Future[Seq[Seq[InitialAdsCandidate]]] = {
// --- Tweet-based sources: one query per tweet-based source signal ---
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
val tweetBasedSANNMinScore = params(
TweetBasedCandidateGenerationParams.SimClustersMinScoreParam)
val tweetBasedSANN1Candidates =
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN1ConfigId,
tweetBasedSANNMinScore)
})
} else Future.value(Seq.empty)
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
val tweetBasedSANN2Candidates =
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN2ConfigId,
tweetBasedSANNMinScore)
})
} else Future.value(Seq.empty)
val tweetBasedUagCandidates =
if (params(TweetBasedCandidateGenerationParams.EnableUAGParam)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getTweetBasedUserAdGraphCandidates(Some(sourceInfo), params)
})
} else Future.value(Seq.empty)
// RealGraph seeds feed a single consumers-based user-ad-graph query.
val realGraphInNetworkBasedUagCandidates =
if (params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
getRealGraphConsumersBasedUserAdGraphCandidates(realGraphSeeds, params).map(Seq(_))
} else Future.value(Seq.empty)
// --- Producer-based sources: one query per producer-based source signal ---
val producerBasedUagCandidates =
if (params(ProducerBasedCandidateGenerationParams.EnableUAGParam)) {
Future.collect(
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getProducerBasedUserAdGraphCandidates(Some(sourceInfo), params)
})
} else Future.value(Seq.empty)
// NOTE(review): the TwHIN queries here and below use ModelConfig.DebuggerDemo as the model id - confirm this is intended.
val tweetBasedTwhinAdsCandidates =
if (params(TweetBasedCandidateGenerationParams.EnableTwHINParam)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getTwHINAdsCandidates(
tweetBasedTwHINANNSimilarityEngine,
SimilarityEngineType.TweetBasedTwHINANN,
requestUserId,
Some(sourceInfo),
ModelConfig.DebuggerDemo)
})
} else Future.value(Seq.empty)
val producerBasedSANNMinScore = params(
ProducerBasedCandidateGenerationParams.SimClustersMinScoreParam)
val producerBasedSANN1Candidates =
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
Future.collect(
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN1ConfigId,
producerBasedSANNMinScore)
})
} else Future.value(Seq.empty)
val producerBasedSANN2Candidates =
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
Future.collect(
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN2ConfigId,
producerBasedSANNMinScore)
})
} else Future.value(Seq.empty)
// --- Consumer-based sources: no source signal is passed (None), one query each ---
val interestedInMinScore = params(InterestedInParams.MinScoreParam)
val interestedInSANN1Candidates = if (params(InterestedInParams.EnableSimClustersANN1Param)) {
getSimClustersANNCandidates(
requestUserId,
None,
params,
simClustersANN1ConfigId,
interestedInMinScore).map(Seq(_))
} else Future.value(Seq.empty)
val interestedInSANN2Candidates = if (params(InterestedInParams.EnableSimClustersANN2Param)) {
getSimClustersANNCandidates(
requestUserId,
None,
params,
simClustersANN2ConfigId,
interestedInMinScore).map(Seq(_))
} else Future.value(Seq.empty)
val consumerTwHINAdsCandidates =
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) {
getTwHINAdsCandidates(
consumerTwHINANNSimilarityEngine,
SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN,
requestUserId,
None,
ModelConfig.DebuggerDemo).map(Seq(_))
} else Future.value(Seq.empty)
// The `.map { Seq(_) }` below applies to the result of the braced `if` branch only.
val consumerBasedWalsCandidates =
if (params(
ConsumerBasedWalsParams.EnableSourceParam
)) {
getConsumerBasedWalsCandidates(sourceSignals, params)
}.map {
Seq(_)
}
else Future.value(Seq.empty)
// Wait for all sources, flatten to one sequence of candidate lists, convert each list
// (which also filters by line item info), then drop lists that became empty.
Future
.collect(Seq(
tweetBasedSANN1Candidates,
tweetBasedSANN2Candidates,
tweetBasedUagCandidates,
tweetBasedTwhinAdsCandidates,
producerBasedUagCandidates,
producerBasedSANN1Candidates,
producerBasedSANN2Candidates,
realGraphInNetworkBasedUagCandidates,
interestedInSANN1Candidates,
interestedInSANN2Candidates,
consumerTwHINAdsCandidates,
consumerBasedWalsCandidates,
)).map(_.flatten).map { tweetsWithCGInfoSeq =>
Future.collect(
tweetsWithCGInfoSeq.map(candidates => convertToInitialCandidates(candidates, stats)))
}.flatten.map { candidatesLists =>
val result = candidatesLists.filter(_.nonEmpty)
stats.stat("numOfSequences").add(result.size)
// Total candidate count across lists; the same tweet may be counted more than once.
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
result
}
}
/**
 * Looks up line item info for every distinct tweet id and converts the candidates that have
 * one into InitialAdsCandidates, preserving the input order.
 *
 * If lineItemInfo does not exist, we will filter out the promoted tweet as it cannot be
 * targeted and ranked in admixer.
 *
 * The number of distinct tweet ids before the filter and the number of candidates after it
 * are recorded on `stats`.
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo],
  stats: StatsReceiver
): Future[Seq[InitialAdsCandidate]] = {
  val distinctTweetIds = candidates.map(_.tweetId).toSet
  stats.stat("initialCandidateSizeBeforeLineItemFilter").add(distinctTweetIds.size)

  Future.collect(activePromotedTweetStore.multiGet(distinctTweetIds)).map { lineItemInfos =>
    val withLineItemInfo = candidates.flatMap { candidate =>
      // A missing key and a stored None are treated the same: the candidate is dropped.
      lineItemInfos.getOrElse(candidate.tweetId, None).map { lineItemInfo =>
        InitialAdsCandidate(
          tweetId = candidate.tweetId,
          lineItemInfo = lineItemInfo,
          candidate.candidateGenerationInfo
        )
      }
    }
    stats.stat("initialCandidateSizeAfterLineItemFilter").add(withLineItemInfo.size)
    withLineItemInfo
  }
}
/**
 * Queries the SimClusters ANN engine and keeps results whose score is strictly above
 * `minScore`.
 *
 * With a `sourceInfo`, the query is keyed by the source's internal id and an embedding type
 * derived from it; without one, it is keyed by the requesting user and the InterestedIn
 * embedding type from params.
 *
 * When the dark-traffic decider is available, an additional query using the ANN2 config id
 * is issued and its result is discarded.
 */
private[candidate_generation] def getSimClustersANNCandidates(
  requestUserId: UserId,
  sourceInfo: Option[SourceInfo],
  params: configapi.Params,
  configId: String,
  minScore: Double
) = {
  val simClustersModelVersion =
    ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))

  val embeddingType = sourceInfo
    .map(getSimClustersANNEmbeddingType)
    .getOrElse(params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType)

  val internalId = sourceInfo.map(_.internalId).getOrElse(InternalId.UserId(requestUserId))

  val query = SimClustersANNSimilarityEngine.fromParams(
    internalId,
    embeddingType,
    simClustersModelVersion,
    configId,
    params
  )

  // dark traffic to simclusters-ann-2
  if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) {
    val darkTrafficQuery = SimClustersANNSimilarityEngine.fromParams(
      internalId,
      embeddingType,
      simClustersModelVersion,
      params(SimClustersANNParams.SimClustersANN2ConfigId),
      params
    )
    simClustersANNSimilarityEngine.getCandidates(darkTrafficQuery)
  }

  simClustersANNSimilarityEngine.getCandidates(query).map { candidatesOpt =>
    candidatesOpt.getOrElse(Seq.empty).collect {
      case tweetWithScore if tweetWithScore.score > minScore =>
        val similarityEngineInfo = SimClustersANNSimilarityEngine
          .toSimilarityEngineInfo(query, tweetWithScore.score)
        TweetWithCandidateGenerationInfo(
          tweetWithScore.tweetId,
          CandidateGenerationInfo(
            sourceInfo,
            similarityEngineInfo,
            Seq(similarityEngineInfo)
          ))
    }
  }
}
/**
 * Queries the producer-based user-ad-graph engine for the given source and wraps each result
 * with its candidate generation info. A missing engine result becomes an empty sequence.
 *
 * `sourceInfo` must be defined: the query is built from `sourceInfo.get.internalId`.
 */
private[candidate_generation] def getProducerBasedUserAdGraphCandidates(
  sourceInfo: Option[SourceInfo],
  params: configapi.Params
) = {
  val engineQuery =
    ProducerBasedUserAdGraphSimilarityEngine.fromParams(sourceInfo.get.internalId, params)

  producerBasedUserAdGraphSimilarityEngine.getCandidates(engineQuery).map { candidatesOpt =>
    candidatesOpt.getOrElse(Seq.empty).map { tweetWithScore =>
      val engineInfo =
        ProducerBasedUserAdGraphSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
      TweetWithCandidateGenerationInfo(
        tweetWithScore.tweetId,
        CandidateGenerationInfo(sourceInfo, engineInfo, Seq(engineInfo)))
    }
  }
}
/**
 * Queries the tweet-based user-ad-graph engine for the given source and wraps each result
 * with its candidate generation info. A missing engine result becomes an empty sequence.
 *
 * `sourceInfo` must be defined: the query is built from `sourceInfo.get.internalId`.
 */
private[candidate_generation] def getTweetBasedUserAdGraphCandidates(
  sourceInfo: Option[SourceInfo],
  params: configapi.Params
) = {
  val engineQuery =
    TweetBasedUserAdGraphSimilarityEngine.fromParams(sourceInfo.get.internalId, params)

  tweetBasedUserAdGraphSimilarityEngine.getCandidates(engineQuery).map { candidatesOpt =>
    candidatesOpt.getOrElse(Seq.empty).map { tweetWithScore =>
      val engineInfo =
        TweetBasedUserAdGraphSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
      TweetWithCandidateGenerationInfo(
        tweetWithScore.tweetId,
        CandidateGenerationInfo(sourceInfo, engineInfo, Seq(engineInfo)))
    }
  }
}
/**
 * Queries the consumers-based user-ad-graph engine with the real graph seeds and wraps each
 * result with a RealGraphIn source info. A missing engine result becomes an empty sequence.
 */
private[candidate_generation] def getRealGraphConsumersBasedUserAdGraphCandidates(
  realGraphSeeds: Map[UserId, Double],
  params: configapi.Params
) = {
  val engineQuery = ConsumersBasedUserAdGraphSimilarityEngine.fromParams(realGraphSeeds, params)

  // The internalId is a placeholder value. We do not plan to store the full seedUserId set.
  val placeholderSourceInfo = SourceInfo(
    sourceType = SourceType.RealGraphIn,
    internalId = InternalId.UserId(0L),
    sourceEventTime = None
  )

  consumersBasedUserAdGraphSimilarityEngine.getCandidates(engineQuery).map { candidatesOpt =>
    candidatesOpt.getOrElse(Seq.empty).map { tweetWithScore =>
      val engineInfo =
        ConsumersBasedUserAdGraphSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
      TweetWithCandidateGenerationInfo(
        tweetWithScore.tweetId,
        CandidateGenerationInfo(
          Some(placeholderSourceInfo),
          engineInfo,
          Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
        )
      )
    }
  }
}
/**
 * Fetches ad candidates from an HNSW ANN similarity engine for the given TwHIN model.
 *
 * The engine is queried with the source signal's internalId when a signal is supplied, and
 * with the requesting user's id otherwise. Note that the supplied `sourceInfo` is only used
 * to choose the query id: the CandidateGenerationInfo attached to each tweet carries no
 * source info. An absent engine response yields an empty Seq.
 *
 * @param similarityEngine     HNSW engine to query
 * @param similarityEngineType engine type recorded on every candidate
 * @param requestUserId        requesting user; query id when `sourceInfo` is None
 * @param sourceInfo           if none, then it's consumer-based similarity engine
 * @param model                model id used for the query and recorded on every candidate
 */
private[candidate_generation] def getTwHINAdsCandidates(
  similarityEngine: HnswANNSimilarityEngine,
  similarityEngineType: SimilarityEngineType,
  requestUserId: UserId,
  sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine
  model: String
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
  val queryId = sourceInfo.map(_.internalId).getOrElse(InternalId.UserId(requestUserId))
  val annQuery = buildHnswANNQuery(queryId, model)

  similarityEngine.getCandidates(annQuery).map { scoredTweetsOpt =>
    scoredTweetsOpt.getOrElse(Seq.empty).map { scored =>
      val engineInfo = SimilarityEngineInfo(
        similarityEngineType = similarityEngineType,
        modelId = Some(model),
        score = Some(scored.score))
      TweetWithCandidateGenerationInfo(
        scored.tweetId,
        CandidateGenerationInfo(None, engineInfo, Seq(engineInfo))
      )
    }
  }
}
/**
 * Fetches ad candidates from the consumer-based WALS similarity engine.
 *
 * The source signals are first restricted to the WALS-allowed source types and passed
 * through FilterUtil.tweetSourceAgeFilter with MaxTweetSignalAgeHoursParam. The engine's
 * tweets are then sorted by descending score, passed through FilterUtil.tweetAgeFilter
 * with MaxTweetAgeHoursParam, and capped at MaxCandidateNumPerSourceKeyParam. The size of
 * the age-filtered list is recorded on the engine's scoped stats. An absent engine response
 * yields an empty Seq.
 */
private[candidate_generation] def getConsumerBasedWalsCandidates(
  sourceSignals: Set[SourceInfo],
  params: configapi.Params
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
  // Fetch source signals and filter them based on age.
  val eligibleSignals = FilterUtil.tweetSourceAgeFilter(
    getConsumerBasedWalsSourceInfo(sourceSignals).toSeq,
    params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
  val walsQuery = ConsumerBasedWalsSimilarityEngine.fromParams(eligibleSignals, params)

  consumerBasedWalsSimilarityEngine.getCandidates(walsQuery).map {
    case None =>
      Seq.empty[TweetWithCandidateGenerationInfo]
    case Some(scoredTweets) =>
      val byScoreDesc = scoredTweets.sortBy(scored => -scored.score)
      val ageFiltered =
        FilterUtil.tweetAgeFilter(byScoreDesc, params(GlobalParams.MaxTweetAgeHoursParam))
      consumerBasedWalsSimilarityEngine.getScopedStats
        .stat("filteredCandidates_size").add(ageFiltered.size)

      val withGenerationInfo = ageFiltered.map { scored =>
        TweetWithCandidateGenerationInfo(
          scored.tweetId,
          CandidateGenerationInfo(
            None,
            ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(scored.score),
            Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
          )
        )
      }
      withGenerationInfo.take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
  }
}
}
object AdsCandidateSourcesRouter {

  /**
   * Chooses the SimClusters ANN embedding type for a source signal.
   *
   * Tweet-keyed source types map to LogFavLongestL2EmbeddingTweet and user/producer-keyed
   * source types map to FavBasedProducer.
   *
   * @throws IllegalArgumentException when the signal's source type is in neither group
   */
  def getSimClustersANNEmbeddingType(
    sourceInfo: SourceInfo
  ): EmbeddingType = {
    val sourceType = sourceInfo.sourceType
    sourceType match {
      // Signals keyed by a tweet.
      case SourceType.TweetFavorite | SourceType.Retweet | SourceType.OriginalTweet |
          SourceType.Reply | SourceType.TweetShare | SourceType.NotificationClick |
          SourceType.GoodTweetClick | SourceType.VideoTweetQualityView |
          SourceType.VideoTweetPlayback50 =>
        EmbeddingType.LogFavLongestL2EmbeddingTweet

      // Signals keyed by a user / producer.
      case SourceType.UserFollow | SourceType.UserRepeatedProfileVisit |
          SourceType.RealGraphOon | SourceType.FollowRecommendation |
          SourceType.UserTrafficAttributionProfileVisit | SourceType.GoodProfileClick |
          SourceType.TwiceUserId =>
        EmbeddingType.FavBasedProducer

      case _ =>
        throw new IllegalArgumentException("sourceInfo.sourceType not supported")
    }
  }

  /** Builds an HNSW ANN engine query for the given source id and model, with empty params. */
  def buildHnswANNQuery(internalId: InternalId, modelId: String): HnswANNEngineQuery =
    HnswANNEngineQuery(sourceId = internalId, modelId = modelId, params = Params.Empty)

  /** Keeps only the source signals whose source type the consumer-based WALS engine accepts. */
  def getConsumerBasedWalsSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] = {
    val allowedSourceTypeValues = Set(
      SourceType.TweetFavorite.value,
      SourceType.Retweet.value,
      SourceType.TweetDontLike.value, //currently no-op
      SourceType.TweetReport.value, //currently no-op
      SourceType.AccountMute.value, //currently no-op
      SourceType.AccountBlock.value //currently no-op
    )
    sourceSignals.filter(signal => allowedSourceTypeValues.contains(signal.sourceType.value))
  }
}

View File

@ -0,0 +1,51 @@
# Scala library target built from every *.scala file in this directory.
# Compiled with the "fatal_warnings" compiler option set and with strict_deps enabled,
# so every dependency the sources use directly must be listed below.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"ann/src/main/scala/com/twitter/ann/hnsw",
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"cuad/projects/hashspace/thrift:thrift-scala",
"decider/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
"frigate/frigate-common:base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/constants",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
"src/scala/com/twitter/ml/featurestore/lib",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/ml/api:embedding-scala",
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
"src/thrift/com/twitter/search:earlybird-scala",
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"strato/config/columns/cuad/hashspace:hashspace-strato-client",
],
)

View File

@ -0,0 +1,536 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModelConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.model.TripTweetWithScore
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.ConsumersBasedUserVideoGraphParams
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.EngineQuery
import com.twitter.cr_mixer.similarity_engine.FilterUtil
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TripEngineQuery
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
* Routes source signals (SourceInfo) and source graphs to their associated candidate
* (similarity) engines and converts the engines' results into InitialCandidates.
*
* Each similarity engine is injected by name through the ModuleNames binding annotated on
* its constructor parameter. `fetchCandidates` is the public entry point; the private
* helpers that follow it each query one family of engines. `tweetInfoStore` is used to
* look up the TweetInfo for every candidate; candidates without a TweetInfo are dropped
* (see `convertToInitialCandidates`).
*/
@Singleton
case class CandidateSourcesRouter @Inject() (
customizedRetrievalCandidateGeneration: CustomizedRetrievalCandidateGeneration,
simClustersInterestedInCandidateGeneration: SimClustersInterestedInCandidateGeneration,
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine)
tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
TweetBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine)
producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
ProducerBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
@Named(ModuleNames.ConsumerEmbeddingBasedTripSimilarityEngine)
consumerEmbeddingBasedTripSimilarityEngine: StandardSimilarityEngine[
TripEngineQuery,
TripTweetWithScore
],
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine)
consumerBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine)
consumerBasedTwoTowerSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumersBasedUserVideoGraphSimilarityEngine)
consumersBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[
ConsumersBasedUserVideoGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
UserTweetEntityGraphSimilarityEngine.Query,
TweetWithScoreAndSocialProof
],
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
ConsumerBasedWalsSimilarityEngine.Query,
TweetWithScore
],
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
globalStats: StatsReceiver,
) {
// Brings the companion object's source-type filters and allow-lists into scope.
import CandidateSourcesRouter._
// Stats receiver scoped under this class's simple name.
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
/**
 * Queries every enabled candidate source for the request and returns the candidates grouped
 * per source key (one inner Seq per group).
 *
 * Tweet-based and producer-based sources produce one group per distinct query built from the
 * matching source signals. SimClusters InterestedIn and customized retrieval produce one
 * group per similarity-engine model. Each remaining source (Trip log-fav / follow, UVG
 * RealGraphIn, WALS, TwHIN, TwoTower) contributes at most one group and only when its
 * enabling param is set.
 *
 * Empty groups are removed from the result. The number of groups and the total number of
 * candidates (duplicates included) are recorded on `stats`.
 */
def fetchCandidates(
  requestUserId: UserId,
  sourceSignals: Set[SourceInfo],
  sourceGraphs: Map[String, Option[GraphSourceInfo]],
  params: configapi.Params,
): Future[Seq[Seq[InitialCandidate]]] = {
  val requesterId = InternalId.UserId(requestUserId)

  // Runs `fetch` only when `enabled`, wrapping its candidates as a single group;
  // contributes no groups at all otherwise.
  def singleGroupIf(
    enabled: Boolean
  )(
    fetch: => Future[Seq[InitialCandidate]]
  ): Future[Seq[Seq[InitialCandidate]]] =
    if (enabled) fetch.map(Seq(_)) else Future.Nil

  val tweetBasedFut = getCandidates(
    getTweetBasedSourceInfo(sourceSignals),
    params,
    TweetBasedUnifiedSimilarityEngine.fromParams,
    tweetBasedUnifiedSimilarityEngine.getCandidates)

  val producerBasedFut = getCandidates(
    getProducerBasedSourceInfo(sourceSignals),
    params,
    ProducerBasedUnifiedSimilarityEngine.fromParams(_, _),
    producerBasedUnifiedSimilarityEngine.getCandidates)

  val simClustersInterestedInFut = getCandidatesPerSimilarityEngineModel(
    requestUserId,
    params,
    SimClustersInterestedInCandidateGeneration.fromParams,
    simClustersInterestedInCandidateGeneration.get)

  val logFavTripFut = singleGroupIf(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableLogFavBasedSimClustersTripParam)
  ) {
    getSimClustersTripCandidates(
      params,
      ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
        ModelConfig.ConsumerLogFavBasedInterestedInEmbedding,
        requesterId,
        params
      ),
      consumerEmbeddingBasedTripSimilarityEngine
    )
  }

  val uvgRealGraphInFut = singleGroupIf(
    params(ConsumersBasedUserVideoGraphParams.EnableSourceParam)
  ) {
    val realGraphInOpt =
      getGraphSourceInfoBySourceType(SourceType.RealGraphIn.name, sourceGraphs)
    getGraphBasedCandidates(
      params,
      ConsumersBasedUserVideoGraphSimilarityEngine
        .fromParamsForRealGraphIn(
          realGraphInOpt.map(_.seedWithScores).getOrElse(Map.empty),
          params),
      consumersBasedUserVideoGraphSimilarityEngine,
      ConsumersBasedUserVideoGraphSimilarityEngine.toSimilarityEngineInfo,
      realGraphInOpt
    )
  }

  val followTripFut = singleGroupIf(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableFollowBasedSimClustersTripParam)
  ) {
    getSimClustersTripCandidates(
      params,
      ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
        ModelConfig.ConsumerFollowBasedInterestedInEmbedding,
        requesterId,
        params
      ),
      consumerEmbeddingBasedTripSimilarityEngine
    )
  }

  val walsFut = singleGroupIf(params(ConsumerBasedWalsParams.EnableSourceParam)) {
    getConsumerBasedWalsCandidates(sourceSignals, params)
  }

  val twHINFut = singleGroupIf(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)
  ) {
    getHnswCandidates(
      params,
      ConsumerEmbeddingBasedTwHINSimilarityEngine.fromParams(requesterId, params),
      consumerBasedTwHINANNSimilarityEngine
    )
  }

  val twoTowerFut = singleGroupIf(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwoTowerParam)
  ) {
    getHnswCandidates(
      params,
      ConsumerEmbeddingBasedTwoTowerSimilarityEngine.fromParams(requesterId, params),
      consumerBasedTwoTowerSimilarityEngine
    )
  }

  val customizedRetrievalFut = getCandidatesPerSimilarityEngineModel(
    requestUserId,
    params,
    CustomizedRetrievalCandidateGeneration.fromParams,
    customizedRetrievalCandidateGeneration.get)

  // The order below determines the order of the groups in the result.
  val allSourceFutures = Seq(
    tweetBasedFut,
    producerBasedFut,
    simClustersInterestedInFut,
    walsFut,
    logFavTripFut,
    followTripFut,
    twHINFut,
    twoTowerFut,
    uvgRealGraphInFut,
    customizedRetrievalFut
  )

  Future.collect(allSourceFutures).map { groupsPerSource =>
    // remove empty innerSeq
    val nonEmptyGroups = groupsPerSource.flatten.filter(_.nonEmpty)
    stats.stat("numOfSequences").add(nonEmptyGroups.size)
    stats.stat("flattenCandidatesWithDup").add(nonEmptyGroups.map(_.size).sum)
    nonEmptyGroups
  }
}
/**
 * Fetches candidates from a graph-based (atomic) similarity engine.
 *
 * The engine's tweets are sorted by descending score, annotated with the engine info
 * produced by `toSimilarityEngineInfo`, capped at MaxCandidateNumPerSourceKeyParam, and
 * converted to InitialCandidates. The number of sorted candidates is recorded on the
 * engine's scoped stats.
 *
 * @param graphSourceInfoOpt when defined, every candidate carries a SourceInfo with the
 *                           graph's source type and a placeholder internalId (UserId 0)
 */
private def getGraphBasedCandidates[QueryType](
  params: configapi.Params,
  query: EngineQuery[QueryType],
  engine: StandardSimilarityEngine[QueryType, TweetWithScore],
  toSimilarityEngineInfo: Double => SimilarityEngineInfo,
  graphSourceInfoOpt: Option[GraphSourceInfo] = None
): Future[Seq[InitialCandidate]] = {
  // The internalId is a placeholder value. We do not plan to store the full seedUserId set.
  val placeholderSourceInfo = graphSourceInfoOpt.map { graphSourceInfo =>
    SourceInfo(
      sourceType = graphSourceInfo.sourceType,
      internalId = InternalId.UserId(0L),
      sourceEventTime = None
    )
  }

  engine.getCandidates(query).flatMap { scoredTweetsOpt =>
    val annotatedOpt = scoredTweetsOpt.map { scoredTweets =>
      val byScoreDesc = scoredTweets.sortBy(scored => -scored.score)
      engine.getScopedStats.stat("sortedCandidates_size").add(byScoreDesc.size)

      val withGenerationInfo = byScoreDesc.map { scored =>
        TweetWithCandidateGenerationInfo(
          scored.tweetId,
          CandidateGenerationInfo(
            placeholderSourceInfo,
            toSimilarityEngineInfo(scored.score),
            Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
          )
        )
      }
      withGenerationInfo.take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
    }
    convertToInitialCandidates(annotatedOpt.getOrElse(Seq.empty))
  }
}
/**
 * Builds one engine query per source signal, runs them all concurrently, and converts each
 * query's result into its own group of InitialCandidates.
 *
 * Queries are built by mapping over the signal Set, so signals that produce equal queries
 * collapse into a single request. A query whose engine returns None yields an empty group.
 *
 * @param fromParams builds the engine query for one source signal
 * @param getFunc    runs one engine query
 */
private def getCandidates[QueryType](
  sourceSignals: Set[SourceInfo],
  params: configapi.Params,
  fromParams: (SourceInfo, configapi.Params) => QueryType,
  getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val distinctQueries = sourceSignals.map(fromParams(_, params)).toSeq

  val perQueryCandidates = distinctQueries.map { engineQuery =>
    getFunc(engineQuery).flatMap { foundOpt =>
      convertToInitialCandidates(foundOpt.toSeq.flatten)
    }
  }
  Future.collect(perQueryCandidates)
}
/**
 * Fetches candidates from the consumer-based WALS similarity engine.
 *
 * The source signals are first restricted to the WALS-allowed source types and passed
 * through FilterUtil.tweetSourceAgeFilter with MaxTweetSignalAgeHoursParam. The engine's
 * tweets are then sorted by descending score, passed through FilterUtil.tweetAgeFilter
 * with MaxTweetAgeHoursParam, capped at MaxCandidateNumPerSourceKeyParam, and converted to
 * InitialCandidates. The size of the age-filtered list is recorded on the engine's scoped
 * stats.
 */
private def getConsumerBasedWalsCandidates(
  sourceSignals: Set[SourceInfo],
  params: configapi.Params
): Future[Seq[InitialCandidate]] = {
  // Fetch source signals and filter them based on age.
  val eligibleSignals = FilterUtil.tweetSourceAgeFilter(
    getConsumerBasedWalsSourceInfo(sourceSignals).toSeq,
    params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
  val walsQuery = ConsumerBasedWalsSimilarityEngine.fromParams(eligibleSignals, params)

  consumerBasedWalsSimilarityEngine.getCandidates(walsQuery).flatMap { scoredTweetsOpt =>
    val annotatedOpt = scoredTweetsOpt.map { scoredTweets =>
      val byScoreDesc = scoredTweets.sortBy(scored => -scored.score)
      val ageFiltered =
        FilterUtil.tweetAgeFilter(byScoreDesc, params(GlobalParams.MaxTweetAgeHoursParam))
      consumerBasedWalsSimilarityEngine.getScopedStats
        .stat("filteredCandidates_size").add(ageFiltered.size)

      val withGenerationInfo = ageFiltered.map { scored =>
        TweetWithCandidateGenerationInfo(
          scored.tweetId,
          CandidateGenerationInfo(
            None,
            ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(scored.score),
            Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
          )
        )
      }
      withGenerationInfo.take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
    }
    convertToInitialCandidates(annotatedOpt.getOrElse(Seq.empty))
  }
}
/**
 * Fetches candidates from the SimClusters Trip similarity engine.
 *
 * Every tweet is tagged with the ExploreTripOfflineSimClustersTweets engine type and its
 * score, with no source info, no model id and no contributing engines, and is then
 * converted to an InitialCandidate. No sorting, age filtering or capping is applied here.
 */
private def getSimClustersTripCandidates(
  params: configapi.Params,
  query: TripEngineQuery,
  engine: StandardSimilarityEngine[
    TripEngineQuery,
    TripTweetWithScore
  ],
): Future[Seq[InitialCandidate]] = {
  def withGenerationInfo(tripTweet: TripTweetWithScore): TweetWithCandidateGenerationInfo =
    TweetWithCandidateGenerationInfo(
      tripTweet.tweetId,
      CandidateGenerationInfo(
        None,
        SimilarityEngineInfo(
          SimilarityEngineType.ExploreTripOfflineSimClustersTweets,
          None,
          Some(tripTweet.score)),
        Seq.empty
      )
    )

  engine.getCandidates(EngineQuery(query, params)).flatMap { tripTweetsOpt =>
    convertToInitialCandidates(tripTweetsOpt.toSeq.flatten.map(withGenerationInfo))
  }
}
/**
 * Fetches candidates from an HNSW ANN similarity engine.
 *
 * The engine's tweets are sorted by descending score, passed through
 * FilterUtil.tweetAgeFilter with MaxTweetAgeHoursParam, annotated with the engine info for
 * this query, capped at MaxCandidateNumPerSourceKeyParam, and converted to
 * InitialCandidates. The size of the age-filtered list is recorded on the engine's scoped
 * stats.
 */
private def getHnswCandidates(
  params: configapi.Params,
  query: HnswANNEngineQuery,
  engine: HnswANNSimilarityEngine,
): Future[Seq[InitialCandidate]] = {
  engine.getCandidates(query).flatMap { scoredTweetsOpt =>
    val annotatedOpt = scoredTweetsOpt.map { scoredTweets =>
      val byScoreDesc = scoredTweets.sortBy(scored => -scored.score)
      val ageFiltered =
        FilterUtil.tweetAgeFilter(byScoreDesc, params(GlobalParams.MaxTweetAgeHoursParam))
      engine.getScopedStats.stat("filteredCandidates_size").add(ageFiltered.size)

      val withGenerationInfo = ageFiltered.map { scored =>
        TweetWithCandidateGenerationInfo(
          scored.tweetId,
          CandidateGenerationInfo(
            None,
            engine.toSimilarityEngineInfo(query, scored.score),
            Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
          )
        )
      }
      withGenerationInfo.take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
    }
    convertToInitialCandidates(annotatedOpt.getOrElse(Seq.empty))
  }
}
/**
 * Returns candidates from each similarity engine model separately.
 *
 * A single query is built for the requesting user; `getFunc` answers it with one inner Seq
 * per similarity engine model. Each inner Seq is converted to InitialCandidates on its own,
 * so the grouping is preserved in the returned Seq[Seq[InitialCandidate]]. A None response
 * yields no groups.
 */
private def getCandidatesPerSimilarityEngineModel[QueryType](
  requestUserId: UserId,
  params: configapi.Params,
  fromParams: (InternalId, configapi.Params) => QueryType,
  getFunc: QueryType => Future[
    Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]
  ]
): Future[Seq[Seq[InitialCandidate]]] = {
  val userQuery = fromParams(InternalId.UserId(requestUserId), params)
  for {
    groupsOpt <- getFunc(userQuery)
    initialCandidateGroups <- Future.collect(
      groupsOpt.getOrElse(Seq.empty).map(convertToInitialCandidates))
  } yield initialCandidateGroups
}
/**
 * Attaches TweetInfo to each candidate, turning it into an InitialCandidate.
 *
 * TweetInfo for all distinct tweet ids is fetched from `tweetInfoStore` in one multiGet.
 * If tweetInfo does not exist for a tweet (missing key or a None value), that candidate is
 * filtered out. The relative order of the remaining candidates is preserved.
 *
 * @param candidates tweets with their candidate generation info
 * @return the candidates that have a TweetInfo, in input order
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo],
): Future[Seq[InitialCandidate]] = {
  val tweetIds = candidates.map(_.tweetId).toSet
  Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
    // One lookup per candidate: a missing key and a None value both drop the candidate.
    candidates.flatMap { candidate =>
      tweetInfos.getOrElse(candidate.tweetId, None).map { tweetInfo =>
        InitialCandidate(
          tweetId = candidate.tweetId,
          tweetInfo = tweetInfo,
          candidate.candidateGenerationInfo
        )
      }
    }
  }
}
}
object CandidateSourcesRouter {

  /***
   * Signal funneling should not exist in CG or even in any SimilarityEngine.
   * They will be in Router, or eventually, in CrCandidateGenerator.
   */
  val AllowedSourceTypesForConsumerBasedWalsSE = Set(
    SourceType.TweetFavorite.value,
    SourceType.Retweet.value,
    SourceType.TweetDontLike.value, //currently no-op
    SourceType.TweetReport.value, //currently no-op
    SourceType.AccountMute.value, //currently no-op
    SourceType.AccountBlock.value //currently no-op
  )

  /** Source types (by value) accepted by the tweet-based unified similarity engine. */
  val AllowedSourceTypesForTweetBasedUnifiedSE = Set(
    SourceType.TweetFavorite.value,
    SourceType.Retweet.value,
    SourceType.OriginalTweet.value,
    SourceType.Reply.value,
    SourceType.TweetShare.value,
    SourceType.NotificationClick.value,
    SourceType.GoodTweetClick.value,
    SourceType.VideoTweetQualityView.value,
    SourceType.VideoTweetPlayback50.value,
    SourceType.TweetAggregation.value,
  )

  /** Source types (by value) accepted by the producer-based unified similarity engine. */
  val AllowedSourceTypesForProducerBasedUnifiedSE = Set(
    SourceType.UserFollow.value,
    SourceType.UserRepeatedProfileVisit.value,
    SourceType.RealGraphOon.value,
    SourceType.FollowRecommendation.value,
    SourceType.UserTrafficAttributionProfileVisit.value,
    SourceType.GoodProfileClick.value,
    SourceType.ProducerAggregation.value,
  )

  /**
   * Looks up the graph source registered under the given source-type name.
   * Returns None both when the name is absent and when it maps to None.
   */
  def getGraphSourceInfoBySourceType(
    sourceTypeStr: String,
    sourceGraphs: Map[String, Option[GraphSourceInfo]]
  ): Option[GraphSourceInfo] =
    sourceGraphs.get(sourceTypeStr).flatten

  /** Keeps the signals whose source type the tweet-based unified engine accepts. */
  def getTweetBasedSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] =
    sourceSignals.filter { signal =>
      AllowedSourceTypesForTweetBasedUnifiedSE.contains(signal.sourceType.value)
    }

  /** Keeps the signals whose source type the producer-based unified engine accepts. */
  def getProducerBasedSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] =
    sourceSignals.filter { signal =>
      AllowedSourceTypesForProducerBasedUnifiedSE.contains(signal.sourceType.value)
    }

  /** Keeps the signals whose source type the consumer-based WALS engine accepts. */
  def getConsumerBasedWalsSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] =
    sourceSignals.filter { signal =>
      AllowedSourceTypesForConsumerBasedWalsSE.contains(signal.sourceType.value)
    }
}

View File

@ -0,0 +1,350 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.blender.SwitchBlender
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.filter.PostRankFilterRunner
import com.twitter.cr_mixer.filter.PreRankFilterRunner
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.param.RankerParams
import com.twitter.cr_mixer.param.RecentNegativeSignalParams
import com.twitter.cr_mixer.ranker.SwitchRanker
import com.twitter.cr_mixer.source_signal.SourceInfoRouter
import com.twitter.cr_mixer.source_signal.UssStore.EnabledNegativeSourceTypes
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer
import javax.inject.Inject
import javax.inject.Singleton
/**
* Orchestrates candidate generation for a CrCandidateGeneratorQuery.
*
* For now it performs the main steps as follows:
* 1. Source signal (via USS, FRS) fetch
* 2. Candidate generation
* 3. Filtering
* 4. Interleave blender
* 5. Ranker
* 6. Post-ranker filter
* 7. Truncation
*
* Each step is delegated to one of the injected collaborators and wrapped in its own
* stats scope (declared below). `get` is the entry point.
*/
@Singleton
class CrCandidateGenerator @Inject() (
sourceInfoRouter: SourceInfoRouter,
candidateSourceRouter: CandidateSourcesRouter,
switchBlender: SwitchBlender,
preRankFilterRunner: PreRankFilterRunner,
postRankFilterRunner: PostRankFilterRunner,
switchRanker: SwitchRanker,
crMixerScribeLogger: CrMixerScribeLogger,
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
// Timer used by `get` to enforce timeoutConfig.serviceTimeout on the whole pipeline.
private val timer: Timer = new JavaTimer(true)
// Root stats scope, named after this class's canonical (fully qualified) name.
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
// One sub-scope per pipeline step.
private val fetchSourcesStats = stats.scope("fetchSources")
private val fetchPositiveSourcesStats = stats.scope("fetchPositiveSources")
private val fetchNegativeSourcesStats = stats.scope("fetchNegativeSources")
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val fetchCandidatesAfterFilterStats = stats.scope("fetchCandidatesAfterFilter")
private val preRankFilterStats = stats.scope("preRankFilter")
private val interleaveStats = stats.scope("interleave")
private val rankStats = stats.scope("rank")
private val postRankFilterStats = stats.scope("postRankFilter")
// Scopes for stats about tweets carrying the blue-verified annotation.
private val blueVerifiedTweetStats = stats.scope("blueVerifiedTweetStats")
private val blueVerifiedTweetStatsPerSimilarityEngine =
stats.scope("blueVerifiedTweetStatsPerSimilarityEngine")
/**
* Runs the full candidate generation pipeline for one query.
*
* Steps, in order: fetch source signals and graphs; split the signals into positive and
* negative ones; optionally drop positive signals whose user id also appears in a negative
* signal; fetch candidates; pre-rank filter; interleave; keep at most
* RankerParams.MaxCandidatesToRank candidates; rank; post-rank filter.
*
* Result selection: when RankerParams.EnableBlueVerifiedTopK is set and at least one
* post-rank candidate is blue verified, the result is the blue-verified candidates (at most
* query.maxNumResults) followed by other candidates up to a total of query.maxNumResults.
* Otherwise all post-rank candidates are returned as-is; no truncation is applied here.
*
* The whole pipeline fails if it does not complete within timeoutConfig.serviceTimeout.
* Item and result stats are recorded under the "all" scope and a per-product scope.
*/
def get(query: CrCandidateGeneratorQuery): Future[Seq[RankedCandidate]] = {
val allStats = stats.scope("all")
val perProductStats = stats.scope("perProduct", query.product.toString)
val perProductBlueVerifiedStats =
blueVerifiedTweetStats.scope("perProduct", query.product.toString)
StatsUtil.trackItemsStats(allStats) {
trackResultStats(perProductStats) {
StatsUtil.trackItemsStats(perProductStats) {
val result = for {
(sourceSignals, sourceGraphsMap) <- StatsUtil.trackBlockStats(fetchSourcesStats) {
fetchSources(query)
}
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) {
// find the positive and negative signals
val (positiveSignals, negativeSignals) = sourceSignals.partition { signal =>
!EnabledNegativeSourceTypes.contains(signal.sourceType)
}
fetchPositiveSourcesStats.stat("size").add(positiveSignals.size)
fetchNegativeSourcesStats.stat("size").add(negativeSignals.size)
// find the positive signals to keep, removing block and muted users
val filteredSourceInfo =
if (negativeSignals.nonEmpty && query.params(
RecentNegativeSignalParams.EnableSourceParam)) {
filterSourceInfo(positiveSignals, negativeSignals)
} else {
positiveSignals
}
// fetch candidates from the positive signals
StatsUtil.trackBlockStats(fetchCandidatesStats) {
fetchCandidates(query, filteredSourceInfo, sourceGraphsMap)
}
}
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
preRankFilter(query, initialCandidates)
}
interleavedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
interleave(query, filteredCandidates)
}
rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
// Only the first MaxCandidatesToRank interleaved candidates are sent to the ranker.
val candidatesToRank =
interleavedCandidates.take(query.params(RankerParams.MaxCandidatesToRank))
rank(query, candidatesToRank)
}
postRankFilterCandidates <- StatsUtil.trackItemsStats(postRankFilterStats) {
postRankFilter(query, rankedCandidates)
}
} yield {
// Record top-K stats at two fixed cut-offs and at the query's own maxNumResults.
trackTopKStats(
800,
postRankFilterCandidates,
isQueryK = false,
perProductBlueVerifiedStats)
trackTopKStats(
400,
postRankFilterCandidates,
isQueryK = false,
perProductBlueVerifiedStats)
trackTopKStats(
query.maxNumResults,
postRankFilterCandidates,
isQueryK = true,
perProductBlueVerifiedStats)
val (blueVerifiedTweets, remainingTweets) =
postRankFilterCandidates.partition(
_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
val topKBlueVerified = blueVerifiedTweets.take(query.maxNumResults)
// Fill whatever is left of maxNumResults with non-blue-verified candidates.
val topKRemaining = remainingTweets.take(query.maxNumResults - topKBlueVerified.size)
trackBlueVerifiedTweetStats(topKBlueVerified, perProductBlueVerifiedStats)
if (topKBlueVerified.nonEmpty && query.params(RankerParams.EnableBlueVerifiedTopK)) {
topKBlueVerified ++ topKRemaining
} else {
// Not truncated here: every post-rank candidate is returned.
postRankFilterCandidates
}
}
result.raiseWithin(timeoutConfig.serviceTimeout)(timer)
}
}
}
}
/**
* Fetches the source signals and source graphs for the request.
*
* Delegates to sourceInfoRouter.get (with the query's userId, product, userState and
* params) and passes the result, together with the query, through
* crMixerScribeLogger.scribeSignalSources.
*
* @return the set of source signals and a map of optional GraphSourceInfo keyed by String
*         (looked up by source-type name elsewhere in this package)
*/
private def fetchSources(
query: CrCandidateGeneratorQuery
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
crMixerScribeLogger.scribeSignalSources(
query,
sourceInfoRouter
.get(query.userId, query.product, query.userState, query.params))
}
/**
 * Removes the positive signals that point at a user who also appears in a negative signal.
 *
 * Only signals whose internalId is a UserId take part in the comparison: negative signals
 * with any other id kind are ignored, and positive signals with any other id kind are
 * always kept.
 */
private def filterSourceInfo(
  positiveSignals: Set[SourceInfo],
  negativeSignals: Set[SourceInfo]
): Set[SourceInfo] = {
  val negativeUserIds: Set[Long] = negativeSignals.collect {
    case SourceInfo(_, InternalId.UserId(userId), _) => userId
  }
  positiveSignals.filterNot {
    case SourceInfo(_, InternalId.UserId(userId), _) => negativeUserIds.contains(userId)
    case _ => false
  }
}
/**
 * Fetches the initial candidates for the query from the candidate source router and passes
 * them through crMixerScribeLogger.scribeInitialCandidates.
 *
 * As a side effect, once the candidates are available, a counter is incremented for every
 * blue-verified candidate, scoped by product, similarity engine type and author id. The
 * engine type is the first contributing similarity engine when there is one; candidates
 * from atomic engines (which record no contributing engines) fall back to the candidate's
 * own similarity engine info, so they no longer abort the stat tracking.
 *
 * @param sourceSignals source signals to generate candidates from
 * @param sourceGraphs  optional graph sources keyed by source-type name
 * @return candidate groups as returned by the scribe logger
 */
def fetchCandidates(
  query: CrCandidateGeneratorQuery,
  sourceSignals: Set[SourceInfo],
  sourceGraphs: Map[String, Option[GraphSourceInfo]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val initialCandidates = candidateSourceRouter
    .fetchCandidates(
      query.userId,
      sourceSignals,
      sourceGraphs,
      query.params
    )

  // Pure side effect: registered as a callback rather than building a discarded future.
  initialCandidates.onSuccess { candidateGroups =>
    val perProductStats =
      blueVerifiedTweetStatsPerSimilarityEngine.scope(query.product.toString)
    candidateGroups.flatten.foreach { candidate =>
      if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) {
        val generationInfo = candidate.candidateGenerationInfo
        // contributingSimilarityEngines is empty for atomic engines, so `.head` is unsafe.
        val engineType = generationInfo.contributingSimilarityEngines.headOption
          .getOrElse(generationInfo.similarityEngineInfo)
          .similarityEngineType
        perProductStats
          .scope(engineType.toString)
          .counter(candidate.tweetInfo.authorId.toString)
          .incr()
      }
    }
  }

  crMixerScribeLogger.scribeInitialCandidates(
    query,
    initialCandidates
  )
}
/**
 * Runs the sequential pre-rank filters over the candidate groups and passes the result to
 * `crMixerScribeLogger.scribePreRankFilterCandidates`.
 *
 * @param query      the request the candidates belong to
 * @param candidates candidate groups to filter
 * @return the future returned by `scribePreRankFilterCandidates`
 */
private def preRankFilter(
query: CrCandidateGeneratorQuery,
candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[Seq[InitialCandidate]]] = {
// NOTE(review): the filter call is the logger's inline argument; confirm whether that
// parameter is by-name before pulling it into a local val.
crMixerScribeLogger.scribePreRankFilterCandidates(
query,
preRankFilterRunner
.runSequentialFilters(query, candidates))
}
/**
 * Applies the post-rank filters by delegating to `postRankFilterRunner.run`.
 *
 * @param query      the request the candidates belong to
 * @param candidates ranked candidates to filter
 * @return the future returned by the runner
 */
private def postRankFilter(
query: CrCandidateGeneratorQuery,
candidates: Seq[RankedCandidate]
): Future[Seq[RankedCandidate]] = {
postRankFilterRunner.run(query, candidates)
}
/**
 * Blends the candidate groups into a single sequence with `switchBlender` and passes the
 * result to `crMixerScribeLogger.scribeInterleaveCandidates`.
 *
 * @param query      the request; params and userState are given to the blender
 * @param candidates candidate groups to blend
 * @return the future returned by `scribeInterleaveCandidates`
 */
private def interleave(
query: CrCandidateGeneratorQuery,
candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[BlendedCandidate]] = {
// NOTE(review): the blend call is the logger's inline argument; confirm whether that
// parameter is by-name before pulling it into a local val.
crMixerScribeLogger.scribeInterleaveCandidates(
query,
switchBlender
.blend(query.params, query.userState, candidates))
}
/**
 * Ranks the blended candidates with `switchRanker` and passes the result to
 * `crMixerScribeLogger.scribeRankedCandidates`.
 *
 * @param query      the request the candidates belong to
 * @param candidates blended candidates to rank
 * @return the future returned by `scribeRankedCandidates`
 */
private def rank(
query: CrCandidateGeneratorQuery,
candidates: Seq[BlendedCandidate],
): Future[Seq[RankedCandidate]] = {
// NOTE(review): the rank call is the logger's inline argument; confirm whether that
// parameter is by-name before pulling it into a local val.
crMixerScribeLogger.scribeRankedCandidates(
query,
switchRanker.rank(query, candidates)
)
}
/**
 * Evaluates `fn` and, when it succeeds, records the "reasonChosen" and "potentialReasons"
 * size stats for the returned candidates.
 *
 * @param stats receiver the stats are written to
 * @param fn    by-name computation producing the ranked candidates
 * @return the future returned by `onSuccess` on the evaluated `fn`
 */
private def trackResultStats(
  stats: StatsReceiver
)(
  fn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  // Applied in this order once the result is available.
  val trackers: Seq[(Seq[RankedCandidate], StatsReceiver) => Unit] = Seq(
    trackReasonChosenSourceTypeStats,
    trackReasonChosenSimilarityEngineStats,
    trackPotentialReasonsSourceTypeStats,
    trackPotentialReasonsSimilarityEngineStats
  )
  fn.onSuccess { rankedCandidates =>
    trackers.foreach(track => track(rankedCandidates, stats))
  }
}
/**
 * Records, per source type of the chosen reason, how many candidates were selected for it.
 * Candidates whose chosen reason has no source info are reported under "RequesterId".
 *
 * @param candidates ranked candidates to count
 * @param stats      receiver the "reasonChosen"/"sourceType" stats are added to
 */
private def trackReasonChosenSourceTypeStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val bySourceType = candidates.groupBy(_.reasonChosen.sourceInfoOpt.map(_.sourceType))
  for ((maybeSourceType, group) <- bySourceType) {
    val label = maybeSourceType.fold("RequesterId")(_.toString) // default
    stats.stat("reasonChosen", "sourceType", label, "size").add(group.size)
  }
}
/**
 * Records, per similarity engine type of the chosen reason, how many candidates were
 * selected for it.
 *
 * @param candidates ranked candidates to count
 * @param stats      receiver the "reasonChosen"/"similarityEngine" stats are added to
 */
private def trackReasonChosenSimilarityEngineStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val byEngineType =
    candidates.groupBy(_.reasonChosen.similarityEngineInfo.similarityEngineType)
  for ((engineType, group) <- byEngineType) {
    val sizeStat = stats.stat("reasonChosen", "similarityEngine", engineType.toString, "size")
    sizeStat.add(group.size)
  }
}
/**
 * Records, per source type, how often it appears among the potential reasons of all
 * candidates. Potential reasons without source info are reported under "RequesterId".
 *
 * @param candidates ranked candidates whose potential reasons are counted
 * @param stats      receiver the "potentialReasons"/"sourceType" stats are added to
 */
private def trackPotentialReasonsSourceTypeStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val sourceTypes = for {
    candidate <- candidates
    reason <- candidate.potentialReasons
  } yield reason.sourceInfoOpt.map(_.sourceType)

  sourceTypes.groupBy(identity).foreach {
    case (maybeSourceType, occurrences) =>
      val label = maybeSourceType.fold("RequesterId")(_.toString) // default
      stats.stat("potentialReasons", "sourceType", label, "size").add(occurrences.size)
  }
}
/**
 * Records, per similarity engine type, how often it appears among the potential reasons
 * of all candidates.
 *
 * @param candidates ranked candidates whose potential reasons are counted
 * @param stats      receiver the "potentialReasons"/"similarityEngine" stats are added to
 */
private def trackPotentialReasonsSimilarityEngineStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val engineTypes = for {
    candidate <- candidates
    reason <- candidate.potentialReasons
  } yield reason.similarityEngineInfo.similarityEngineType

  engineTypes.groupBy(identity).foreach {
    case (engineType, occurrences) =>
      stats
        .stat("potentialReasons", "similarityEngine", engineType.toString, "size")
        .add(occurrences.size)
  }
}
/**
 * For every candidate annotated as blue-verified, increments a counter named after the
 * author id and a counter named after the tweet id inside the author's scope.
 *
 * @param candidates    ranked candidates to inspect
 * @param statsReceiver receiver the counters are created on
 */
private def trackBlueVerifiedTweetStats(
  candidates: Seq[RankedCandidate],
  statsReceiver: StatsReceiver
): Unit = {
  for {
    candidate <- candidates
    if candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
  } {
    val authorKey = candidate.tweetInfo.authorId.toString
    statsReceiver.counter(authorKey).incr()
    statsReceiver.scope(authorKey).counter(candidate.tweetId.toString).incr()
  }
}
/**
 * For each author that has at least one blue-verified candidate, records how many of that
 * author's candidates fall within the first `k` positions and how many fall after them.
 *
 * @param k               split position
 * @param tweetCandidates ranked candidates, in rank order
 * @param isQueryK        when true the stats are named "topK"/"beyondK"; otherwise the
 *                        value of `k` is embedded in the names ("top<k>"/"beyond<k>")
 * @param statsReceiver   receiver; stats are scoped by author id
 */
private def trackTopKStats(
  k: Int,
  tweetCandidates: Seq[RankedCandidate],
  isQueryK: Boolean,
  statsReceiver: StatsReceiver
): Unit = {
  val (withinK, afterK) = tweetCandidates.splitAt(k)
  val (withinName, afterName) =
    if (isQueryK) ("topK", "beyondK") else (s"top$k", s"beyond$k")

  val blueVerifiedAuthorIds = tweetCandidates
    .filter(_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
    .map(_.tweetInfo.authorId)
    .toSet

  for (authorId <- blueVerifiedAuthorIds) {
    val withinCount = withinK.count(_.tweetInfo.authorId == authorId)
    val afterCount = afterK.count(_.tweetInfo.authorId == authorId)
    val authorStats = statsReceiver.scope(authorId.toString)
    authorStats.stat(withinName).add(withinCount)
    authorStats.stat(afterName).add(afterCount)
  }
}
}

View File

@ -0,0 +1,345 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.candidate_generation.CustomizedRetrievalCandidateGeneration.Query
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedCandidateGenerationParams._
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedTwhinParams._
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupEngineQuery
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.base.CandidateSource
import com.twitter.frigate.common.base.Stats
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import scala.collection.mutable.ArrayBuffer
/**
* A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources
*
* Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different
* similarity engines without blending. In other words, this class shall not be thought of as a
* Unified Similarity Engine. It is a CG that calls multiple singular Similarity Engines.
*/
@Singleton
case class CustomizedRetrievalCandidateGeneration @Inject() (
@Named(ModuleNames.TwhinCollabFilterSimilarityEngine)
twhinCollabFilterSimilarityEngine: LookupSimilarityEngine[
TwhinCollabFilterSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.DiffusionBasedSimilarityEngine)
diffusionBasedSimilarityEngine: LookupSimilarityEngine[
DiffusionBasedSimilarityEngine.Query,
TweetWithScore
],
statsReceiver: StatsReceiver)
extends CandidateSource[
Query,
Seq[TweetWithCandidateGenerationInfo]
] {
override def name: String = this.getClass.getSimpleName
private val stats = statsReceiver.scope(name)
private val fetchCandidatesStat = stats.scope("fetchCandidates")
/**
 * For each Similarity Engine Model, return a list of tweet candidates.
 *
 * Only `InternalId.UserId` queries are supported. For those, up to five lookups are issued
 * (two TwHIN collab-filter, two TwHIN multi-cluster, one diffusion-based), each guarded by
 * its enable flag on the query. The two collab-filter lists are interleaved into one result,
 * the two multi-cluster lists into another, and the diffusion list forms a third. Empty
 * results are omitted from the returned outer Seq.
 *
 * @param query the lookup queries and their enable flags
 * @return a future of Some(non-empty candidate lists), in the order collab-filter,
 *         multi-cluster, diffusion
 * @throws IllegalArgumentException thrown synchronously (not as a failed Future) when
 *         `query.internalId` is not a `UserId`
 */
override def get(
query: Query
): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
query.internalId match {
case InternalId.UserId(_) =>
Stats.trackOption(fetchCandidatesStat) {
// All lookups are issued before the join below; a disabled source resolves to Future.None.
val twhinCollabFilterForFollowCandidatesFut = if (query.enableTwhinCollabFilter) {
twhinCollabFilterSimilarityEngine.getCandidates(query.twhinCollabFilterFollowQuery)
} else Future.None
val twhinCollabFilterForEngagementCandidatesFut =
if (query.enableTwhinCollabFilter) {
twhinCollabFilterSimilarityEngine.getCandidates(
query.twhinCollabFilterEngagementQuery)
} else Future.None
// The multi-cluster queries go through the same twhinCollabFilterSimilarityEngine instance.
val twhinMultiClusterForFollowCandidatesFut = if (query.enableTwhinMultiCluster) {
twhinCollabFilterSimilarityEngine.getCandidates(query.twhinMultiClusterFollowQuery)
} else Future.None
val twhinMultiClusterForEngagementCandidatesFut =
if (query.enableTwhinMultiCluster) {
twhinCollabFilterSimilarityEngine.getCandidates(
query.twhinMultiClusterEngagementQuery)
} else Future.None
val diffusionBasedSimilarityEngineCandidatesFut = if (query.enableRetweetBasedDiffusion) {
diffusionBasedSimilarityEngine.getCandidates(query.diffusionBasedSimilarityEngineQuery)
} else Future.None
Future
.join(
twhinCollabFilterForFollowCandidatesFut,
twhinCollabFilterForEngagementCandidatesFut,
twhinMultiClusterForFollowCandidatesFut,
twhinMultiClusterForEngagementCandidatesFut,
diffusionBasedSimilarityEngineCandidatesFut
).map {
case (
twhinCollabFilterForFollowCandidates,
twhinCollabFilterForEngagementCandidates,
twhinMultiClusterForFollowCandidates,
twhinMultiClusterForEngagementCandidates,
diffusionBasedSimilarityEngineCandidates) =>
// NOTE(review): hard-coded cap; query.maxCandidateNumPerSourceKey is not read in this
// method - confirm whether that is intentional.
val maxCandidateNumPerSourceKey = 200
val twhinCollabFilterForFollowWithCGInfo =
getTwhinCollabCandidatesWithCGInfo(
twhinCollabFilterForFollowCandidates,
maxCandidateNumPerSourceKey,
query.twhinCollabFilterFollowQuery,
)
val twhinCollabFilterForEngagementWithCGInfo =
getTwhinCollabCandidatesWithCGInfo(
twhinCollabFilterForEngagementCandidates,
maxCandidateNumPerSourceKey,
query.twhinCollabFilterEngagementQuery,
)
val twhinMultiClusterForFollowWithCGInfo =
getTwhinCollabCandidatesWithCGInfo(
twhinMultiClusterForFollowCandidates,
maxCandidateNumPerSourceKey,
query.twhinMultiClusterFollowQuery,
)
val twhinMultiClusterForEngagementWithCGInfo =
getTwhinCollabCandidatesWithCGInfo(
twhinMultiClusterForEngagementCandidates,
maxCandidateNumPerSourceKey,
query.twhinMultiClusterEngagementQuery,
)
val retweetBasedDiffusionWithCGInfo =
getDiffusionBasedCandidatesWithCGInfo(
diffusionBasedSimilarityEngineCandidates,
maxCandidateNumPerSourceKey,
query.diffusionBasedSimilarityEngineQuery,
)
val twhinCollabCandidateSourcesToBeInterleaved =
ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
twhinCollabFilterForFollowWithCGInfo,
twhinCollabFilterForEngagementWithCGInfo,
)
val twhinMultiClusterCandidateSourcesToBeInterleaved =
ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
twhinMultiClusterForFollowWithCGInfo,
twhinMultiClusterForEngagementWithCGInfo,
)
val interleavedTwhinCollabCandidates =
InterleaveUtil.interleave(twhinCollabCandidateSourcesToBeInterleaved)
val interleavedTwhinMultiClusterCandidates =
InterleaveUtil.interleave(twhinMultiClusterCandidateSourcesToBeInterleaved)
// Each input list was already truncated by its helper; the interleaved output is
// truncated again to the same cap.
val twhinCollabFilterResults =
if (interleavedTwhinCollabCandidates.nonEmpty) {
Some(interleavedTwhinCollabCandidates.take(maxCandidateNumPerSourceKey))
} else None
val twhinMultiClusterResults =
if (interleavedTwhinMultiClusterCandidates.nonEmpty) {
Some(interleavedTwhinMultiClusterCandidates.take(maxCandidateNumPerSourceKey))
} else None
val diffusionResults =
if (retweetBasedDiffusionWithCGInfo.nonEmpty) {
Some(retweetBasedDiffusionWithCGInfo.take(maxCandidateNumPerSourceKey))
} else None
// flatten drops the sources that produced no candidates.
Some(
Seq(
twhinCollabFilterResults,
twhinMultiClusterResults,
diffusionResults
).flatten)
}
}
case _ =>
throw new IllegalArgumentException("sourceId_is_not_userId_cnt")
}
}
/**
 * Returns the tweets that were created no longer than `maxTweetAgeHours` ago.
 *
 * @param candidates       tweets to filter
 * @param maxTweetAgeHours maximum permitted tweet age
 */
private def tweetAgeFilter(
  candidates: Seq[TweetWithScore],
  maxTweetAgeHours: Duration
): Seq[TweetWithScore] = {
  // Tweet IDs are approximately chronological (see http://go/snowflake), so the earliest
  // permitted tweet id is built once. The per-candidate check is then a plain id
  // comparison, which is far cheaper.
  val oldestPermittedTime = Time.now - maxTweetAgeHours
  val oldestPermittedTweetId = SnowflakeId.firstIdFor(oldestPermittedTime)
  candidates.filterNot(_.tweetId < oldestPermittedTweetId)
}
/**
 * Age-filters the candidates through `tweetAgeFilter` and records size stats around it:
 * "size" before filtering, "filtered_size" after, and an "empty" counter when nothing is left.
 *
 * @param offlineInterestedInCandidates tweets to filter
 * @param maxTweetAgeHours              maximum permitted tweet age
 * @param scopedStatsReceiver           receiver the stats are written to
 * @return the tweets that passed the age filter
 */
private def ageFilterWithStats(
  offlineInterestedInCandidates: Seq[TweetWithScore],
  maxTweetAgeHours: Duration,
  scopedStatsReceiver: StatsReceiver
): Seq[TweetWithScore] = {
  scopedStatsReceiver.stat("size").add(offlineInterestedInCandidates.size)

  val recentCandidates = tweetAgeFilter(
    offlineInterestedInCandidates.map(scored => TweetWithScore(scored.tweetId, scored.score)),
    maxTweetAgeHours)

  scopedStatsReceiver.stat("filtered_size").add(recentCandidates.size)
  if (recentCandidates.isEmpty) {
    scopedStatsReceiver.counter("empty").incr()
  }
  recentCandidates
}
/**
 * Wraps TwHIN collab-filter results into [[TweetWithCandidateGenerationInfo]] and keeps at
 * most `maxCandidateNumPerSourceKey` of them.
 *
 * @param tweetCandidates             engine results; None yields an empty Seq
 * @param maxCandidateNumPerSourceKey maximum number of candidates returned
 * @param twhinCollabFilterQuery      query used to build the similarity engine info
 */
private def getTwhinCollabCandidatesWithCGInfo(
  tweetCandidates: Option[Seq[TweetWithScore]],
  maxCandidateNumPerSourceKey: Int,
  twhinCollabFilterQuery: LookupEngineQuery[
    TwhinCollabFilterSimilarityEngine.Query
  ],
): Seq[TweetWithCandidateGenerationInfo] = {
  def withCGInfo(scored: TweetWithScore): TweetWithCandidateGenerationInfo = {
    val engineInfo = TwhinCollabFilterSimilarityEngine
      .toSimilarityEngineInfo(twhinCollabFilterQuery, scored.score)
    TweetWithCandidateGenerationInfo(
      scored.tweetId,
      CandidateGenerationInfo(None, engineInfo, Seq.empty)
    )
  }

  tweetCandidates
    .getOrElse(Seq.empty)
    .map(withCGInfo)
    .take(maxCandidateNumPerSourceKey)
}
/**
 * Wraps diffusion-based results into [[TweetWithCandidateGenerationInfo]] and keeps at
 * most `maxCandidateNumPerSourceKey` of them.
 *
 * @param tweetCandidates                     engine results; None yields an empty Seq
 * @param maxCandidateNumPerSourceKey         maximum number of candidates returned
 * @param diffusionBasedSimilarityEngineQuery query used to build the similarity engine info
 */
private def getDiffusionBasedCandidatesWithCGInfo(
  tweetCandidates: Option[Seq[TweetWithScore]],
  maxCandidateNumPerSourceKey: Int,
  diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
    DiffusionBasedSimilarityEngine.Query
  ],
): Seq[TweetWithCandidateGenerationInfo] = {
  def withCGInfo(scored: TweetWithScore): TweetWithCandidateGenerationInfo = {
    val engineInfo = DiffusionBasedSimilarityEngine
      .toSimilarityEngineInfo(diffusionBasedSimilarityEngineQuery, scored.score)
    TweetWithCandidateGenerationInfo(
      scored.tweetId,
      CandidateGenerationInfo(None, engineInfo, Seq.empty)
    )
  }

  tweetCandidates
    .getOrElse(Seq.empty)
    .map(withCGInfo)
    .take(maxCandidateNumPerSourceKey)
}
}
object CustomizedRetrievalCandidateGeneration {

  /**
   * Input of [[CustomizedRetrievalCandidateGeneration]]: the id to retrieve for, global
   * limits, and for every candidate source an enable flag plus its prepared engine queries.
   */
  case class Query(
    internalId: InternalId,
    maxCandidateNumPerSourceKey: Int,
    maxTweetAgeHours: Duration,
    // twhinCollabFilter
    enableTwhinCollabFilter: Boolean,
    twhinCollabFilterFollowQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    twhinCollabFilterEngagementQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    // twhinMultiCluster
    enableTwhinMultiCluster: Boolean,
    twhinMultiClusterFollowQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    twhinMultiClusterEngagementQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    // retweet-based diffusion
    enableRetweetBasedDiffusion: Boolean,
    diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
      DiffusionBasedSimilarityEngine.Query
    ],
  )

  /**
   * Builds a [[Query]] for `internalId`, reading every flag, limit and engine source
   * from `params`.
   */
  def fromParams(
    internalId: InternalId,
    params: configapi.Params
  ): Query = {
    // TwHIN collab-filter queries
    val collabFollow = TwhinCollabFilterSimilarityEngine.fromParams(
      internalId,
      params(CustomizedRetrievalBasedTwhinCollabFilterFollowSource),
      params)
    val collabEngagement = TwhinCollabFilterSimilarityEngine.fromParams(
      internalId,
      params(CustomizedRetrievalBasedTwhinCollabFilterEngagementSource),
      params)

    // TwHIN multi-cluster queries
    val multiClusterFollow = TwhinCollabFilterSimilarityEngine.fromParams(
      internalId,
      params(CustomizedRetrievalBasedTwhinMultiClusterFollowSource),
      params)
    val multiClusterEngagement = TwhinCollabFilterSimilarityEngine.fromParams(
      internalId,
      params(CustomizedRetrievalBasedTwhinMultiClusterEngagementSource),
      params)

    // Retweet-based diffusion query
    val diffusion = DiffusionBasedSimilarityEngine.fromParams(
      internalId,
      params(CustomizedRetrievalBasedRetweetDiffusionSource),
      params)

    Query(
      internalId = internalId,
      maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam),
      maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam),
      enableTwhinCollabFilter = params(EnableTwhinCollabFilterClusterParam),
      twhinCollabFilterFollowQuery = collabFollow,
      twhinCollabFilterEngagementQuery = collabEngagement,
      enableTwhinMultiCluster = params(EnableTwhinMultiClusterParam),
      twhinMultiClusterFollowQuery = multiClusterFollow,
      twhinMultiClusterEngagementQuery = multiClusterEngagement,
      enableRetweetBasedDiffusion = params(EnableRetweetBasedDiffusionParam),
      diffusionBasedSimilarityEngineQuery = diffusion
    )
  }
}

View File

@ -0,0 +1,220 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithAuthor
import com.twitter.cr_mixer.param.FrsParams
import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngineRouter
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.cr_mixer.thriftscala.FrsTweet
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.util.DefaultTimer
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.hermit.constants.AlgorithmFeedbackTokens
import com.twitter.hermit.constants.AlgorithmFeedbackTokens.AlgorithmToFeedbackTokenMap
import com.twitter.hermit.model.Algorithm
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
* TweetCandidateGenerator based on FRS seed users. For now this candidate generator fetches seed
* users from FRS, and retrieves the seed users' past tweets from Earlybird with Earlybird light
* ranking models.
*/
@Singleton
class FrsTweetCandidateGenerator @Inject() (
@Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]],
frsBasedSimilarityEngine: EarlybirdSimilarityEngineRouter,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
import FrsTweetCandidateGenerator._
private val timer = DefaultTimer
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchSeedsStats = stats.scope("fetchSeeds")
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val filterCandidatesStats = stats.scope("filterCandidates")
private val hydrateCandidatesStats = stats.scope("hydrateCandidates")
private val getCandidatesStats = stats.scope("getCandidates")
/**
 * Retrieves the candidates for the given user in five steps:
 *  1. fetch seed users from FRS,
 *  2. fetch tweet candidates for those seeds from Earlybird,
 *  3. filter,
 *  4. hydrate with the FRS source and score information,
 *  5. truncate to `maxNumResults`.
 *
 * The whole chain is bounded by `timeoutConfig.frsBasedTweetEndpointTimeout`.
 *
 * @return the hydrated tweets, or an empty Seq when a step produced nothing
 */
def get(
  frsTweetCandidateGeneratorQuery: FrsTweetCandidateGeneratorQuery
): Future[Seq[FrsTweet]] = {
  val request = frsTweetCandidateGeneratorQuery
  val allStats = stats.scope("all")
  val perProductStats = stats.scope("perProduct", request.product.name)

  StatsUtil.trackItemsStats(allStats) {
    StatsUtil.trackItemsStats(perProductStats) {
      val pipeline = for {
        seeds <- StatsUtil.trackOptionItemMapStats(fetchSeedsStats) {
          fetchSeeds(
            request.userId,
            request.impressedUserList,
            request.languageCodeOpt,
            request.countryCodeOpt,
            request.params,
          )
        }
        fetched <- StatsUtil.trackOptionItemsStats(fetchCandidatesStats) {
          fetchCandidates(
            request.userId,
            seeds.map(_.keys.toSeq).getOrElse(Seq.empty),
            request.impressedTweetList,
            seeds.map(_.mapValues(_.score)).getOrElse(Map.empty),
            request.params
          )
        }
        filtered <- StatsUtil.trackOptionItemsStats(filterCandidatesStats) {
          filterCandidates(fetched, request.params)
        }
        hydrated <- StatsUtil.trackOptionItemsStats(hydrateCandidatesStats) {
          hydrateCandidates(seeds, filtered)
        }
      } yield hydrated.fold(Seq.empty[FrsTweet])(_.take(request.maxNumResults))

      pipeline.raiseWithin(timeoutConfig.frsBasedTweetEndpointTimeout)(timer)
    }
  }
}
/**
 * Fetches recommended seed users from FRS and indexes them by user id.
 *
 * @param userId          user the seeds are requested for
 * @param userDenyList    user ids passed to FRS as a Seq
 * @param languageCodeOpt optional language code forwarded to FRS
 * @param countryCodeOpt  optional country code forwarded to FRS
 * @param params          supplies the max seed count and the display location
 * @return the FRS results keyed by their userId, or None when the store returned nothing
 */
private def fetchSeeds(
  userId: UserId,
  userDenyList: Set[UserId],
  languageCodeOpt: Option[String],
  countryCodeOpt: Option[String],
  params: Params
): Future[Option[Map[UserId, FrsQueryResult]]] = {
  val frsQuery = FrsStore.Query(
    userId,
    params(FrsParams.FrsBasedCandidateGenerationMaxSeedsNumParam),
    params(FrsParams.FrsBasedCandidateGenerationDisplayLocationParam).displayLocation,
    userDenyList.toSeq,
    languageCodeOpt,
    countryCodeOpt
  )
  for (seedAuthorsOpt <- frsStore.get(frsQuery)) yield {
    seedAuthorsOpt.map { seedAuthors =>
      seedAuthors.map(seed => (seed.userId, seed)).toMap
    }
  }
}
/**
 * Fetches tweet candidates authored by the seed users from Earlybird.
 *
 * @param searcherUserId     user on whose behalf the search is made
 * @param seedAuthors        authors whose tweets are requested
 * @param impressedTweetList tweet ids forwarded to the query builder
 * @param frsUserToScores    FRS score per seed author, used for score adjustment
 * @param params             request params forwarded to the query builder
 * @return the engine result, or Future.None when there are no seed authors
 */
private def fetchCandidates(
  searcherUserId: UserId,
  seedAuthors: Seq[UserId],
  impressedTweetList: Set[TweetId],
  frsUserToScores: Map[UserId, Double],
  params: Params
): Future[Option[Seq[TweetWithAuthor]]] = {
  if (seedAuthors.isEmpty) {
    // Nothing to search for, so Earlybird is not called.
    Future.None
  } else {
    val earlybirdQuery = EarlybirdSimilarityEngineRouter.queryFromParams(
      Some(searcherUserId),
      seedAuthors,
      impressedTweetList,
      frsUserToScoresForScoreAdjustment = Some(frsUserToScores),
      params
    )
    frsBasedSimilarityEngine.get(earlybirdQuery)
  }
}
/**
 * Filters out candidates that do not pass the visibility filter policy, i.e. candidates
 * for which `tweetInfoStore` has no TweetInfo.
 *
 * The filter is applied only when
 * `FrsParams.FrsBasedCandidateGenerationEnableVisibilityFilteringParam` is enabled.
 * Otherwise, or when there are no candidates at all, the input is returned unchanged
 * without building the id set or querying the store.
 *
 * @param candidates candidates to filter; None is passed through
 * @param params     request params holding the enable flag
 * @return the candidates that have a TweetInfo, wrapped the same way as the input
 */
private def filterCandidates(
  candidates: Option[Seq[TweetWithAuthor]],
  params: Params
): Future[Option[Seq[TweetWithAuthor]]] = {
  val filteringEnabled =
    params(FrsParams.FrsBasedCandidateGenerationEnableVisibilityFilteringParam)

  candidates match {
    case Some(tweets) if filteringEnabled =>
      // The id set is only needed for the store lookup, so it is built on this path alone.
      val tweetIds = tweets.map(_.tweetId).toSet
      Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
        // If tweetInfo does not exist, we will filter out this tweet candidate.
        Some(tweets.filter(tweet => tweetInfos.get(tweet.tweetId).exists(_.isDefined)))
      }
    case _ =>
      Future.value(candidates)
  }
}
/**
 * Hydrates the candidates with the FRS candidate sources and scores of their authors.
 *
 * For every tweet, the author's FRS result (if any) supplies the primary source, the author
 * score and the per-source scores. Source names that are not keys of
 * `AlgorithmFeedbackTokens.TokenStrToAlgorithmMap` are dropped; the rest are translated to
 * feedback tokens through `AlgorithmToFeedbackTokenMap`.
 *
 * @param frsAuthorWithScores FRS results keyed by author id
 * @param candidates          tweets to hydrate; None is passed through
 * @return an already-completed future with the hydrated tweets
 */
private def hydrateCandidates(
  frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]],
  candidates: Option[Seq[TweetWithAuthor]]
): Future[Option[Seq[FrsTweet]]] = {
  // see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala
  // see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala
  val tokenStrToAlgorithm = AlgorithmFeedbackTokens.TokenStrToAlgorithmMap

  def toFrsTweet(tweetWithAuthor: TweetWithAuthor): FrsTweet = {
    val authorResult = frsAuthorWithScores.flatMap(_.get(tweetWithAuthor.authorId))
    FrsTweet(
      tweetId = tweetWithAuthor.tweetId,
      authorId = tweetWithAuthor.authorId,
      frsPrimarySource = authorResult.flatMap(_.primarySource),
      frsAuthorScore = authorResult.map(_.score),
      frsCandidateSourceScores = authorResult.flatMap { result =>
        result.sourceWithScores.map { scoresByTokenStr =>
          scoresByTokenStr.collect {
            case (tokenStr, score) if tokenStrToAlgorithm.contains(tokenStr) =>
              val algorithm = tokenStrToAlgorithm.getOrElse(tokenStr, DefaultAlgo)
              AlgorithmToFeedbackTokenMap.getOrElse(algorithm, DefaultAlgoToken) -> score
          }
        }
      }
    )
  }

  Future.value(candidates.map(_.map(toFrsTweet)))
}
}
/** Fallback values used when translating FRS candidate source names to feedback tokens. */
object FrsTweetCandidateGenerator {
// Algorithm used when a source name has no entry in TokenStrToAlgorithmMap.
val DefaultAlgo: Algorithm.Value = Algorithm.Other
// 9999 is the token for Algorithm.Other; it is used here only if AlgorithmToFeedbackTokenMap
// has no entry for DefaultAlgo.
val DefaultAlgoToken: Int = AlgorithmToFeedbackTokenMap.getOrElse(DefaultAlgo, 9999)
}

View File

@ -0,0 +1,156 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.filter.PreRankFilterRunner
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
class RelatedTweetCandidateGenerator @Inject() (
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
TweetBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
ProducerBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
preRankFilterRunner: PreRankFilterRunner,
relatedTweetScribeLogger: RelatedTweetScribeLogger,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
globalStats: StatsReceiver) {
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val preRankFilterStats = stats.scope("preRankFilter")
/**
 * Fetches related-tweet candidates for the query, runs the pre-rank filters, and returns
 * at most `query.maxNumResults` candidates from the first candidate group.
 *
 * The returned future fails with UnsupportedOperationException when the filtered result
 * contains no candidate group.
 */
def get(
  query: RelatedTweetCandidateGeneratorQuery
): Future[Seq[InitialCandidate]] = {
  val allStats = stats.scope("all")
  val perProductStats = stats.scope("perProduct", query.product.toString)

  StatsUtil.trackItemsStats(allStats) {
    StatsUtil.trackItemsStats(perProductStats) {
      StatsUtil
        .trackBlockStats(fetchCandidatesStats) {
          fetchCandidates(query)
        }.flatMap { initialCandidates =>
          StatsUtil.trackBlockStats(preRankFilterStats) {
            preRankFilter(query, initialCandidates)
          }
        }.map { filteredCandidates =>
          filteredCandidates.headOption match {
            case Some(firstGroup) =>
              firstGroup.take(query.maxNumResults)
            case None =>
              throw new UnsupportedOperationException(
                "RelatedTweetCandidateGenerator results invalid")
          }
        }
    }
  }
}
/**
 * Fetches candidates from the similarity engine matching the query's internal id and
 * passes them to `relatedTweetScribeLogger.scribeInitialCandidates`.
 *
 * A `TweetId` is served by the tweet-based unified engine, a `UserId` by the producer-based
 * unified engine. Any other id type raises UnsupportedOperationException from the match.
 *
 * @param query the related-tweet request
 * @return the future returned by `scribeInitialCandidates`
 */
def fetchCandidates(
query: RelatedTweetCandidateGeneratorQuery
): Future[Seq[Seq[InitialCandidate]]] = {
// NOTE(review): the match below is the logger's inline argument; confirm whether that
// parameter is by-name before hoisting it, since that decides where the throw surfaces.
relatedTweetScribeLogger.scribeInitialCandidates(
query,
query.internalId match {
case InternalId.TweetId(_) =>
getCandidatesFromSimilarityEngine(
query,
TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
tweetBasedUnifiedSimilarityEngine.getCandidates)
case InternalId.UserId(_) =>
getCandidatesFromSimilarityEngine(
query,
ProducerBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
producerBasedUnifiedSimilarityEngine.getCandidates)
case _ =>
throw new UnsupportedOperationException(
"RelatedTweetCandidateGenerator gets invalid InternalId")
}
)
}
/**
 * Fetches candidates from the TweetBased/ProducerBased Unified Similarity Engine and
 * applies the VF filter based on TweetInfoStore (via `convertToInitialCandidates`).
 *
 * To align with the downstream processing (filter, rank), the result is a
 * Seq[Seq[InitialCandidate]] even though it currently holds a single inner Seq.
 *
 * @param query                     the related-tweet request
 * @param fromParamsForRelatedTweet builds the engine query from the internal id and params
 * @param getFunc                   runs the engine query
 */
private def getCandidatesFromSimilarityEngine[QueryType](
  query: RelatedTweetCandidateGeneratorQuery,
  fromParamsForRelatedTweet: (InternalId, configapi.Params) => QueryType,
  getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  // The single engine query is wrapped in a Seq to keep candidate generation evolvable;
  // that is why the result is a Seq of Seqs.
  val engineQueries = Seq(fromParamsForRelatedTweet(query.internalId, query.params))

  val candidatesPerEngineQuery = engineQueries.map { engineQuery =>
    getFunc(engineQuery).flatMap { candidatesOpt =>
      convertToInitialCandidates(candidatesOpt.toSeq.flatten)
    }
  }
  Future.collect(candidatesPerEngineQuery)
}
/**
 * Runs the sequential pre-rank filters over the candidate groups and passes the result to
 * `relatedTweetScribeLogger.scribePreRankFilterCandidates`.
 *
 * @param query      the related-tweet request
 * @param candidates candidate groups to filter
 * @return the future returned by `scribePreRankFilterCandidates`
 */
private def preRankFilter(
query: RelatedTweetCandidateGeneratorQuery,
candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[Seq[InitialCandidate]]] = {
// NOTE(review): the filter call is the logger's inline argument; confirm whether that
// parameter is by-name before pulling it into a local val.
relatedTweetScribeLogger.scribePreRankFilterCandidates(
query,
preRankFilterRunner
.runSequentialFilters(query, candidates))
}
/**
 * Looks up the TweetInfo of every candidate and converts the candidates that have one
 * into [[InitialCandidate]]s, preserving the input order.
 *
 * If tweetInfo does not exist, the tweet candidate is filtered out. This tweetInfo filter
 * also acts as the VF filter.
 *
 * @param candidates candidates coming from a similarity engine
 * @return the candidates for which `tweetInfoStore` returned a TweetInfo
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo],
): Future[Seq[InitialCandidate]] = {
  val tweetIds = candidates.map(_.tweetId).toSet
  Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
    for {
      candidate <- candidates
      // One total lookup: a missing key or a None value both drop the candidate.
      tweetInfo <- tweetInfos.get(candidate.tweetId).flatten
    } yield InitialCandidate(
      tweetId = candidate.tweetId,
      tweetInfo = tweetInfo,
      candidate.candidateGenerationInfo
    )
  }
}
}

View File

@ -0,0 +1,139 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.filter.PreRankFilterRunner
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
class RelatedVideoTweetCandidateGenerator @Inject() (
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
TweetBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
preRankFilterRunner: PreRankFilterRunner,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
globalStats: StatsReceiver) {
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val preRankFilterStats = stats.scope("preRankFilter")
/**
 * Fetches related-video-tweet candidates for the query, runs the pre-rank filters, and
 * returns at most `query.maxNumResults` candidates from the first candidate group.
 *
 * The returned future fails with UnsupportedOperationException when the filtered result
 * contains no candidate group.
 */
def get(
  query: RelatedVideoTweetCandidateGeneratorQuery
): Future[Seq[InitialCandidate]] = {
  val allStats = stats.scope("all")
  val perProductStats = stats.scope("perProduct", query.product.toString)

  StatsUtil.trackItemsStats(allStats) {
    StatsUtil.trackItemsStats(perProductStats) {
      StatsUtil
        .trackBlockStats(fetchCandidatesStats) {
          fetchCandidates(query)
        }.flatMap { initialCandidates =>
          StatsUtil.trackBlockStats(preRankFilterStats) {
            preRankFilter(query, initialCandidates)
          }
        }.map { filteredCandidates =>
          filteredCandidates.headOption match {
            case Some(firstGroup) =>
              firstGroup.take(query.maxNumResults)
            case None =>
              throw new UnsupportedOperationException(
                "RelatedVideoTweetCandidateGenerator results invalid")
          }
        }
    }
  }
}
/**
 * Fetches candidates from the tweet-based unified similarity engine.
 *
 * Only `InternalId.TweetId` queries are supported; any other id type raises
 * UnsupportedOperationException synchronously.
 *
 * @param query the related-video-tweet request
 */
def fetchCandidates(
  query: RelatedVideoTweetCandidateGeneratorQuery
): Future[Seq[Seq[InitialCandidate]]] =
  query.internalId match {
    case InternalId.TweetId(_) =>
      getCandidatesFromSimilarityEngine(
        query,
        TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedVideoTweet,
        tweetBasedUnifiedSimilarityEngine.getCandidates
      )
    case _ =>
      throw new UnsupportedOperationException(
        "RelatedVideoTweetCandidateGenerator gets invalid InternalId")
  }
/**
 * Fetches candidates from a Unified Similarity Engine through `getFunc` and applies the
 * VF filter based on TweetInfoStore (via `convertToInitialCandidates`).
 *
 * To align with the downstream processing (filter, rank), the result is a
 * Seq[Seq[InitialCandidate]] even though it currently holds a single inner Seq.
 *
 * @param query                          the related-video-tweet request
 * @param fromParamsForRelatedVideoTweet builds the engine query from the internal id and params
 * @param getFunc                        runs the engine query
 */
private def getCandidatesFromSimilarityEngine[QueryType](
  query: RelatedVideoTweetCandidateGeneratorQuery,
  fromParamsForRelatedVideoTweet: (InternalId, configapi.Params) => QueryType,
  getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  // The single engine query is wrapped in a Seq to keep candidate generation evolvable;
  // that is why the result is a Seq of Seqs.
  val engineQueries = Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params))

  val candidatesPerEngineQuery = engineQueries.map { engineQuery =>
    getFunc(engineQuery).flatMap { candidatesOpt =>
      convertToInitialCandidates(candidatesOpt.toSeq.flatten)
    }
  }
  Future.collect(candidatesPerEngineQuery)
}
/**
 * Runs the pre-rank filters over the fetched candidates by delegating to
 * preRankFilterRunner.runSequentialFilters.
 */
private def preRankFilter(
  query: RelatedVideoTweetCandidateGeneratorQuery,
  candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[Seq[InitialCandidate]]] =
  preRankFilterRunner.runSequentialFilters(query, candidates)
/**
 * Looks up TweetInfo for every candidate and wraps the ones that have it as InitialCandidates.
 *
 * Candidates without a TweetInfo in tweetInfoStore are dropped; this TweetInfo filter also acts
 * as the VF filter. The relative order of the surviving candidates is unchanged.
 *
 * @param candidates tweets returned by the similarity engine, with their generation info
 * @return the candidates for which a TweetInfo was found
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo],
): Future[Seq[InitialCandidate]] = {
  val tweetIds = candidates.map(_.tweetId).toSet
  Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
    candidates.flatMap { candidate =>
      // An absent key and a stored None both mean "no TweetInfo": the candidate is skipped.
      tweetInfos.get(candidate.tweetId).flatten.map { tweetInfo =>
        InitialCandidate(
          tweetId = candidate.tweetId,
          tweetInfo = tweetInfo,
          candidate.candidateGenerationInfo
        )
      }
    }
  }
}
}

View File

@ -0,0 +1,640 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.param.InterestedInParams
import com.twitter.cr_mixer.param.SimClustersANNParams
import com.twitter.cr_mixer.similarity_engine.EngineQuery
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.base.CandidateSource
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
/**
* This store looks for similar tweets for a given UserId that generates UserInterestedIn
* from SimClustersANN. It will be a standalone CandidateGeneration class moving forward.
*
* After the abstraction improvement (applying the SimilarityEngine trait),
* these CGs will be subject to change.
*/
@Singleton
case class SimClustersInterestedInCandidateGeneration @Inject() (
@Named(ModuleNames.SimClustersANNSimilarityEngine)
simClustersANNSimilarityEngine: StandardSimilarityEngine[
SimClustersANNSimilarityEngine.Query,
TweetWithScore
],
statsReceiver: StatsReceiver)
extends CandidateSource[
SimClustersInterestedInCandidateGeneration.Query,
Seq[TweetWithCandidateGenerationInfo]
] {
override def name: String = this.getClass.getSimpleName
private val stats = statsReceiver.scope(name)
private val fetchCandidatesStat = stats.scope("fetchCandidates")
/**
 * Fetches SimClustersANN candidates for a user across every enabled
 * (embedding source x SANN cluster) combination.
 *
 * The embedding sources are UserInterestedIn, UserNextInterestedIn and AddressBookInterestedIn;
 * the SANN clusters are prod, experimental, 1, 2, 3, 5 and 4. A combination is queried only when
 * both its source flag and its cluster flag are set on the query.
 *
 * Each source is filtered with its own minimum score from the query
 * (simClustersInterestedInMinScore, simClustersNextInterestedInMinScore,
 * simClustersAddressBookInterestedInMinScore). Previously the InterestedIn threshold was passed
 * for all three sources, which left the other two query fields unread.
 *
 * @param query flags, thresholds and pre-built SANN queries for this request
 * @return for a UserId source, Some of 21 inner Seqs ordered cluster by cluster (prod,
 *         experimental, 1, 2, 3, 5, 4) and, within a cluster, InterestedIn, NextInterestedIn,
 *         AddressBook; a disabled or empty combination contributes an empty Seq. For any other
 *         id type, None, and the `sourceId_is_not_userId_cnt` counter is incremented.
 */
override def get(
  query: SimClustersInterestedInCandidateGeneration.Query
): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
  // Issues the SANN request only when the combination is enabled; otherwise resolves to None.
  def fetchIfEnabled(
    enabled: Boolean,
    simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    minScore: Double
  ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] =
    if (enabled)
      getInterestedInCandidateResult(simClustersANNSimilarityEngine, simClustersANNQuery, minScore)
    else
      Future.None

  query.internalId match {
    case _: InternalId.UserId =>
      StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
        // One row per SANN cluster:
        // (cluster enabled, InterestedIn query, NextInterestedIn query, AddressBook query).
        // Row order defines the order of the returned inner Seqs.
        val clusters = Seq(
          (
            query.enableProdSimClustersANNSimilarityEngine,
            query.interestedInSimClustersANNQuery,
            query.nextInterestedInSimClustersANNQuery,
            query.addressbookInterestedInSimClustersANNQuery),
          (
            query.enableExperimentalSimClustersANNSimilarityEngine,
            query.interestedInExperimentalSimClustersANNQuery,
            query.nextInterestedInExperimentalSimClustersANNQuery,
            query.addressbookInterestedInExperimentalSimClustersANNQuery),
          (
            query.enableSimClustersANN1SimilarityEngine,
            query.interestedInSimClustersANN1Query,
            query.nextInterestedInSimClustersANN1Query,
            query.addressbookInterestedInSimClustersANN1Query),
          (
            query.enableSimClustersANN2SimilarityEngine,
            query.interestedInSimClustersANN2Query,
            query.nextInterestedInSimClustersANN2Query,
            query.addressbookInterestedInSimClustersANN2Query),
          (
            query.enableSimClustersANN3SimilarityEngine,
            query.interestedInSimClustersANN3Query,
            query.nextInterestedInSimClustersANN3Query,
            query.addressbookInterestedInSimClustersANN3Query),
          (
            query.enableSimClustersANN5SimilarityEngine,
            query.interestedInSimClustersANN5Query,
            query.nextInterestedInSimClustersANN5Query,
            query.addressbookInterestedInSimClustersANN5Query),
          (
            query.enableSimClustersANN4SimilarityEngine,
            query.interestedInSimClustersANN4Query,
            query.nextInterestedInSimClustersANN4Query,
            query.addressbookInterestedInSimClustersANN4Query)
        )

        val candidateResultFuts = clusters.flatMap {
          case (clusterEnabled, interestedInQuery, nextInterestedInQuery, addressBookQuery) =>
            Seq(
              fetchIfEnabled(
                query.enableUserInterestedIn && clusterEnabled,
                interestedInQuery,
                query.simClustersInterestedInMinScore),
              fetchIfEnabled(
                query.enableUserNextInterestedIn && clusterEnabled,
                nextInterestedInQuery,
                query.simClustersNextInterestedInMinScore),
              fetchIfEnabled(
                query.enableAddressBookNextInterestedIn && clusterEnabled,
                addressBookQuery,
                query.simClustersAddressBookInterestedInMinScore)
            )
        }

        Future.collect(candidateResultFuts).map { candidateResults =>
          Some(candidateResults.map(_.getOrElse(Seq.empty)))
        }
      }
    case _ =>
      stats.counter("sourceId_is_not_userId_cnt").incr()
      Future.None
  }
}
/**
 * Keeps the SANN candidates whose score is strictly greater than the minimum score and records
 * stats under the given SANN config id.
 *
 * Per call it adds the number of surviving candidates to the `simClustersAnnCandidates_size`
 * stat, increments `simClustersAnnRequests`, and increments
 * `emptyFilteredSimClustersAnnCandidates` when nothing survives.
 *
 * @param simClustersAnnCandidates        candidates returned by SANN
 * @param simClustersInterestedInMinScore exclusive lower bound on the candidate score
 * @param simClustersANNConfigId          first component of the stat names
 * @return the surviving candidates, rebuilt from their tweet id and score
 */
private def simClustersCandidateMinScoreFilter(
  simClustersAnnCandidates: Seq[TweetWithScore],
  simClustersInterestedInMinScore: Double,
  simClustersANNConfigId: String
): Seq[TweetWithScore] = {
  val passingCandidates =
    simClustersAnnCandidates.filter(_.score > simClustersInterestedInMinScore)

  stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(passingCandidates.size)
  stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr()
  if (passingCandidates.isEmpty) {
    stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr()
  }

  for (passing <- passingCandidates) yield TweetWithScore(passing.tweetId, passing.score)
}
/**
 * Queries the SANN similarity engine, applies the min-score filter and attaches candidate
 * generation info to every surviving tweet.
 *
 * Stats recorded per call: `candidateSize` (overall and scoped by the source embedding type
 * name), plus the scoped counters `requests` and `empty_results`. These are computed on the
 * flattened candidate list. Previously they were computed on the engine's optional result, so
 * `candidateSize` did not reflect the number of candidates and `empty_results` missed results
 * that were present but empty.
 *
 * @param simClustersANNSimilarityEngine  engine to query
 * @param simClustersANNQuery             the SANN query to run
 * @param simClustersInterestedInMinScore exclusive lower bound on the candidate score
 * @return Some of the surviving candidates, or None when nothing passes the filter
 */
private def getInterestedInCandidateResult(
  simClustersANNSimilarityEngine: StandardSimilarityEngine[
    SimClustersANNSimilarityEngine.Query,
    TweetWithScore
  ],
  simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
  simClustersInterestedInMinScore: Double,
): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {
  simClustersANNSimilarityEngine.getCandidates(simClustersANNQuery).map { engineResult =>
    // Flatten first so that sizes and emptiness refer to the candidates themselves.
    val interestedInCandidates = engineResult.toSeq.flatten

    stats.stat("candidateSize").add(interestedInCandidates.size)

    val embeddingCandidatesStat = stats.scope(
      simClustersANNQuery.storeQuery.simClustersANNQuery.sourceEmbeddingId.embeddingType.name)
    embeddingCandidatesStat.stat("candidateSize").add(interestedInCandidates.size)
    if (interestedInCandidates.isEmpty) {
      embeddingCandidatesStat.counter("empty_results").incr()
    }
    embeddingCandidatesStat.counter("requests").incr()

    val filteredTweets = simClustersCandidateMinScoreFilter(
      interestedInCandidates,
      simClustersInterestedInMinScore,
      simClustersANNQuery.storeQuery.simClustersANNConfigId)

    val interestedInTweetsWithCGInfo = filteredTweets.map { tweetWithScore =>
      TweetWithCandidateGenerationInfo(
        tweetWithScore.tweetId,
        CandidateGenerationInfo(
          None,
          SimClustersANNSimilarityEngine
            .toSimilarityEngineInfo(simClustersANNQuery, tweetWithScore.score),
          Seq.empty // SANN is an atomic SE, and hence it has no contributing SEs
        )
      )
    }

    if (interestedInTweetsWithCGInfo.nonEmpty) Some(interestedInTweetsWithCGInfo) else None
  }
}
}
object SimClustersInterestedInCandidateGeneration {
/**
 * Everything needed to run one SimClusters InterestedIn candidate generation request.
 *
 * The fields fall into four groups:
 *  - `internalId`: the source id of the request.
 *  - `enableUser*` / `enableAddressBook*`: one switch per embedding source (InterestedIn,
 *    NextInterestedIn, AddressBook).
 *  - `enable*SimilarityEngine`: one switch per SANN cluster (prod, experimental, 1, 2, 3, 5, 4).
 *  - `simClusters*MinScore`: min-score thresholds; fromParams fills them from
 *    InterestedInParams.MinScoreParam, MinScoreSequentialModelParam and MinScoreAddressBookParam
 *    respectively.
 *  - `*SimClustersANN*Query`: one pre-built engine query per (embedding source, SANN cluster)
 *    pair, 21 in total.
 *
 * Instances are normally built with fromParams.
 */
case class Query(
internalId: InternalId,
// Embedding source switches.
enableUserInterestedIn: Boolean,
enableUserNextInterestedIn: Boolean,
enableAddressBookNextInterestedIn: Boolean,
// SANN cluster switches.
enableProdSimClustersANNSimilarityEngine: Boolean,
enableExperimentalSimClustersANNSimilarityEngine: Boolean,
enableSimClustersANN1SimilarityEngine: Boolean,
enableSimClustersANN2SimilarityEngine: Boolean,
enableSimClustersANN3SimilarityEngine: Boolean,
enableSimClustersANN5SimilarityEngine: Boolean,
enableSimClustersANN4SimilarityEngine: Boolean,
// Min-score thresholds, one per embedding source.
simClustersInterestedInMinScore: Double,
simClustersNextInterestedInMinScore: Double,
simClustersAddressBookInterestedInMinScore: Double,
// Prod SANN cluster queries.
interestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
addressbookInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
// Experimental SANN cluster queries.
interestedInExperimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInExperimentalSimClustersANNQuery: EngineQuery[
SimClustersANNSimilarityEngine.Query
],
addressbookInterestedInExperimentalSimClustersANNQuery: EngineQuery[
SimClustersANNSimilarityEngine.Query
],
// SANN cluster 1 queries.
interestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
addressbookInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
// SANN cluster 2 queries.
interestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
addressbookInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
// SANN cluster 3 queries.
interestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
addressbookInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
// SANN cluster 5 queries.
interestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
addressbookInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
// SANN cluster 4 queries.
interestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
nextInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
addressbookInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
)
/**
 * Builds the Query for one request from its configapi params.
 *
 * Reads the SimClusters model version, one SANN config id per cluster, one embedding id per
 * embedding source, the enable flags and the min-score thresholds, and builds one SANN engine
 * query for every (embedding source, SANN cluster) pair.
 *
 * @param internalId source id of the request
 * @param params     per-request parameters
 */
def fromParams(
  internalId: InternalId,
  params: configapi.Params,
): Query = {
  // Shared by every SANN query below.
  val modelVersion =
    ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))

  // Config id of each SANN cluster.
  val prodConfigId = params(SimClustersANNParams.SimClustersANNConfigId)
  val experimentalConfigId = params(SimClustersANNParams.ExperimentalSimClustersANNConfigId)
  val ann1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
  val ann2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
  val ann3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId)
  val ann5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId)
  val ann4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId)

  // Embedding type of each embedding source.
  val interestedInType = params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType
  val nextInterestedInType =
    params(InterestedInParams.NextInterestedInEmbeddingIdParam).embeddingType
  val addressbookType =
    params(InterestedInParams.AddressBookInterestedInEmbeddingIdParam).embeddingType

  Query(
    internalId = internalId,
    enableUserInterestedIn = params(InterestedInParams.EnableSourceParam),
    enableUserNextInterestedIn = params(InterestedInParams.EnableSourceSequentialModelParam),
    enableAddressBookNextInterestedIn = params(InterestedInParams.EnableSourceAddressBookParam),
    enableProdSimClustersANNSimilarityEngine =
      params(InterestedInParams.EnableProdSimClustersANNParam),
    enableExperimentalSimClustersANNSimilarityEngine =
      params(InterestedInParams.EnableExperimentalSimClustersANNParam),
    enableSimClustersANN1SimilarityEngine = params(InterestedInParams.EnableSimClustersANN1Param),
    enableSimClustersANN2SimilarityEngine = params(InterestedInParams.EnableSimClustersANN2Param),
    enableSimClustersANN3SimilarityEngine = params(InterestedInParams.EnableSimClustersANN3Param),
    enableSimClustersANN5SimilarityEngine = params(InterestedInParams.EnableSimClustersANN5Param),
    enableSimClustersANN4SimilarityEngine = params(InterestedInParams.EnableSimClustersANN4Param),
    simClustersInterestedInMinScore = params(InterestedInParams.MinScoreParam),
    simClustersNextInterestedInMinScore = params(InterestedInParams.MinScoreSequentialModelParam),
    simClustersAddressBookInterestedInMinScore =
      params(InterestedInParams.MinScoreAddressBookParam),
    // Prod SimClustersANN queries
    interestedInSimClustersANNQuery = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, prodConfigId, params),
    nextInterestedInSimClustersANNQuery = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, prodConfigId, params),
    addressbookInterestedInSimClustersANNQuery = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, prodConfigId, params),
    // Experimental SANN cluster queries
    interestedInExperimentalSimClustersANNQuery = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, experimentalConfigId, params),
    nextInterestedInExperimentalSimClustersANNQuery = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, experimentalConfigId, params),
    addressbookInterestedInExperimentalSimClustersANNQuery = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, experimentalConfigId, params),
    // SimClusters ANN cluster 1 queries
    interestedInSimClustersANN1Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, ann1ConfigId, params),
    nextInterestedInSimClustersANN1Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, ann1ConfigId, params),
    addressbookInterestedInSimClustersANN1Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, ann1ConfigId, params),
    // SimClusters ANN cluster 2 queries
    interestedInSimClustersANN2Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, ann2ConfigId, params),
    nextInterestedInSimClustersANN2Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, ann2ConfigId, params),
    addressbookInterestedInSimClustersANN2Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, ann2ConfigId, params),
    // SimClusters ANN cluster 3 queries
    interestedInSimClustersANN3Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, ann3ConfigId, params),
    nextInterestedInSimClustersANN3Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, ann3ConfigId, params),
    addressbookInterestedInSimClustersANN3Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, ann3ConfigId, params),
    // SimClusters ANN cluster 5 queries
    interestedInSimClustersANN5Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, ann5ConfigId, params),
    nextInterestedInSimClustersANN5Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, ann5ConfigId, params),
    addressbookInterestedInSimClustersANN5Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, ann5ConfigId, params),
    // SimClusters ANN cluster 4 queries
    interestedInSimClustersANN4Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, interestedInType, modelVersion, ann4ConfigId, params),
    nextInterestedInSimClustersANN4Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, nextInterestedInType, modelVersion, ann4ConfigId, params),
    addressbookInterestedInSimClustersANN4Query = SimClustersANNSimilarityEngine
      .fromParams(internalId, addressbookType, modelVersion, ann4ConfigId, params),
  )
}
}

View File

@ -0,0 +1,232 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TopicTweetWithScore
import com.twitter.cr_mixer.param.TopicTweetParams
import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SkitHighPrecisionTopicTweetSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.TopicTweet
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.util.DefaultTimer
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.servo.util.MemoizingStatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Singleton
/**
* Formerly CrTopic in legacy Content Recommender. This generator finds top Tweets per Topic.
*/
@Singleton
class TopicTweetCandidateGenerator @Inject() (
certoTopicTweetSimilarityEngine: CertoTopicTweetSimilarityEngine,
skitTopicTweetSimilarityEngine: SkitTopicTweetSimilarityEngine,
skitHighPrecisionTopicTweetSimilarityEngine: SkitHighPrecisionTopicTweetSimilarityEngine,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
private val timer = DefaultTimer
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val filterCandidatesStats = stats.scope("filterCandidates")
private val tweetyPieFilteredStats = filterCandidatesStats.stat("tweetypie_filtered")
private val memoizedStatsReceiver = new MemoizingStatsReceiver(stats)
/**
 * Produces the top tweets for every requested topic.
 *
 * The pipeline is fetch -> convert to InitialCandidates -> filter -> rank -> hydrate, after
 * which each topic's list is cut to `query.maxNumResults`. The whole pipeline is bounded by
 * `timeoutConfig.topicTweetEndpointTimeout`, and map-value stats are tracked both overall and
 * per product.
 *
 * @param query topic ids, params and request-level filters
 * @return topic entity id -> ranked, truncated TopicTweets
 */
def get(
  query: TopicTweetCandidateGeneratorQuery
): Future[Map[Long, Seq[TopicTweet]]] = {
  val maxTweetAge = query.params(TopicTweetParams.MaxTweetAge)
  val allStats = memoizedStatsReceiver.scope("all")
  val perProductStats = memoizedStatsReceiver.scope("perProduct", query.product.name)

  StatsUtil.trackMapValueStats(allStats) {
    StatsUtil.trackMapValueStats(perProductStats) {
      val topTweetsPerTopic =
        fetchCandidates(query)
          .flatMap { retrievedTweets => convertToInitialCandidates(retrievedTweets) }
          .flatMap { initialTweetCandidates =>
            filterCandidates(
              initialTweetCandidates,
              maxTweetAge,
              query.isVideoOnly,
              query.impressedTweetList)
          }
          .map { filteredTweetCandidates =>
            val hydrated = hydrateCandidates(rankCandidates(filteredTweetCandidates))
            hydrated.map {
              case (topicId, topicTweets) => topicId -> topicTweets.take(query.maxNumResults)
            }
          }

      topTweetsPerTopic.raiseWithin(timeoutConfig.topicTweetEndpointTimeout)(timer)
    }
  }
}
/**
 * Queries the Certo, Skit and Skit high-precision topic-tweet engines for every topic in the
 * request and merges their results.
 *
 * Within a topic, candidates are de-duplicated by tweet id, keeping the first occurrence in
 * Certo, Skit, Skit-high-precision order. An engine that returns None contributes nothing.
 *
 * @param query topic ids, video-only flag and params used to build the engine queries
 * @return topic id -> Some(unique candidates); the Option is always defined on success
 */
private def fetchCandidates(
  query: TopicTweetCandidateGeneratorQuery
): Future[Map[TopicId, Option[Seq[TopicTweetWithScore]]]] = {
  val candidatesByTopic = query.topicIds.map { topicId =>
    val topicCandidatesFut = StatsUtil.trackOptionStats(fetchCandidatesStats) {
      val certoFut = certoTopicTweetSimilarityEngine.get(
        CertoTopicTweetSimilarityEngine.fromParams(topicId, query.isVideoOnly, query.params))
      val skitFut = skitTopicTweetSimilarityEngine.get(
        SkitTopicTweetSimilarityEngine.fromParams(topicId, query.isVideoOnly, query.params))
      val skitHighPrecisionFut = skitHighPrecisionTopicTweetSimilarityEngine.get(
        SkitHighPrecisionTopicTweetSimilarityEngine
          .fromParams(topicId, query.isVideoOnly, query.params))

      Future.join(certoFut, skitFut, skitHighPrecisionFut).map {
        case (certoTopicTweets, skitTfgTopicTweets, skitHighPrecisionTopicTweets) =>
          val mergedCandidates = certoTopicTweets.getOrElse(Nil) ++
            skitTfgTopicTweets.getOrElse(Nil) ++
            skitHighPrecisionTopicTweets.getOrElse(Nil)
          // groupBy keeps encounter order inside each group, so head is the first occurrence.
          val uniqueCandidates = mergedCandidates
            .groupBy(_.tweetId).map {
              case (_, sameTweetCandidates) => sameTweetCandidates.head
            }.toSeq
          Some(uniqueCandidates)
      }
    }
    topicId -> topicCandidatesFut
  }.toMap

  Future.collect(candidatesByTopic)
}
/**
 * Looks up TweetInfo for every topic's candidates and wraps the ones that have it as
 * InitialCandidates.
 *
 * Candidates without a TweetInfo in tweetInfoStore are dropped, and the number dropped per topic
 * is added to the `tweetypie_filtered` stat. Each InitialCandidate carries the candidate's
 * similarity engine type and score, with no model id and no contributing engines.
 *
 * @param candidatesMap topic id -> optional engine candidates; None is treated as empty
 * @return topic id -> candidates for which a TweetInfo was found
 */
private def convertToInitialCandidates(
  candidatesMap: Map[TopicId, Option[Seq[TopicTweetWithScore]]]
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
  val initialCandidates = candidatesMap.map {
    case (topicId, candidatesOpt) =>
      val candidates = candidatesOpt.getOrElse(Nil)
      val tweetIds = candidates.map(_.tweetId).toSet
      val numTweetsPreFilter = tweetIds.size
      Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
        val tweetyPieFilteredInitialCandidates = candidates.flatMap { candidate =>
          // An absent key and a stored None both mean "no TweetInfo": the candidate is skipped.
          tweetInfos.get(candidate.tweetId).flatten.map { tweetInfo =>
            InitialCandidate(
              tweetId = candidate.tweetId,
              tweetInfo = tweetInfo,
              CandidateGenerationInfo(
                None,
                SimilarityEngineInfo(
                  similarityEngineType = candidate.similarityEngineType,
                  modelId = None,
                  score = Some(candidate.score)),
                Seq.empty
              )
            )
          }
        }
        val numTweetsPostFilter = tweetyPieFilteredInitialCandidates.size
        tweetyPieFilteredStats.add(numTweetsPreFilter - numTweetsPostFilter)
        topicId -> tweetyPieFilteredInitialCandidates
      }
  }
  Future.collect(initialCandidates.toSeq).map(_.toMap)
}
/**
 * Applies the per-topic filters, in this order:
 *  1. age and exclusion: keep tweets whose id is at least the first Snowflake id for
 *     `now - maxTweetAge` and that are not in `excludeTweetIds`;
 *  2. tweet media nudity: keep tweets whose `isPassTweetMediaNudityTag` is Some(true);
 *  3. user nudity: keep tweets whose `isPassUserNudityRateStrict` is Some(true);
 *  4. video: when `isVideoOnly`, keep tweets whose `hasVideo` is Some(true).
 *
 * The number of tweets removed by each of the first three steps is added to the
 * `exclude_and_time_filtered`, `tweet_nudity_filtered` and `user_nudity_filtered` stats.
 *
 * @param topicTweetMap   topic id -> candidates to filter
 * @param maxTweetAge     maximum age of a tweet relative to the time of this call
 * @param isVideoOnly     whether to keep video tweets only
 * @param excludeTweetIds tweet ids to drop
 * @return topic id -> surviving candidates, in their original order
 */
private def filterCandidates(
  topicTweetMap: Map[TopicId, Seq[InitialCandidate]],
  maxTweetAge: Duration,
  isVideoOnly: Boolean,
  excludeTweetIds: Set[TweetId]
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
  val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)

  // Adds the number of tweets a filter step removed to the stat with the given name.
  def recordDropped(statName: String, sizeBefore: Int, sizeAfter: Int): Unit =
    filterCandidatesStats.stat(statName).add(sizeBefore - sizeAfter)

  val filteredResults = topicTweetMap.map {
    case (topicId, tweetsWithScore) =>
      topicId -> StatsUtil.trackItemsStats(filterCandidatesStats) {
        val timeFilteredTweets = tweetsWithScore.filterNot { tweet =>
          tweet.tweetId < earliestTweetId || excludeTweetIds.contains(tweet.tweetId)
        }
        recordDropped("exclude_and_time_filtered", tweetsWithScore.size, timeFilteredTweets.size)

        val tweetNudityFilteredTweets =
          timeFilteredTweets.filter(_.tweetInfo.isPassTweetMediaNudityTag.contains(true))
        recordDropped(
          "tweet_nudity_filtered",
          timeFilteredTweets.size,
          tweetNudityFilteredTweets.size)

        val userNudityFilteredTweets =
          tweetNudityFilteredTweets.filter(_.tweetInfo.isPassUserNudityRateStrict.contains(true))
        recordDropped(
          "user_nudity_filtered",
          tweetNudityFilteredTweets.size,
          userNudityFilteredTweets.size)

        val videoFilteredTweets =
          if (isVideoOnly) userNudityFilteredTweets.filter(_.tweetInfo.hasVideo.contains(true))
          else userNudityFilteredTweets

        Future.value(videoFilteredTweets)
      }
  }
  Future.collect(filteredResults)
}
/**
 * Orders every topic's candidates by favorite count, highest first.
 *
 * @param tweetCandidatesMap topic id -> candidates to rank
 * @return the same topics with their candidates sorted by descending `tweetInfo.favCount`
 */
private def rankCandidates(
  tweetCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
): Map[TopicId, Seq[InitialCandidate]] = {
  // `map` instead of `mapValues`: mapValues yields a lazy view that would re-run the sort every
  // time a value is read, whereas `map` sorts each topic exactly once.
  tweetCandidatesMap.map {
    case (topicId, tweetCandidates) =>
      topicId -> tweetCandidates.sortBy(candidate => -candidate.tweetInfo.favCount)
  }
}
/**
 * Converts the per-topic candidates into the TopicTweet form, keyed by the topic's entity id.
 *
 * Each TopicTweet carries the candidate's tweet id, its similarity score and the similarity
 * engine type recorded in its candidate generation info.
 *
 * NOTE(review): the result is keyed by `topicId.entityId` only, so two distinct TopicIds that
 * share an entity id would collapse into a single entry (the last one wins) — confirm that
 * entity ids are unique within the input.
 *
 * @param topicCandidatesMap candidates grouped by topic
 * @return the converted tweets grouped by topic entity id, in their incoming order
 */
private def hydrateCandidates(
  topicCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
): Map[Long, Seq[TopicTweet]] = {
  // Builds the TopicTweet for a single candidate.
  def toTopicTweet(candidate: InitialCandidate): TopicTweet = {
    val engineType: SimilarityEngineType =
      candidate.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
    TopicTweet(
      tweetId = candidate.tweetId,
      score = candidate.getSimilarityScore,
      similarityEngineType = engineType
    )
  }

  topicCandidatesMap.map {
    case (topicId, candidates) =>
      val topicTweets = candidates.map(toTopicTweet)
      topicId.entityId -> topicTweets
  }
}
}

View File

@ -0,0 +1,179 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
import com.twitter.cr_mixer.filter.UtegFilterRunner
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Candidate generator for UTEG (UserTweetEntityGraph) tweet recommendations.
 *
 * Pipeline, as implemented in [[get]]: fetch real-graph seeds, fetch candidates from the UTEG
 * similarity engine, attach TweetInfo, run the UTEG filters, rank, and convert the top results
 * back to [[TweetWithScoreAndSocialProof]].
 */
@Singleton
class UtegTweetCandidateGenerator @Inject() (
  @Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
    UserTweetEntityGraphSimilarityEngine.Query,
    TweetWithScoreAndSocialProof
  ],
  utegTweetScribeLogger: UtegTweetScribeLogger,
  tweetInfoStore: ReadableStore[TweetId, TweetInfo],
  realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
  utegFilterRunner: UtegFilterRunner,
  globalStats: StatsReceiver) {

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  // Request-independent scopes are created once here instead of on every call to `get`.
  private val allStats = stats.scope("all")
  private val fetchSeedsStats = stats.scope("fetchSeeds")
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val utegFilterStats = stats.scope("utegFilter")
  private val rankStats = stats.scope("rank")

  /**
   * Produces at most `query.maxNumResults` UTEG tweets for the query.
   *
   * @param query the request, including user, product, params and result limit
   * @return the top ranked tweets, each with its ranking score and original social proof
   */
  def get(
    query: UtegTweetCandidateGeneratorQuery
  ): Future[Seq[TweetWithScoreAndSocialProof]] = {
    // This scope depends on the product of the request, so it cannot be hoisted.
    val perProductStats = stats.scope("perProduct", query.product.toString)
    StatsUtil.trackItemsStats(allStats) {
      StatsUtil.trackItemsStats(perProductStats) {

        /**
         * The candidate we return in the end needs a social proof field, which isn't
         * supported by any existing Candidate type, so we created TweetWithScoreAndSocialProof
         * instead.
         *
         * However, filters and light ranker expect Candidate-typed param to work. In order to
         * minimise the changes to them, we are doing conversions from/to
         * TweetWithScoreAndSocialProof to/from Candidate in this method.
         */
        for {
          realGraphSeeds <- StatsUtil.trackItemMapStats(fetchSeedsStats) {
            fetchSeeds(query)
          }
          initialTweets <- StatsUtil.trackItemsStats(fetchCandidatesStats) {
            fetchCandidates(query, realGraphSeeds)
          }
          initialCandidates <- convertToInitialCandidates(initialTweets)
          filteredCandidates <- StatsUtil.trackItemsStats(utegFilterStats) {
            utegFilter(query, initialCandidates)
          }
          rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
            rankCandidates(query, filteredCandidates)
          }
        } yield {
          val topTweets = rankedCandidates.take(query.maxNumResults)
          convertToTweets(topTweets, initialTweets.map(tweet => tweet.tweetId -> tweet).toMap)
        }
      }
    }
  }

  /**
   * Runs the sequential UTEG filters. The runner works on groups of candidates, so the input is
   * passed as a single group and the result is flattened back into one sequence.
   */
  private def utegFilter(
    query: UtegTweetCandidateGeneratorQuery,
    candidates: Seq[InitialCandidate]
  ): Future[Seq[InitialCandidate]] = {
    utegFilterRunner.runSequentialFilters(query, Seq(candidates)).map(_.flatten)
  }

  /**
   * Fetches the seed users and their scores from the real-graph fetcher.
   *
   * @return the fetched `seedWithScores`, or an empty map when the fetcher returns nothing
   */
  private def fetchSeeds(
    query: UtegTweetCandidateGeneratorQuery
  ): Future[Map[UserId, Double]] = {
    realGraphInSourceGraphFetcher
      .get(FetcherQuery(query.userId, query.product, query.userState, query.params))
      .map(_.map(_.seedWithScores).getOrElse(Map.empty))
  }

  /**
   * Turns filtered candidates into ranked candidates, using each candidate's similarity score
   * as its ranking score. The order of the input is preserved.
   */
  private[candidate_generation] def rankCandidates(
    query: UtegTweetCandidateGeneratorQuery,
    filteredCandidates: Seq[InitialCandidate],
  ): Future[Seq[RankedCandidate]] = {
    val blendedCandidates = filteredCandidates.map(candidate =>
      candidate.toBlendedCandidate(Seq(candidate.candidateGenerationInfo)))
    Future(
      blendedCandidates.map { candidate =>
        val score = candidate.getSimilarityScore
        candidate.toRankedCandidate(score)
      }
    )
  }

  /**
   * Queries the UTEG similarity engine with the given seeds and hands the pending result to the
   * scribe logger, whose returned future is the result of this method.
   *
   * @param query          the request; its user id, impressed tweets and params feed the engine
   * @param realGraphSeeds seed users and their scores
   */
  def fetchCandidates(
    query: UtegTweetCandidateGeneratorQuery,
    realGraphSeeds: Map[UserId, Double],
  ): Future[Seq[TweetWithScoreAndSocialProof]] = {
    val engineQuery = UserTweetEntityGraphSimilarityEngine.fromParams(
      query.userId,
      realGraphSeeds,
      Some(query.impressedTweetList.toSeq),
      query.params
    )
    utegTweetScribeLogger.scribeInitialCandidates(
      query,
      // An absent engine result is treated as an empty candidate list.
      userTweetEntityGraphSimilarityEngine.getCandidates(engineQuery).map(_.toSeq.flatten)
    )
  }

  /**
   * Attaches TweetInfo to every candidate and wraps it as an [[InitialCandidate]] attributed to
   * the Uteg similarity engine.
   *
   * Candidates without a TweetInfo (id missing from the lookup result, or present with no
   * value) are dropped. The order of the remaining candidates is preserved.
   */
  private[candidate_generation] def convertToInitialCandidates(
    candidates: Seq[TweetWithScoreAndSocialProof],
  ): Future[Seq[InitialCandidate]] = {
    val tweetIds = candidates.map(_.tweetId).toSet
    Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
      candidates.flatMap { candidate =>
        // A single lookup: `get` covers a missing id, `flatten` covers a stored None.
        tweetInfos.get(candidate.tweetId).flatten.map { tweetInfo =>
          InitialCandidate(
            tweetId = candidate.tweetId,
            tweetInfo = tweetInfo,
            CandidateGenerationInfo(
              None,
              SimilarityEngineInfo(
                similarityEngineType = SimilarityEngineType.Uteg,
                modelId = None,
                score = Some(candidate.score)),
              Seq.empty
            )
          )
        }
      }
    }
  }

  /**
   * Converts ranked candidates back to [[TweetWithScoreAndSocialProof]], taking the score from
   * the ranked candidate and the social proof from the original tweet with the same id.
   *
   * @param candidates ranked candidates, in the order they should be returned
   * @param tweetMap   the originally fetched tweets, keyed by tweet id
   * @throws Exception if a ranked candidate's id is absent from `tweetMap`
   */
  private[candidate_generation] def convertToTweets(
    candidates: Seq[RankedCandidate],
    tweetMap: Map[TweetId, TweetWithScoreAndSocialProof]
  ): Seq[TweetWithScoreAndSocialProof] = {
    candidates.map { candidate =>
      tweetMap
        .get(candidate.tweetId).map { tweet =>
          TweetWithScoreAndSocialProof(
            tweet.tweetId,
            candidate.predictionScore,
            tweet.socialProofByType
          )
          // The exception should never be thrown
        }.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets"))
    }
  }
}

View File

@ -0,0 +1,13 @@
# Scala library target built from every *.scala file matched by the glob below.
scala_library(
sources = ["*.scala"],
tags = ["bazel-compatible"],
# Targets this library depends on.
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"configapi/configapi-core",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"finatra/inject/inject-core/src/main/scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -0,0 +1,473 @@
package com.twitter.cr_mixer.config
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.exception.InvalidSANNConfigException
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.simclustersann.thriftscala.{SimClustersANNConfig => ThriftSimClustersANNConfig}
import com.twitter.util.Duration
/**
 * Parameters of a SimClusters ANN (SANN) query, together with their Thrift representation.
 *
 * The meaning of the individual fields is not defined in this file; each one is forwarded to
 * the Thrift SimClustersANNConfig field of the corresponding name by [[toSANNConfigThrift]].
 */
case class SimClustersANNConfig(
maxNumResults: Int,
minScore: Double,
candidateEmbeddingType: EmbeddingType,
maxTopTweetsPerCluster: Int,
maxScanClusters: Int,
maxTweetCandidateAge: Duration,
minTweetCandidateAge: Duration,
annAlgorithm: ScoringAlgorithm) {
// Thrift form of this config. It is a val, so it is built once when the instance is created.
// The two age limits are sent as `Duration.inHours`.
// NOTE(review): inHours presumably yields whole hours, so a sub-hour age would be sent as 0 — confirm.
val toSANNConfigThrift: ThriftSimClustersANNConfig = ThriftSimClustersANNConfig(
maxNumResults = maxNumResults,
minScore = minScore,
candidateEmbeddingType = candidateEmbeddingType,
maxTopTweetsPerCluster = maxTopTweetsPerCluster,
maxScanClusters = maxScanClusters,
maxTweetCandidateAgeHours = maxTweetCandidateAge.inHours,
minTweetCandidateAgeHours = minTweetCandidateAge.inHours,
annAlgorithm = annAlgorithm,
)
}
/**
 * Registry of the named SANN configs.
 *
 * Every config is a copy of [[DefaultConfig]] (directly or through its family's default) with
 * a few fields overridden. Configs are looked up by id through [[getConfig]].
 */
object SimClustersANNConfig {

  /** Baseline values that every named config below starts from. */
  final val DefaultConfig = SimClustersANNConfig(
    maxNumResults = 200,
    minScore = 0.0,
    candidateEmbeddingType = EmbeddingType.LogFavBasedTweet,
    maxTopTweetsPerCluster = 800,
    maxScanClusters = 50,
    maxTweetCandidateAge = 24.hours,
    minTweetCandidateAge = 0.hours,
    annAlgorithm = ScoringAlgorithm.CosineSimilarity,
  )

  /*
  SimClustersANNConfigId: String
  Format: Prod - “EmbeddingType_ModelVersion_Default”
  Format: Experiment - “EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number”. Date : YYYYMMDD
   */

  private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy()

  // Chunnan's exp on maxTweetCandidateAgeDays 2
  private val FavBasedProducer_Model20m145k2020_20220617_06 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      maxTweetCandidateAge = 48.hours,
    )

  // Experimental SANN config
  private val FavBasedProducer_Model20m145k2020_20220801 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val FavBasedProducer_Model20m145k2020_20220810 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val FavBasedProducer_Model20m145k2020_20220818 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val FavBasedProducer_Model20m145k2020_20220819 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val FavBasedProducer_Model20m145k2020_20221221 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val FavBasedProducer_Model20m145k2020_20221220 =
    FavBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default = DefaultConfig.copy()

  // Chunnan's exp on maxTweetCandidateAgeDays 2
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      maxTweetCandidateAge = 48.hours,
    )

  // Experimental SANN config
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 =
    LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()

  // Chunnan's exp on maxTweetCandidateAgeDays 2
  private val UnfilteredUserInterestedIn_Model20m145k2020_20220617_06 =
    UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
      maxTweetCandidateAge = 48.hours,
    )

  // Experimental SANN config
  // NOTE(review): unlike the 20220801 config of every other family, this one is derived from
  // the 20220617_06 experiment rather than from the family default, so it also carries
  // maxTweetCandidateAge = 48.hours. Left unchanged; confirm that this is intentional.
  private val UnfilteredUserInterestedIn_Model20m145k2020_20220801 =
    UnfilteredUserInterestedIn_Model20m145k2020_20220617_06.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val UnfilteredUserInterestedIn_Model20m145k2020_20220810 =
    UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val UnfilteredUserInterestedIn_Model20m145k2020_20220818 =
    UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val UnfilteredUserInterestedIn_Model20m145k2020_20220819 =
    UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val UnfilteredUserInterestedIn_Model20m145k2020_20221221 =
    UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val UnfilteredUserInterestedIn_Model20m145k2020_20221220 =
    UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy()

  // Chunnan's exp on maxTweetCandidateAgeDays 2
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      maxTweetCandidateAge = 48.hours,
    )

  // Experimental SANN config
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220 =
    LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default =
    DefaultConfig.copy()

  // Chunnan's exp on maxTweetCandidateAgeDays 2
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      maxTweetCandidateAge = 48.hours,
    )

  // Experimental SANN config
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220 =
    LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()

  // Chunnan's exp on maxTweetCandidateAgeDays 2
  private val UserNextInterestedIn_Model20m145k2020_20220617_06 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      maxTweetCandidateAge = 48.hours,
    )

  // Experimental SANN config
  private val UserNextInterestedIn_Model20m145k2020_20220801 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val UserNextInterestedIn_Model20m145k2020_20220810 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val UserNextInterestedIn_Model20m145k2020_20220818 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val UserNextInterestedIn_Model20m145k2020_20220819 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val UserNextInterestedIn_Model20m145k2020_20221221 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val UserNextInterestedIn_Model20m145k2020_20221220 =
    UserNextInterestedIn_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  // Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow
  private val FollowBasedProducer_Model20m145k2020_Default =
    FavBasedProducer_Model20m145k2020_Default.copy()

  // The experiments below derive from this family's own default (currently equal to the
  // FavBasedProducer default), so a change to the FollowBasedProducer default reaches them too.

  // Experimental SANN config
  private val FollowBasedProducer_Model20m145k2020_20220801 =
    FollowBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
    )

  // SANN-1 config
  private val FollowBasedProducer_Model20m145k2020_20220810 =
    FollowBasedProducer_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-2 config
  private val FollowBasedProducer_Model20m145k2020_20220818 =
    FollowBasedProducer_Model20m145k2020_Default.copy(
      maxNumResults = 100,
      candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
      maxTweetCandidateAge = 175200.hours,
      maxTopTweetsPerCluster = 1600
    )

  // SANN-3 config
  private val FollowBasedProducer_Model20m145k2020_20220819 =
    FollowBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
    )

  // SANN-5 config
  private val FollowBasedProducer_Model20m145k2020_20221221 =
    FollowBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
      maxTweetCandidateAge = 1.hours
    )

  // SANN-4 config
  private val FollowBasedProducer_Model20m145k2020_20221220 =
    FollowBasedProducer_Model20m145k2020_Default.copy(
      candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
      maxTweetCandidateAge = 48.hours
    )

  /**
   * All named configs, keyed by config id. Each key is spelled exactly like the val it maps to;
   * [[getConfig]] rebuilds such a key from its three arguments.
   */
  val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map(
    "FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default,
    "FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06,
    "FavBasedProducer_Model20m145k2020_20220801" -> FavBasedProducer_Model20m145k2020_20220801,
    "FavBasedProducer_Model20m145k2020_20220810" -> FavBasedProducer_Model20m145k2020_20220810,
    "FavBasedProducer_Model20m145k2020_20220818" -> FavBasedProducer_Model20m145k2020_20220818,
    "FavBasedProducer_Model20m145k2020_20220819" -> FavBasedProducer_Model20m145k2020_20220819,
    "FavBasedProducer_Model20m145k2020_20221221" -> FavBasedProducer_Model20m145k2020_20221221,
    "FavBasedProducer_Model20m145k2020_20221220" -> FavBasedProducer_Model20m145k2020_20221220,
    "FollowBasedProducer_Model20m145k2020_Default" -> FollowBasedProducer_Model20m145k2020_Default,
    "FollowBasedProducer_Model20m145k2020_20220801" -> FollowBasedProducer_Model20m145k2020_20220801,
    "FollowBasedProducer_Model20m145k2020_20220810" -> FollowBasedProducer_Model20m145k2020_20220810,
    "FollowBasedProducer_Model20m145k2020_20220818" -> FollowBasedProducer_Model20m145k2020_20220818,
    "FollowBasedProducer_Model20m145k2020_20220819" -> FollowBasedProducer_Model20m145k2020_20220819,
    "FollowBasedProducer_Model20m145k2020_20221221" -> FollowBasedProducer_Model20m145k2020_20221221,
    "FollowBasedProducer_Model20m145k2020_20221220" -> FollowBasedProducer_Model20m145k2020_20221220,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221,
    "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220,
    "UnfilteredUserInterestedIn_Model20m145k2020_Default" -> UnfilteredUserInterestedIn_Model20m145k2020_Default,
    "UnfilteredUserInterestedIn_Model20m145k2020_20220617_06" -> UnfilteredUserInterestedIn_Model20m145k2020_20220617_06,
    "UnfilteredUserInterestedIn_Model20m145k2020_20220801" -> UnfilteredUserInterestedIn_Model20m145k2020_20220801,
    "UnfilteredUserInterestedIn_Model20m145k2020_20220810" -> UnfilteredUserInterestedIn_Model20m145k2020_20220810,
    "UnfilteredUserInterestedIn_Model20m145k2020_20220818" -> UnfilteredUserInterestedIn_Model20m145k2020_20220818,
    "UnfilteredUserInterestedIn_Model20m145k2020_20220819" -> UnfilteredUserInterestedIn_Model20m145k2020_20220819,
    "UnfilteredUserInterestedIn_Model20m145k2020_20221221" -> UnfilteredUserInterestedIn_Model20m145k2020_20221221,
    "UnfilteredUserInterestedIn_Model20m145k2020_20221220" -> UnfilteredUserInterestedIn_Model20m145k2020_20221220,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221,
    "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221,
    "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220,
    "UserNextInterestedIn_Model20m145k2020_Default" -> UserNextInterestedIn_Model20m145k2020_Default,
    "UserNextInterestedIn_Model20m145k2020_20220617_06" -> UserNextInterestedIn_Model20m145k2020_20220617_06,
    "UserNextInterestedIn_Model20m145k2020_20220801" -> UserNextInterestedIn_Model20m145k2020_20220801,
    "UserNextInterestedIn_Model20m145k2020_20220810" -> UserNextInterestedIn_Model20m145k2020_20220810,
    "UserNextInterestedIn_Model20m145k2020_20220818" -> UserNextInterestedIn_Model20m145k2020_20220818,
    "UserNextInterestedIn_Model20m145k2020_20220819" -> UserNextInterestedIn_Model20m145k2020_20220819,
    "UserNextInterestedIn_Model20m145k2020_20221221" -> UserNextInterestedIn_Model20m145k2020_20221221,
    "UserNextInterestedIn_Model20m145k2020_20221220" -> UserNextInterestedIn_Model20m145k2020_20221220,
  )

  /**
   * Looks up the config registered under the id `embeddingType_modelVersion_id`.
   *
   * @param embeddingType first part of the config id, e.g. "FavBasedProducer"
   * @param modelVersion  second part of the config id, e.g. "Model20m145k2020"
   * @param id            last part of the config id, e.g. "Default" or "20220617_06"
   * @return the registered config
   * @throws InvalidSANNConfigException if no config is registered under the assembled id
   */
  def getConfig(
    embeddingType: String,
    modelVersion: String,
    id: String
  ): SimClustersANNConfig = {
    val configName = s"${embeddingType}_${modelVersion}_$id"
    // The default argument of getOrElse is by-name, so the throw only runs on a miss.
    DefaultConfigMappings.getOrElse(
      configName,
      throw InvalidSANNConfigException(s"Incorrect config id passed in for SANN $configName"))
  }
}

View File

@ -0,0 +1,24 @@
package com.twitter.cr_mixer.config
import com.twitter.util.Duration
/**
 * Bundle of the timeout durations used by cr-mixer, grouped by the component they belong to
 * (see the inline comments). Where each timeout is applied is not visible in this file.
 */
case class TimeoutConfig(
/* Default timeouts for candidate generator */
serviceTimeout: Duration,
signalFetchTimeout: Duration,
similarityEngineTimeout: Duration,
annServiceClientTimeout: Duration,
/* For Uteg Candidate Generator */
utegSimilarityEngineTimeout: Duration,
/* For User State Store */
userStateUnderlyingStoreTimeout: Duration,
userStateStoreTimeout: Duration,
/* For FRS based tweets */
// Timeout passed to EarlyBird server
earlybirdServerTimeout: Duration,
// Timeout set on CrMixer side
earlybirdSimilarityEngineTimeout: Duration,
frsBasedTweetEndpointTimeout: Duration,
topicTweetEndpointTimeout: Duration,
// Timeout Settings for Navi gRPC Client
naviRequestTimeout: Duration)

View File

@ -0,0 +1,48 @@
# Scala library target built from every *.scala file matched by the glob below.
scala_library(
sources = ["*.scala"],
# NOTE(review): presumably turns compiler warnings into build failures — confirm.
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
# Targets this library depends on.
dependencies = [
# NOTE(review): the next two entries differ only in "3rdparty/jvm" vs "3rdparty/src/jvm";
# confirm that both are needed.
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/debug",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"finagle/finagle-base-http/src/main",
"finagle/finagle-core/src/main",
"finagle/finagle-http/src/main/scala",
"finatra/http-server/src/main/scala/com/twitter/finatra/http:controller",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/configapi",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/ads/schema:common-scala",
"src/thrift/com/twitter/context:twitter-context-scala",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/timelines/render:thrift-scala",
"src/thrift/com/twitter/timelines/timeline_logging:thrift-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"stringcenter/client",
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview",
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview/funnelseries",
"twitter-context/src/main/scala",
"user-signal-service/thrift/src/main/thrift:thrift-scala",
],
)

View File

@ -0,0 +1,757 @@
package com.twitter.cr_mixer.controller
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.candidate_generation.AdsCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.CrCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.FrsTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.RelatedTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.RelatedVideoTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.TopicTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.UtegTweetCandidateGenerator
import com.twitter.cr_mixer.featureswitch.ParamsBuilder
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
import com.twitter.cr_mixer.logging.RelatedTweetScribeMetadata
import com.twitter.cr_mixer.logging.ScribeMetadata
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedAdsCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.AdsParams
import com.twitter.cr_mixer.param.FrsParams.FrsBasedCandidateGenerationMaxCandidatesNumParam
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.param.RelatedTweetGlobalParams
import com.twitter.cr_mixer.param.RelatedVideoTweetGlobalParams
import com.twitter.cr_mixer.param.TopicTweetParams
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.param.decider.EndpointLoadShedder
import com.twitter.cr_mixer.thriftscala.AdTweetRecommendation
import com.twitter.cr_mixer.thriftscala.AdsRequest
import com.twitter.cr_mixer.thriftscala.AdsResponse
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.cr_mixer.thriftscala.FrsTweetRequest
import com.twitter.cr_mixer.thriftscala.FrsTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedTweet
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweet
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetResponse
import com.twitter.cr_mixer.thriftscala.TopicTweet
import com.twitter.cr_mixer.thriftscala.TopicTweetRequest
import com.twitter.cr_mixer.thriftscala.TopicTweetResponse
import com.twitter.cr_mixer.thriftscala.TweetRecommendation
import com.twitter.cr_mixer.thriftscala.UtegTweet
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
import com.twitter.cr_mixer.util.MetricTagUtil
import com.twitter.cr_mixer.util.SignalTimestampStatsUtil
import com.twitter.cr_mixer.{thriftscala => t}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finatra.thrift.Controller
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.timeline_logging.{thriftscala => thriftlog}
import com.twitter.timelines.tracing.lensview.funnelseries.TweetScoreFunnelSeries
import com.twitter.util.Future
import com.twitter.util.Time
import java.util.UUID
import javax.inject.Inject
import org.apache.commons.lang.exception.ExceptionUtils
class CrMixerThriftController @Inject() (
crCandidateGenerator: CrCandidateGenerator,
relatedTweetCandidateGenerator: RelatedTweetCandidateGenerator,
relatedVideoTweetCandidateGenerator: RelatedVideoTweetCandidateGenerator,
utegTweetCandidateGenerator: UtegTweetCandidateGenerator,
frsTweetCandidateGenerator: FrsTweetCandidateGenerator,
topicTweetCandidateGenerator: TopicTweetCandidateGenerator,
crMixerScribeLogger: CrMixerScribeLogger,
relatedTweetScribeLogger: RelatedTweetScribeLogger,
utegTweetScribeLogger: UtegTweetScribeLogger,
adsRecommendationsScribeLogger: AdsRecommendationsScribeLogger,
adsCandidateGenerator: AdsCandidateGenerator,
decider: CrMixerDecider,
paramsBuilder: ParamsBuilder,
endpointLoadShedder: EndpointLoadShedder,
signalTimestampStatsUtil: SignalTimestampStatsUtil,
tweetRecommendationResultsStore: ReadableWritableStore[UserId, CrMixerTweetResponse],
userStateStore: ReadableStore[UserId, UserState],
statsReceiver: StatsReceiver)
extends Controller(t.CrMixer) {
// Lensview funnel-series recorder backed by this controller's StatsReceiver. Declared `lazy`, so
// it is only constructed on first use (it is used by scribeTweetScoreFunnelSeries).
lazy private val tweetScoreFunnelSeries = new TweetScoreFunnelSeries(statsReceiver)
/**
 * Logs a failure of `endpoint`, including the localized message and full stack trace of `e`.
 *
 * The entry is deliberately written with logger.info() rather than logger.error(), because
 * logger.error() sometimes suppresses the detailed stack trace.
 *
 * @param endpoint name of the endpoint that failed
 * @param e        the failure to report
 */
private def logErrMessage(endpoint: String, e: Throwable): Unit = {
  val header = s"Failed endpoint $endpoint: ${e.getLocalizedMessage}"
  val stackTrace = ExceptionUtils.getStackTrace(e)
  logger.info(header + "\n" + stackTrace)
}
/**
 * Generates a random, non-negative request id.
 *
 * The id is the most-significant 64 bits of a random UUID with the sign bit cleared. See
 * https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid
 *
 * @return a value in the range [0, Long.MaxValue]
 */
private def generateRequestUUID(): Long = {
  val highBits = UUID.randomUUID().getMostSignificantBits
  // Masking with Long.MaxValue clears the sign bit, so the result is never negative.
  highBits & Long.MaxValue
}
// Thrift endpoint GetTweetRecommendations: builds a candidate-generator query for the requesting
// user, fetches ranked candidates, scribes them, and returns them as a CrMixerTweetResponse.
handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args =>
val endpointName = "getTweetRecommendations"
val requestUUID = generateRequestUUID()
val startTime = Time.now.inMilliseconds
// A userId is mandatory for this endpoint. The exception is thrown directly in the handler body,
// i.e. before any Future has been created and outside of the rescue below.
val userId = args.request.clientContext.userId.getOrElse(
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
)
val queryFut = buildCrCandidateGeneratorQuery(args.request, requestUUID, userId)
queryFut.flatMap { query =>
val scribeMetadata = ScribeMetadata.from(query)
// Candidate generation runs inside the load shedder, keyed by endpoint name and product name.
endpointLoadShedder(endpointName, query.product.originalName) {
val response = crCandidateGenerator.get(query)
// If at least one candidate carries a Blue Verified annotation, pass the response through the
// dedicated Blue Verified scribe call; otherwise use the response unchanged.
val blueVerifiedScribedResponse = response.flatMap { rankedCandidates =>
val hasBlueVerifiedCandidate = rankedCandidates.exists { tweet =>
tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
}
if (hasBlueVerifiedCandidate) {
crMixerScribeLogger.scribeGetTweetRecommendationsForBlueVerified(
scribeMetadata,
response)
} else {
response
}
}
val thriftResponse = blueVerifiedScribedResponse.map { candidates =>
// Only Home requests record a tweet-score funnel series; its return value is not used here.
if (query.product == t.Product.Home) {
scribeTweetScoreFunnelSeries(candidates)
}
buildThriftResponse(candidates)
}
// The caching helper returns Unit, so the handler does not wait on the cache write.
cacheTweetRecommendationResults(args.request, thriftResponse)
// The Future returned by the scribe call is the result of the load-shed block.
crMixerScribeLogger.scribeGetTweetRecommendations(
args.request,
startTime,
scribeMetadata,
thriftResponse)
// Load shedding and every other failure of the block above degrade to an empty response; only
// the latter is logged. Failures of queryFut itself are not covered by this rescue.
}.rescue {
case EndpointLoadShedder.LoadSheddingException =>
Future(CrMixerTweetResponse(Seq.empty))
case e =>
logErrMessage(endpointName, e)
Future(CrMixerTweetResponse(Seq.empty))
}
}
}
/**
 * Related-tweet endpoints.
 *
 * GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor do essentially the same
 * thing, except that one passes in a TweetId, which calls the TweetBased engine, and the other
 * passes in an AuthorId, which calls the ProducerBased engine. Both delegate to
 * getRelatedTweets and differ only in the endpoint name they report.
 */
handle(t.CrMixer.GetRelatedTweetsForQueryTweet) {
  args: t.CrMixer.GetRelatedTweetsForQueryTweet.Args =>
    getRelatedTweets("getRelatedTweetsForQueryTweet", args.request)
}

// Video variant: delegates to getRelatedVideoTweets.
handle(t.CrMixer.GetRelatedVideoTweetsForQueryTweet) {
  args: t.CrMixer.GetRelatedVideoTweetsForQueryTweet.Args =>
    getRelatedVideoTweets("getRelatedVideoTweetsForQueryVideoTweet", args.request)
}

handle(t.CrMixer.GetRelatedTweetsForQueryAuthor) {
  args: t.CrMixer.GetRelatedTweetsForQueryAuthor.Args =>
    getRelatedTweets("getRelatedTweetsForQueryAuthor", args.request)
}
/**
 * Shared implementation behind the related-tweets endpoints.
 *
 * Builds the query, runs the related-tweet candidate generator under the load shedder, and
 * passes the mapped response through the related-tweet scribe logger. Load shedding and any
 * other failure of that block yield an empty response; only non-load-shedding failures are
 * logged.
 *
 * @param endpointName name reported to the load shedder and in error logs
 * @param request      the incoming thrift request
 */
private def getRelatedTweets(
  endpointName: String,
  request: RelatedTweetRequest
): Future[RelatedTweetResponse] = {
  val requestUUID = generateRequestUUID()
  val startTime = Time.now.inMilliseconds

  buildRelatedTweetQuery(request, requestUUID).flatMap { query =>
    val relatedTweetScribeMetadata = RelatedTweetScribeMetadata.from(query)

    val guardedResponse = endpointLoadShedder(endpointName, query.product.originalName) {
      relatedTweetScribeLogger.scribeGetRelatedTweets(
        request,
        startTime,
        relatedTweetScribeMetadata,
        relatedTweetCandidateGenerator.get(query).map(buildRelatedTweetResponse)
      )
    }

    guardedResponse.rescue {
      case EndpointLoadShedder.LoadSheddingException =>
        Future(RelatedTweetResponse(Seq.empty))
      case e =>
        logErrMessage(endpointName, e)
        Future(RelatedTweetResponse(Seq.empty))
    }
  }
}
/**
 * Implementation behind the related-video-tweets endpoint.
 *
 * Builds the query and runs the related-video candidate generator under the load shedder.
 * Load shedding and any other failure of that block yield an empty response; only
 * non-load-shedding failures are logged.
 *
 * @param endpointName name reported to the load shedder and in error logs
 * @param request      the incoming thrift request
 */
private def getRelatedVideoTweets(
  endpointName: String,
  request: RelatedVideoTweetRequest
): Future[RelatedVideoTweetResponse] = {
  val requestUUID = generateRequestUUID()

  buildRelatedVideoTweetQuery(request, requestUUID).flatMap { query =>
    val guardedResponse = endpointLoadShedder(endpointName, query.product.originalName) {
      relatedVideoTweetCandidateGenerator.get(query).map(buildRelatedVideoTweetResponse)
    }

    guardedResponse.rescue {
      case EndpointLoadShedder.LoadSheddingException =>
        Future(RelatedVideoTweetResponse(Seq.empty))
      case e =>
        logErrMessage(endpointName, e)
        Future(RelatedVideoTweetResponse(Seq.empty))
    }
  }
}
/**
 * Thrift endpoint GetFrsBasedTweetRecommendations.
 *
 * Builds the FRS query and runs the FRS candidate generator under the load shedder. Any failure
 * of that block yields an empty FrsTweetResponse. Load shedding is an expected condition and is
 * therefore not logged, matching getTweetRecommendations and the related-tweet endpoints; all
 * other failures are logged with their stack trace. Failures of the query-building Future are
 * not covered by the rescue.
 */
handle(t.CrMixer.GetFrsBasedTweetRecommendations) {
  args: t.CrMixer.GetFrsBasedTweetRecommendations.Args =>
    val endpointName = "getFrsBasedTweetRecommendations"
    val requestUUID = generateRequestUUID()
    val queryFut = buildFrsBasedTweetQuery(args.request, requestUUID)
    queryFut.flatMap { query =>
      endpointLoadShedder(endpointName, query.product.originalName) {
        frsTweetCandidateGenerator.get(query).map(FrsTweetResponse(_))
      }.rescue {
        // Shed requests must not flood the log with stack traces while the service is overloaded.
        case EndpointLoadShedder.LoadSheddingException =>
          Future.value(FrsTweetResponse(Seq.empty))
        case e =>
          logErrMessage(endpointName, e)
          Future.value(FrsTweetResponse(Seq.empty))
      }
    }
}
/**
 * Thrift endpoint GetTopicTweetRecommendations.
 *
 * The query is built synchronously, so an exception thrown by buildTopicTweetQuery is raised
 * before the rescue below applies. The topic candidate generator then runs under the load
 * shedder. Any failure of that block yields an empty TopicTweetResponse. Load shedding is an
 * expected condition and is therefore not logged, matching getTweetRecommendations and the
 * related-tweet endpoints; all other failures are logged with their stack trace.
 */
handle(t.CrMixer.GetTopicTweetRecommendations) {
  args: t.CrMixer.GetTopicTweetRecommendations.Args =>
    val endpointName = "getTopicTweetRecommendations"
    val requestUUID = generateRequestUUID()
    val query = buildTopicTweetQuery(args.request, requestUUID)
    endpointLoadShedder(endpointName, query.product.originalName) {
      topicTweetCandidateGenerator.get(query).map(TopicTweetResponse(_))
    }.rescue {
      // Shed requests must not flood the log with stack traces while the service is overloaded.
      case EndpointLoadShedder.LoadSheddingException =>
        Future.value(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]]))
      case e =>
        logErrMessage(endpointName, e)
        Future.value(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]]))
    }
}
/**
 * Thrift endpoint GetUtegTweetRecommendations.
 *
 * Builds the UTEG query, runs the UTEG candidate generator under the load shedder, and passes
 * the mapped response through the UTEG scribe logger. Any failure of that block yields an empty
 * UtegTweetResponse. Load shedding is an expected condition and is therefore not logged,
 * matching getTweetRecommendations and the related-tweet endpoints; all other failures are
 * logged with their stack trace. Failures of the query-building Future are not covered by the
 * rescue.
 */
handle(t.CrMixer.GetUtegTweetRecommendations) {
  args: t.CrMixer.GetUtegTweetRecommendations.Args =>
    val endpointName = "getUtegTweetRecommendations"
    val requestUUID = generateRequestUUID()
    val startTime = Time.now.inMilliseconds
    val queryFut = buildUtegTweetQuery(args.request, requestUUID)
    queryFut
      .flatMap { query =>
        val scribeMetadata = ScribeMetadata.from(query)
        endpointLoadShedder(endpointName, query.product.originalName) {
          utegTweetScribeLogger.scribeGetUtegTweetRecommendations(
            args.request,
            startTime,
            scribeMetadata,
            utegTweetCandidateGenerator
              .get(query)
              .map(buildUtegTweetResponse)
          )
        }.rescue {
          // Shed requests must not flood the log with stack traces while the service is overloaded.
          case EndpointLoadShedder.LoadSheddingException =>
            Future.value(UtegTweetResponse(Seq.empty))
          case e =>
            logErrMessage(endpointName, e)
            Future.value(UtegTweetResponse(Seq.empty))
        }
      }
}
/**
 * Thrift endpoint GetAdsRecommendations.
 *
 * Builds the ads query, runs the ads candidate generator and hands the mapped response to the
 * ads scribe logger, whose returned Future is the endpoint result. A failed result is logged
 * and replaced by an empty AdsResponse. This endpoint does not go through the load shedder.
 */
handle(t.CrMixer.GetAdsRecommendations) { args: t.CrMixer.GetAdsRecommendations.Args =>
  val endpointName = "getAdsRecommendations"
  val queryFut = buildAdsCandidateGeneratorQuery(args.request)
  val startTime = Time.now.inMilliseconds

  queryFut.flatMap { query =>
    val scribeMetadata = ScribeMetadata.from(query)
    val response = adsCandidateGenerator.get(query).map(buildAdsResponse)
    val scribedResponse = adsRecommendationsScribeLogger.scribeGetAdsRecommendations(
      args.request,
      startTime,
      scribeMetadata,
      response,
      query.params(AdsParams.EnableScribe)
    )

    scribedResponse.rescue {
      case e =>
        logErrMessage(endpointName, e)
        Future(AdsResponse(Seq.empty))
    }
  }
}
/**
 * Builds the [[CrCandidateGeneratorQuery]] for a GetTweetRecommendations request.
 *
 * Looks up the user's state (falling back to EnumUnknownUserState(100) when the store has no
 * entry), counts it, builds the request params, and derives the maximum number of results from
 * the (product, productContext) combination.
 *
 * The returned Future fails with IllegalArgumentException when the (product, productContext)
 * combination is not one of the supported ones.
 */
private def buildCrCandidateGeneratorQuery(
  thriftRequest: CrMixerTweetRequest,
  requestUUID: Long,
  userId: Long
): Future[CrCandidateGeneratorQuery] = {
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext
  val requestStats = statsReceiver.scope(product.toString).scope("CrMixerTweetRequest")

  userStateStore.get(userId).map { userStateOpt =>
    val userState = userStateOpt.getOrElse(UserState.EnumUnknownUserState(100))
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)

    // Specify product-specific behavior mapping here
    val maxNumResults = (product, productContext) match {
      case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
        homeContext.maxResults.getOrElse(9999)
      case (t.Product.Notifications, Some(t.ProductContext.NotificationsContext(_))) =>
        params(GlobalParams.MaxCandidatesPerRequestParam)
      // These products carry no product context and all use the global per-request cap.
      case (t.Product.Email | t.Product.ImmersiveMediaViewer | t.Product.VideoCarousel, None) =>
        params(GlobalParams.MaxCandidatesPerRequestParam)
      case _ =>
        throw new IllegalArgumentException(
          s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
        )
    }

    CrCandidateGeneratorQuery(
      userId = userId,
      product = product,
      userState = userState,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID,
      languageCode = thriftRequest.clientContext.languageCode
    )
  }
}
/**
 * Builds the [[RelatedTweetCandidateGeneratorQuery]] for a related-tweets request.
 *
 * The user state is read from the store when the request carries a userId; for requests
 * without a userId, or when the store has no entry, EnumUnknownUserState(100) is used.
 */
private def buildRelatedTweetQuery(
  thriftRequest: RelatedTweetRequest,
  requestUUID: Long
): Future[RelatedTweetCandidateGeneratorQuery] = {
  val product = thriftRequest.product
  val requestStats = statsReceiver.scope(product.toString).scope("RelatedTweetRequest")
  val unknownUserState: UserState = UserState.EnumUnknownUserState(100)

  val userStateFut: Future[UserState] = thriftRequest.clientContext.userId match {
    case Some(userId) => userStateStore.get(userId).map(_.getOrElse(unknownUserState))
    case None => Future.value(unknownUserState)
  }

  userStateFut.map { userState =>
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)

    // Specify product-specific behavior mapping here
    // Currently, Home takes 10, and RUX takes 100
    val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam)

    RelatedTweetCandidateGeneratorQuery(
      internalId = thriftRequest.internalId,
      clientContext = thriftRequest.clientContext,
      product = product,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID
    )
  }
}
/**
 * Builds the [[AdsCandidateGeneratorQuery]] for an ads request.
 *
 * Throws IllegalArgumentException synchronously when the client context has no userId. The
 * request id is generated here rather than passed in. When the user-state store has no entry,
 * EnumUnknownUserState(100) is used.
 */
private def buildAdsCandidateGeneratorQuery(
  thriftRequest: AdsRequest
): Future[AdsCandidateGeneratorQuery] = {
  val userId = thriftRequest.clientContext.userId match {
    case Some(id) => id
    case None =>
      throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
  }
  val product = thriftRequest.product
  val requestUUID = generateRequestUUID()

  userStateStore.get(userId).map { userStateOpt =>
    val userState = userStateOpt.getOrElse(UserState.EnumUnknownUserState(100))
    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)
    val maxNumResults = params(AdsParams.AdsCandidateGenerationMaxCandidatesNumParam)

    AdsCandidateGeneratorQuery(
      userId = userId,
      product = product,
      userState = userState,
      params = params,
      maxNumResults = maxNumResults,
      requestUUID = requestUUID
    )
  }
}
/**
 * Builds the [[RelatedVideoTweetCandidateGeneratorQuery]] for a related-video-tweets request.
 *
 * The user state is read from the store when the request carries a userId; for requests
 * without a userId, or when the store has no entry, EnumUnknownUserState(100) is used.
 */
private def buildRelatedVideoTweetQuery(
  thriftRequest: RelatedVideoTweetRequest,
  requestUUID: Long
): Future[RelatedVideoTweetCandidateGeneratorQuery] = {
  val product = thriftRequest.product
  val requestStats = statsReceiver.scope(product.toString).scope("RelatedVideoTweetRequest")
  val unknownUserState: UserState = UserState.EnumUnknownUserState(100)

  val userStateFut: Future[UserState] = thriftRequest.clientContext.userId match {
    case Some(userId) => userStateStore.get(userId).map(_.getOrElse(unknownUserState))
    case None => Future.value(unknownUserState)
  }

  userStateFut.map { userState =>
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)
    val maxNumResults = params(RelatedVideoTweetGlobalParams.MaxCandidatesPerRequestParam)

    RelatedVideoTweetCandidateGeneratorQuery(
      internalId = thriftRequest.internalId,
      clientContext = thriftRequest.clientContext,
      product = product,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID
    )
  }
}
/**
 * Builds the [[UtegTweetCandidateGeneratorQuery]] for a UTEG request.
 *
 * Throws IllegalArgumentException synchronously when the client context has no userId. The
 * returned Future fails with IllegalArgumentException unless the request is for the Home
 * product with a HomeContext. When the user-state store has no entry,
 * EnumUnknownUserState(100) is used.
 */
private def buildUtegTweetQuery(
  thriftRequest: UtegTweetRequest,
  requestUUID: Long
): Future[UtegTweetCandidateGeneratorQuery] = {
  val userId = thriftRequest.clientContext.userId match {
    case Some(id) => id
    case None =>
      throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
  }
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext
  val requestStats = statsReceiver.scope(product.toString).scope("UtegTweetRequest")

  userStateStore.get(userId).map { userStateOpt =>
    val userState = userStateOpt.getOrElse(UserState.EnumUnknownUserState(100))
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)

    // Specify product-specific behavior mapping here
    val maxNumResults = (product, productContext) match {
      case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
        homeContext.maxResults.getOrElse(9999)
      case _ =>
        throw new IllegalArgumentException(
          s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
        )
    }

    UtegTweetCandidateGeneratorQuery(
      userId = userId,
      product = product,
      userState = userState,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID
    )
  }
}
/**
 * Builds the [[TopicTweetCandidateGeneratorQuery]] for a topic-tweets request.
 *
 * Unlike the other query builders this one is synchronous and does not consult the user-state
 * store: params are always built with EnumUnknownUserState(100).
 *
 * @throws IllegalArgumentException when the client context has no userId, or when the
 *                                  (product, productContext) combination is not supported
 */
private def buildTopicTweetQuery(
  thriftRequest: TopicTweetRequest,
  requestUUID: Long
): TopicTweetCandidateGeneratorQuery = {
  val userId = thriftRequest.clientContext.userId.getOrElse(
    throw new IllegalArgumentException(
      "userId must be present in the TopicTweetRequest clientContext")
  )
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext

  // Specify product-specific behavior mapping here
  val isVideoOnly = (product, productContext) match {
    case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) =>
      context.isVideoOnly
    // These products carry no product context and are never video-only.
    case (
          t.Product.TopicLandingPage | t.Product.HomeTopicsBackfill | t.Product.TopicTweetsStrato,
          None) =>
      false
    case _ =>
      throw new IllegalArgumentException(
        s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
      )
  }

  // Use the literal request name, as the sibling query builders do. The previous
  // `TopicTweetRequest.toString` was the thrift companion object's toString, which is not a
  // stable metric name.
  statsReceiver.scope(product.toString).counter("TopicTweetRequest").incr()

  val params =
    paramsBuilder.buildFromClientContext(
      thriftRequest.clientContext,
      product,
      UserState.EnumUnknownUserState(100)
    )

  val topicIds = thriftRequest.topicIds.map { topicId =>
    TopicId(
      entityId = topicId,
      language = thriftRequest.clientContext.languageCode,
      country = None
    )
  }.toSet

  TopicTweetCandidateGeneratorQuery(
    userId = userId,
    topicIds = topicIds,
    product = product,
    maxNumResults = params(TopicTweetParams.MaxTopicTweetCandidatesParam),
    impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
    params = params,
    requestUUID = requestUUID,
    isVideoOnly = isVideoOnly
  )
}
/**
 * Builds the [[FrsTweetCandidateGeneratorQuery]] for an FRS-based request.
 *
 * Throws IllegalArgumentException synchronously when the client context has no userId. When
 * the user-state store has no entry, EnumUnknownUserState(100) is used. The result cap is the
 * Home context's maxResults when present, otherwise
 * FrsBasedCandidateGenerationMaxCandidatesNumParam.
 */
private def buildFrsBasedTweetQuery(
  thriftRequest: FrsTweetRequest,
  requestUUID: Long
): Future[FrsTweetCandidateGeneratorQuery] = {
  val userId = thriftRequest.clientContext.userId match {
    case Some(id) => id
    case None =>
      throw new IllegalArgumentException(
        "userId must be present in the FrsTweetRequest clientContext")
  }
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext
  val requestStats = statsReceiver.scope(product.toString).scope("FrsTweetRequest")

  userStateStore.get(userId).map { userStateOpt =>
    val userState = userStateOpt.getOrElse(UserState.EnumUnknownUserState(100))
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)

    // A `def`, so the param is only looked up on the paths that actually need the fallback.
    def paramMaxCandidates = params(FrsBasedCandidateGenerationMaxCandidatesNumParam)

    val maxNumResults = (product, productContext) match {
      case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
        homeContext.maxResults.getOrElse(paramMaxCandidates)
      case _ =>
        paramMaxCandidates
    }

    FrsTweetCandidateGeneratorQuery(
      userId = userId,
      product = product,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      impressedUserList = thriftRequest.excludedUserIds.getOrElse(Nil).toSet,
      params = params,
      languageCodeOpt = thriftRequest.clientContext.languageCode,
      countryCodeOpt = thriftRequest.clientContext.countryCode,
      requestUUID = requestUUID
    )
  }
}
/**
 * Converts ranked candidates into a [[CrMixerTweetResponse]].
 *
 * Each candidate becomes a TweetRecommendation carrying its prediction score, metric tags and
 * latest source-signal timestamp. The resulting recommendations are passed to
 * signalTimestampStatsUtil.statsSignalTimestamp before the response is returned.
 */
private def buildThriftResponse(
  candidates: Seq[RankedCandidate]
): CrMixerTweetResponse = {
  val recommendations = candidates.map { ranked =>
    TweetRecommendation(
      tweetId = ranked.tweetId,
      score = ranked.predictionScore,
      metricTags = Some(MetricTagUtil.buildMetricTags(ranked)),
      latestSourceSignalTimestampInMillis =
        SignalTimestampStatsUtil.buildLatestSourceSignalTimestamp(ranked)
    )
  }
  signalTimestampStatsUtil.statsSignalTimestamp(recommendations)
  CrMixerTweetResponse(recommendations)
}
/**
 * Records a tweet-score funnel series span for the given candidates.
 *
 * Inside the span, each candidate is turned into a TweetDimensionMeasure whose dimension is
 * (tweetId, similarity engine type value) and whose measure is the prediction score. The span
 * body yields the candidates paired with those measures.
 */
private def scribeTweetScoreFunnelSeries(
  candidates: Seq[RankedCandidate]
): Seq[RankedCandidate] = {
  // 202210210901 is a random number for code search of Lensview
  tweetScoreFunnelSeries.startNewSpan(
    name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType",
    codePtr = 202210210901L) {
    val measures = candidates.map { candidate =>
      thriftlog.TweetDimensionMeasure(
        dimension = Some(
          thriftlog.RequestTweetDimension(
            candidate.tweetId,
            candidate.reasonChosen.similarityEngineInfo.similarityEngineType.value)),
        measure = Some(thriftlog.RequestTweetMeasure(candidate.predictionScore))
      )
    }
    (candidates, measures)
  }
}
/**
 * Converts initial candidates into a [[RelatedTweetResponse]]; each RelatedTweet carries the
 * candidate's tweet id, similarity score and author id.
 */
private def buildRelatedTweetResponse(candidates: Seq[InitialCandidate]): RelatedTweetResponse =
  RelatedTweetResponse(
    candidates.map { initial =>
      RelatedTweet(
        tweetId = initial.tweetId,
        score = Some(initial.getSimilarityScore),
        authorId = Some(initial.tweetInfo.authorId)
      )
    }
  )
/**
 * Converts initial candidates into a [[RelatedVideoTweetResponse]]; each RelatedVideoTweet
 * carries the candidate's tweet id and similarity score.
 */
private def buildRelatedVideoTweetResponse(
  candidates: Seq[InitialCandidate]
): RelatedVideoTweetResponse =
  RelatedVideoTweetResponse(
    candidates.map { initial =>
      RelatedVideoTweet(
        tweetId = initial.tweetId,
        score = Some(initial.getSimilarityScore)
      )
    }
  )
/**
 * Converts UTEG candidates into a [[UtegTweetResponse]]; each UtegTweet carries the candidate's
 * tweet id, score and social proof grouped by type.
 */
private def buildUtegTweetResponse(
  candidates: Seq[TweetWithScoreAndSocialProof]
): UtegTweetResponse =
  UtegTweetResponse(
    candidates.map { scored =>
      UtegTweet(
        tweetId = scored.tweetId,
        score = scored.score,
        socialProofByType = scored.socialProofByType
      )
    }
  )
/**
 * Converts ranked ads candidates into an [[AdsResponse]]; each AdTweetRecommendation carries
 * the candidate's tweet id, prediction score and line-item info.
 */
private def buildAdsResponse(
  candidates: Seq[RankedAdsCandidate]
): AdsResponse = {
  val recommendations = candidates.map { ranked =>
    AdTweetRecommendation(
      tweetId = ranked.tweetId,
      score = ranked.predictionScore,
      lineItems = Some(ranked.lineItemInfo))
  }
  AdsResponse(ads = recommendations)
}
/**
 * Best-effort write of a successful, non-empty Home response into the tweet recommendation
 * results store.
 *
 * The write is attempted only when the getTweetRecommendationsCacheRate decider is available
 * for the user, the request is for the Home product, and the response contains tweets. The
 * method returns immediately; the Future of the write is intentionally not awaited, and its
 * failure does not affect the caller.
 *
 * @param request  the original thrift request
 * @param response the response Future whose eventual value may be cached
 * @throws IllegalArgumentException when the client context has no userId
 */
private def cacheTweetRecommendationResults(
  request: CrMixerTweetRequest,
  response: Future[CrMixerTweetResponse]
): Unit = {
  val userId = request.clientContext.userId.getOrElse(
    throw new IllegalArgumentException(
      "userId must be present in getTweetRecommendations() Thrift clientContext"))

  if (decider.isAvailableForId(userId, DeciderConstants.getTweetRecommendationsCacheRate)) {
    // flatMap keeps the store write on the same Future chain instead of nesting it, and
    // Future.Unit is the real unit Future (the previous `Future.value(Unit)` wrapped the
    // `scala.Unit` companion object).
    response.flatMap { crMixerTweetResponse =>
      if (request.product == t.Product.Home && crMixerTweetResponse.tweets.nonEmpty) {
        tweetRecommendationResultsStore.put((userId, crMixerTweetResponse))
      } else {
        Future.Unit
      }
    }
  }
}
}

View File

@ -0,0 +1,7 @@
# Scala library target built from every *.scala file in this directory, using the
# "fatal_warnings" compiler option set and strict deps. It declares no dependencies.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [],
)

View File

@ -0,0 +1,4 @@
package com.twitter.cr_mixer
package exception
/**
 * Exception whose message is `msg`. Judging by its name it signals an invalid SANN
 * configuration; where it is thrown is not visible in this file.
 */
case class InvalidSANNConfigException(msg: String) extends Exception(msg)

View File

@ -0,0 +1,35 @@
# Scala library target built from every *.scala file in this directory, using the
# "fatal_warnings" compiler option set and strict deps. Because strict_deps is enabled, every
# target the sources use directly is expected to be listed below.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"abdecider/src/main/scala",
"configapi/configapi-abdecider",
"configapi/configapi-core",
"configapi/configapi-featureswitches:v2",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
"featureswitches/featureswitches-core",
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -0,0 +1,79 @@
package com.twitter.cr_mixer
package featureswitch
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.abdecider.LoggingABDecider
import com.twitter.abdecider.Recipient
import com.twitter.abdecider.Bucket
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.util.Local
import scala.collection.concurrent.{Map => ConcurrentMap}
/**
 * A [[LoggingABDecider]] wrapper that records every impressed bucket of a request in a
 * request-local map.
 *
 * Every call is forwarded to the wrapped `loggingAbDecider`. In addition, whenever `impression`
 * yields a bucket, that bucket is recorded through [[CrMixerImpressedBuckets]].
 *
 * Contexts (https://twitter.github.io/finagle/guide/Contexts.html) are Finagle's mechanism for
 * storing state/variables without having to pass these variables all around the request.
 *
 * For this class to record anything, [[SetImpressedBucketsLocalContextFilter]] must be applied
 * at the beginning of the request so that the concurrent map used to store impressed buckets
 * is initialized.
 */
case class CrMixerLoggingABDecider(
  loggingAbDecider: LoggingABDecider,
  statsReceiver: StatsReceiver)
    extends LoggingABDecider {

  private val scopedStatsReceiver = statsReceiver.scope("cr_logging_ab_decider")

  /**
   * Forwards the impression to the wrapped decider and, when a bucket comes back, counts it and
   * records it for the current request. The whole call is tracked under "log_impression".
   */
  override def impression(
    experimentName: String,
    recipient: Recipient
  ): Option[Bucket] = {
    StatsUtil.trackNonFutureBlockStats(scopedStatsReceiver.scope("log_impression")) {
      val impressedBucket = loggingAbDecider.impression(experimentName, recipient)
      impressedBucket.foreach { bucket =>
        scopedStatsReceiver.counter("impressions").incr()
        CrMixerImpressedBuckets.recordImpressedBucket(bucket)
      }
      impressedBucket
    }
  }

  /** Pure delegation to the wrapped decider. */
  override def track(
    experimentName: String,
    eventName: String,
    recipient: Recipient
  ): Unit =
    loggingAbDecider.track(experimentName, eventName, recipient)

  /** Pure delegation to the wrapped decider; nothing is recorded for plain bucket lookups. */
  override def bucket(
    experimentName: String,
    recipient: Recipient
  ): Option[Bucket] =
    loggingAbDecider.bucket(experimentName, recipient)

  /** Pure delegation to the wrapped decider. */
  override def experiments: Seq[String] = loggingAbDecider.experiments

  /** Pure delegation to the wrapped decider. */
  override def experiment(experimentName: String) =
    loggingAbDecider.experiment(experimentName)
}
/**
 * Holder of the request-local map of impressed buckets. The map's keys are the impressed
 * buckets; the Boolean values are always written as `true`.
 */
object CrMixerImpressedBuckets {
  private[featureswitch] val localImpressedBucketsMap = new Local[ConcurrentMap[Bucket, Boolean]]

  /**
   * Gets all impressed buckets for this request.
   *
   * @return None when no map has been installed in the Local, otherwise the recorded buckets
   */
  def getAllImpressedBuckets: Option[List[Bucket]] =
    localImpressedBucketsMap().map(impressed => impressed.keys.toList)

  /** Records `bucket` in the current map; does nothing when no map has been installed. */
  private[featureswitch] def recordImpressedBucket(bucket: Bucket): Unit =
    localImpressedBucketsMap().foreach { impressed =>
      impressed += (bucket -> true)
    }
}

View File

@ -0,0 +1,151 @@
package com.twitter.cr_mixer.featureswitch
import com.twitter.abdecider.LoggingABDecider
import com.twitter.abdecider.UserRecipient
import com.twitter.cr_mixer.{thriftscala => t}
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.discovery.common.configapi.FeatureContextBuilder
import com.twitter.featureswitches.FSRecipient
import com.twitter.featureswitches.UserAgent
import com.twitter.featureswitches.{Recipient => FeatureSwitchRecipient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.timelines.configapi.Config
import com.twitter.timelines.configapi.FeatureValue
import com.twitter.timelines.configapi.ForcedFeatureContext
import com.twitter.timelines.configapi.OrElseFeatureContext
import com.twitter.timelines.configapi.Params
import com.twitter.timelines.configapi.RequestContext
import com.twitter.timelines.configapi.abdecider.LoggingABDeciderExperimentContext
import javax.inject.Inject
import javax.inject.Singleton
/** Singleton object for building [[Params]] to override */
@Singleton
class ParamsBuilder @Inject() (
globalStats: StatsReceiver,
abDecider: LoggingABDecider,
featureContextBuilder: FeatureContextBuilder,
config: Config) {
private val stats = globalStats.scope("params")
/**
 * Builds the [[Params]] for a request from its client context.
 *
 * With a userId, the request context carries a user feature-switch recipient and a logging
 * A/B-decider experiment context. Without one, a guest recipient is used and no experiment
 * context is set. In both cases `featureOverrides` are consulted first through a
 * ForcedFeatureContext.
 *
 * @param clientContext    thrift client context of the request
 * @param product          product used as a custom feature-switch field
 * @param userState        user state used as a custom feature-switch field
 * @param userRoleOverride when defined, replaces the user roles from the client context
 * @param featureOverrides forced feature values
 */
def buildFromClientContext(
  clientContext: ClientContext,
  product: t.Product,
  userState: UserState,
  userRoleOverride: Option[Set[String]] = None,
  featureOverrides: Map[String, FeatureValue] = Map.empty,
): Params = {
  val requestContext = clientContext.userId match {
    case Some(userId) =>
      val userRecipient =
        buildFeatureSwitchRecipient(userId, userRoleOverride, clientContext, product, userState)
      val featureContext = OrElseFeatureContext(
        ForcedFeatureContext(featureOverrides),
        featureContextBuilder(Some(userId), Some(userRecipient)))

      RequestContext(
        userId = Some(userId),
        experimentContext = LoggingABDeciderExperimentContext(
          abDecider,
          Some(UserRecipient(userId, Some(userId)))),
        featureContext = featureContext
      )

    case None =>
      val guestRecipient =
        buildFeatureSwitchRecipientWithGuestId(clientContext, product, userState)
      val featureContext = OrElseFeatureContext(
        ForcedFeatureContext(featureOverrides),
        featureContextBuilder(clientContext.userId, Some(guestRecipient)))

      // An ExperimentContext with a GuestRecipient is not supported, as there are no active
      // use cases for it yet in CrMixer.
      RequestContext(
        userId = clientContext.userId,
        featureContext = featureContext
      )
  }

  config(requestContext, stats)
}
/**
 * Builds the feature-switch recipient for a request without a userId.
 *
 * userId and userRoles are left empty; device, guest, language, country and user agent come
 * from the client context. Product and user state are attached as custom fields.
 */
private def buildFeatureSwitchRecipientWithGuestId(
  clientContext: ClientContext,
  product: t.Product,
  userState: UserState
): FeatureSwitchRecipient =
  FSRecipient(
    userId = None,
    userRoles = None,
    deviceId = clientContext.deviceId,
    guestId = clientContext.guestId,
    languageCode = clientContext.languageCode,
    countryCode = clientContext.countryCode,
    userAgent = clientContext.userAgent.flatMap(UserAgent(_)),
    isVerified = None,
    isTwoffice = None,
    tooClient = None,
    highWaterMark = None
  ).withCustomFields(
    (ParamsBuilder.ProductCustomField, product.toString),
    (ParamsBuilder.UserStateCustomField, userState.toString)
  )
/**
 * Builds the feature-switch recipient for a request that carries a user id.
 *
 * Roles come from `userRolesOverride` when it is defined, otherwise from
 * `clientContext.userRoles`. The remaining attributes are copied from `clientContext`, and
 * the product and user state are attached as custom fields under the keys defined in the
 * [[ParamsBuilder]] companion.
 */
private def buildFeatureSwitchRecipient(
  userId: Long,
  userRolesOverride: Option[Set[String]],
  clientContext: ClientContext,
  product: t.Product,
  userState: UserState
): FeatureSwitchRecipient = {
  // The override wins whenever present; the client-supplied roles are only a fallback.
  val effectiveRoles: Option[Set[String]] =
    userRolesOverride.orElse(clientContext.userRoles.map(_.toSet))

  FSRecipient(
    userId = Some(userId),
    userRoles = effectiveRoles,
    deviceId = clientContext.deviceId,
    guestId = clientContext.guestId,
    languageCode = clientContext.languageCode,
    countryCode = clientContext.countryCode,
    // UserAgent(...) yields an Option, hence flatMap rather than map.
    userAgent = clientContext.userAgent.flatMap(raw => UserAgent(raw)),
    isVerified = None,
    isTwoffice = None,
    tooClient = None,
    highWaterMark = None
  ).withCustomFields(
    (ParamsBuilder.ProductCustomField, product.toString),
    (ParamsBuilder.UserStateCustomField, userState.toString)
  )
}
}
/** Keys of the custom fields that the ParamsBuilder class attaches to feature-switch recipients. */
object ParamsBuilder {
  // Custom-field key holding the string form of the request's product.
  private val ProductCustomField: String = "product_id"

  // Custom-field key holding the string form of the request's user state.
  private val UserStateCustomField: String = "user_state"
}

View File

@ -0,0 +1,22 @@
package com.twitter.cr_mixer.featureswitch
import com.twitter.finagle.Filter
import javax.inject.Inject
import javax.inject.Singleton
import scala.collection.concurrent.TrieMap
import com.twitter.abdecider.Bucket
import com.twitter.finagle.Service
/**
 * Type-agnostic filter that binds a fresh, empty bucket map to
 * `CrMixerImpressedBuckets.localImpressedBucketsMap` (via `let`) around each call to the
 * downstream service.
 */
@Singleton
class SetImpressedBucketsLocalContextFilter @Inject() () extends Filter.TypeAgnostic {
  override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] = {
    (req: Req, next: Service[Req, Rep]) =>
      // A new map per request; per the original author, a TrieMap is used because it has
      // no locks and O(1) inserts.
      val impressedBuckets = TrieMap.empty[Bucket, Boolean]
      CrMixerImpressedBuckets.localImpressedBucketsMap.let(impressedBuckets)(next(req))
  }
}

View File

@ -0,0 +1,22 @@
# Scala library target covering every *.scala file in this directory, compiled with the
# "fatal_warnings" option set and strict dependency checking.
# NOTE(review): presumably this is the BUILD file of the cr_mixer `filter` package (it
# precedes those sources in this listing) -- confirm against the repository layout.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/javax/inject:javax.inject",
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"finagle/finagle-core/src/main",
"frigate/frigate-common:util",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
],
)

View File

@ -0,0 +1,22 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
/**
 * Contract shared by the candidate filters in this package: a filter derives a config
 * from the request once, then uses that config to filter the grouped candidates.
 */
trait FilterBase {

  /** Filter-specific configuration type produced by [[requestToConfig]]. */
  type ConfigType

  /** Identifier of the filter. */
  def name: String

  /**
   * Derives this filter's config from the request.
   *
   * Build the config params here: passing param() into the filter itself is strongly
   * discouraged because param() can be slow when called many times.
   */
  def requestToConfig[CGQueryType <: CandidateGeneratorQuery](request: CGQueryType): ConfigType

  /**
   * Applies the filter to the grouped candidates.
   *
   * @param candidates candidates as a sequence of groups
   * @param config     value previously obtained from [[requestToConfig]]
   * @return the filtered groups
   */
  def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]]
}

View File

@ -0,0 +1,63 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import javax.inject.Singleton
/**
 * Removes candidates whose tweet id is either in the request's impressed-tweet list or is
 * itself the source tweet of one of the candidate groups.
 */
@Singleton
case class ImpressedTweetlistFilter() extends FilterBase {
  import ImpressedTweetlistFilter._

  override val name: String = this.getClass.getCanonicalName
  override type ConfigType = FilterConfig

  /**
   * Drops every candidate whose tweet id is in the exclusion set (impressed tweets plus
   * source tweets). Groups are kept even if they end up empty.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: FilterConfig
  ): Future[Seq[Seq[InitialCandidate]]] = {
    // Per the original authors, all candidates within one inner Seq are guaranteed to share
    // the same sourceInfo (their similarityEngineInfo may differ), so the first candidate of
    // each group is enough to find that group's source id. Only tweet-typed ids are used.
    val sourceTweetIds = candidates
      .flatMap(_.headOption)
      .flatMap(_.candidateGenerationInfo.sourceInfoOpt)
      .map(_.internalId)
      .collect { case InternalId.TweetId(tweetId) => tweetId }

    val excludedTweetIds: Set[TweetId] = config.impressedTweetList ++ sourceTweetIds

    val remaining: Seq[Seq[InitialCandidate]] =
      candidates.map(group => group.filter(c => !excludedTweetIds.contains(c.tweetId)))

    Future.value(remaining)
  }

  /** Captures the request's impressed-tweet list as this filter's config. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType
  ): FilterConfig =
    FilterConfig(request.impressedTweetList)
}

object ImpressedTweetlistFilter {

  /** @param impressedTweetList tweet ids taken from the request that must not be returned */
  case class FilterConfig(
    impressedTweetList: Set[TweetId])
}

View File

@ -0,0 +1,80 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Removes candidates whose author appears among the user ids stored for the requesting
 * user in the real-graph store ("in-network" authors).
 *
 * The filter only acts when it is enabled in the config and a user id is available;
 * otherwise the candidates are returned untouched.
 */
@Singleton
case class InNetworkFilter @Inject() (
  @Named(ModuleNames.RealGraphInStore) realGraphStoreMh: ReadableStore[UserId, CandidateSeq],
  globalStats: StatsReceiver)
    extends FilterBase {
  import InNetworkFilter._

  override val name: String = this.getClass.getCanonicalName
  override type ConfigType = FilterConfig

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val filterCandidatesStats = stats.scope("filter_candidates")

  /** Runs the in-network filtering wrapped in `StatsUtil.trackItemsStats`. */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    filterConfig: FilterConfig,
  ): Future[Seq[Seq[InitialCandidate]]] = {
    StatsUtil.trackItemsStats(filterCandidatesStats) {
      filterCandidates(candidates, filterConfig)
    }
  }

  private def filterCandidates(
    candidates: Seq[Seq[InitialCandidate]],
    filterConfig: FilterConfig,
  ): Future[Seq[Seq[InitialCandidate]]] = {
    filterConfig match {
      case FilterConfig(Some(userId), true) =>
        realGraphStoreMh.get(userId).map { storedSeqOpt =>
          // A missing store entry is treated as "no in-network authors".
          val inNetworkAuthorIds: Set[UserId] =
            storedSeqOpt.fold(Set.empty[UserId])(_.candidates.map(_.userId).toSet)
          candidates.map { group =>
            group.filterNot(c => inNetworkAuthorIds.contains(c.tweetInfo.authorId))
          }
        }
      case _ =>
        // Either the filter is disabled or there is no user to look up.
        Future.value(candidates)
    }
  }

  /**
   * Only UTEG tweet queries can enable this filter; for them the user id and the
   * `EnableInNetworkFilterParam` value are captured. Any other query type disables it.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType
  ): FilterConfig = {
    request match {
      case UtegTweetCandidateGeneratorQuery(userId, _, _, _, _, params, _) =>
        FilterConfig(
          userIdOpt = Some(userId),
          enableInNetworkFilter = params(UtegTweetGlobalParams.EnableInNetworkFilterParam)
        )
      case _ =>
        FilterConfig(userIdOpt = None, enableInNetworkFilter = false)
    }
  }
}

object InNetworkFilter {

  /**
   * @param userIdOpt             user whose real-graph entry is looked up, if any
   * @param enableInNetworkFilter whether the filter should run at all
   */
  case class FilterConfig(
    userIdOpt: Option[UserId],
    enableInNetworkFilter: Boolean)
}

View File

@ -0,0 +1,58 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Post-ranking filter stage. Currently it only removes low-quality candidates that were
 * produced solely by a notification-click source, and records the candidate counts seen
 * before and after filtering.
 */
@Singleton
case class PostRankFilterRunner @Inject() (
  globalStats: StatsReceiver) {

  private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
  private val beforeCount = scopedStats.stat("candidate_count", "before")
  private val afterCount = scopedStats.stat("candidate_count", "after")

  /**
   * Filters the ranked candidates.
   *
   * @param query      the originating query (not consulted by the current filter)
   * @param candidates ranked candidates to filter
   * @return the candidates that survive [[removeBadRecentNotificationCandidates]]
   */
  def run(
    query: CrCandidateGeneratorQuery,
    candidates: Seq[RankedCandidate]
  ): Future[Seq[RankedCandidate]] = {
    beforeCount.add(candidates.size)

    Future(
      removeBadRecentNotificationCandidates(candidates)
    ).map { results =>
      afterCount.add(results.size)
      results
    }
  }

  /**
   * Remove "bad" quality candidates generated by recent notifications.
   * As implemented, a candidate is bad when it has exactly one potential reason and that
   * reason's source type is `SourceType.NotificationClick`.
   * e.g.:
   *   tweetA {recent notification1} -> bad
   *   tweetB {recent notification1, recent notification2} -> good
   *   tweetC {recent notification1, recent follow1} -> kept by this implementation, because
   *     it has two potential reasons.
   * NOTE(review): the original comment labelled tweetC as "bad", which does not match the
   * `size == 1` check below -- confirm the intended rule against SD-19397.
   */
  private[filter] def removeBadRecentNotificationCandidates(
    candidates: Seq[RankedCandidate]
  ): Seq[RankedCandidate] = {
    candidates.filterNot {
      isBadQualityRecentNotificationCandidate
    }
  }

  /**
   * True when the candidate's only potential reason is a notification click.
   * Written with total combinators (`headOption` / `exists`) so no partial `.head` or
   * `.get` call is needed.
   */
  private def isBadQualityRecentNotificationCandidate(candidate: RankedCandidate): Boolean = {
    candidate.potentialReasons.size == 1 &&
    candidate.potentialReasons.headOption
      .flatMap(_.sourceInfoOpt)
      .exists(_.sourceType == SourceType.NotificationClick)
  }
}

View File

@ -0,0 +1,99 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Runs the pre-ranking filters, one after another, over the grouped initial candidates.
 */
@Singleton
class PreRankFilterRunner @Inject() (
  impressedTweetListFilter: ImpressedTweetlistFilter,
  tweetAgeFilter: TweetAgeFilter,
  videoTweetFilter: VideoTweetFilter,
  tweetReplyFilter: ReplyFilter,
  globalStats: StatsReceiver) {

  private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)

  /**
   * Filters in the order they are applied.
   *
   * Per the original authors, the order does not matter as long as no .take(N) truncation
   * is applied across filters: running tweetAgeFilter before impressedTweetListFilter or
   * the other way around is equally fine, and the same holds for signal based filters,
   * which are meant to run before tweetAgeFilter and impressedTweetListFilter as a set of
   * early filters.
   * NOTE(review): no signal based filter is present in this list -- confirm whether that
   * part of the original note is still current.
   */
  val orderedFilters = Seq(
    tweetAgeFilter,
    impressedTweetListFilter,
    videoTweetFilter,
    tweetReplyFilter
  )

  /** Applies [[orderedFilters]] to `candidates`, recording stats under this class's scope. */
  def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[Seq[InitialCandidate]]] =
    PreRankFilterRunner.runSequentialFilters(
      request = request,
      candidates = candidates,
      filters = orderedFilters,
      statsReceiver = scopedStats
    )
}
object PreRankFilterRunner {

  /**
   * Records, under the given stage label ("before" or "after"), how many candidate groups
   * are empty and how many candidates each group holds.
   */
  private def recordCandidateStats(
    candidates: Seq[Seq[InitialCandidate]],
    statsReceiver: StatsReceiver,
    stage: String
  ): Unit = {
    statsReceiver.counter("empty_sources", stage).incr(candidates.count(_.isEmpty))
    candidates.foreach { group =>
      statsReceiver.counter("candidates", stage).incr(group.size)
    }
  }

  /**
   * Helper function for running some candidates through a sequence of filters.
   *
   * Each filter receives the output of the previous one. Candidate stats are recorded
   * before and after every filter, under a scope named after that filter.
   */
  private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
    filters: Seq[FilterBase],
    statsReceiver: StatsReceiver
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val unfiltered: Future[Seq[Seq[InitialCandidate]]] = Future.value(candidates)

    filters.foldLeft(unfiltered) { (pending, currentFilter) =>
      pending.flatMap { input =>
        val filterStats = statsReceiver.scope(currentFilter.name)
        recordCandidateStats(input, filterStats, "before")
        currentFilter
          .filter(input, currentFilter.requestToConfig(request))
          .map { output =>
            recordCandidateStats(output, filterStats, "after")
            output
          }
      }
    }
  }
}

View File

@ -0,0 +1,40 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Removes candidates whose tweet is marked as a reply. A tweet with no `isReply` value is
 * treated as not being a reply and is kept.
 */
@Singleton
case class ReplyFilter @Inject() () extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = Boolean

  /**
   * @param config when false the candidates are returned unchanged
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val kept =
      if (!config) candidates
      else candidates.map(group => group.filterNot(_.tweetInfo.isReply.contains(true)))
    Future.value(kept)
  }

  /** The reply filter is unconditionally enabled, regardless of the query. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType = true
}

View File

@ -0,0 +1,41 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Removes candidates whose tweet is marked as a retweet. A tweet with no `isRetweet` value
 * is treated as not being a retweet and is kept.
 */
@Singleton
case class RetweetFilter @Inject() () extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = Boolean

  /**
   * @param config when false the candidates are returned unchanged
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val kept =
      if (!config) candidates
      else candidates.map(group => group.filterNot(_.tweetInfo.isRetweet.contains(true)))
    Future.value(kept)
  }

  /** Enabled or disabled by `UtegTweetGlobalParams.EnableRetweetFilterParam`. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType =
    query.params(UtegTweetGlobalParams.EnableRetweetFilterParam)
}

View File

@ -0,0 +1,39 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Singleton
import com.twitter.conversions.DurationOps._
/**
 * Removes candidates that are older than the maximum tweet age configured for the query.
 */
@Singleton
case class TweetAgeFilter() extends FilterBase {
  override val name: String = this.getClass.getCanonicalName
  override type ConfigType = Duration

  /**
   * @param maxTweetAge maximum allowed age; a value of 720 hours (30 days) or more turns
   *                    the filter off
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    maxTweetAge: Duration
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val kept =
      if (maxTweetAge >= 720.hours) {
        candidates
      } else {
        // Tweet IDs are approximately chronological (see http://go/snowflake), so the
        // cutoff id is computed once and reused for every candidate group.
        val cutoffTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
        candidates.map(group => group.filterNot(_.tweetId < cutoffTweetId))
      }
    Future.value(kept)
  }

  /** Reads the maximum age from `GlobalParams.MaxTweetAgeHoursParam`. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): Duration =
    query.params(GlobalParams.MaxTweetAgeHoursParam)
}

View File

@ -0,0 +1,39 @@
package com.twitter.cr_mixer.filter
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.HealthThreshold
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
import javax.inject.Singleton
/**
 * Base for health filters that keep or drop a candidate according to a boolean property
 * of its `TweetInfo`, where the property is selected by a health threshold.
 */
@Singleton
trait TweetInfoHealthFilterBase extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = HealthThreshold.Enum.Value

  /** Maps each supported threshold to the `TweetInfo` property that decides whether a tweet passes. */
  def thresholdToPropertyMap: Map[HealthThreshold.Enum.Value, TweetInfo => Option[Boolean]]

  /** Extracts the threshold to apply from a query. */
  def getFilterParamFn: CandidateGeneratorQuery => HealthThreshold.Enum.Value

  /**
   * Keeps the candidates for which the property selected by `config` is true or absent.
   *
   * @param config threshold used to pick the property from [[thresholdToPropertyMap]];
   *               as before, a threshold missing from the map makes the lookup throw once
   *               the first candidate is examined
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: HealthThreshold.Enum.Value
  ): Future[Seq[Seq[InitialCandidate]]] = {
    // thresholdToPropertyMap is a def, so resolve the property once per call instead of once
    // per candidate. It is lazy so that, exactly like the per-candidate lookup it replaces,
    // nothing is looked up when there are no candidates to examine.
    lazy val passesThreshold: TweetInfo => Option[Boolean] = thresholdToPropertyMap(config)

    Future.value(candidates.map { seq =>
      seq.filter(p => passesThreshold(p.tweetInfo).getOrElse(true))
    })
  }

  /**
   * Build the config params here. passing in param() into the filter is strongly discouraged
   * because param() can be slow when called many times
   *
   * Only `CrCandidateGeneratorQuery` requests are given a threshold through
   * [[getFilterParamFn]]; every other query type gets `HealthThreshold.Enum.Off`.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): HealthThreshold.Enum.Value = {
    query match {
      case q: CrCandidateGeneratorQuery => getFilterParamFn(q)
      case _ => HealthThreshold.Enum.Off
    }
  }
}

View File

@ -0,0 +1,96 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Runs the UTEG candidate generator's filters one after another. The structure mirrors
 * PreRankFilterRunner.
 */
@Singleton
class UtegFilterRunner @Inject() (
  inNetworkFilter: InNetworkFilter,
  utegHealthFilter: UtegHealthFilter,
  retweetFilter: RetweetFilter,
  globalStats: StatsReceiver) {

  private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)

  /** Filters in the order they are applied. */
  val orderedFilters: Seq[FilterBase] = Seq(
    inNetworkFilter,
    utegHealthFilter,
    retweetFilter
  )

  /** Applies [[orderedFilters]] to `candidates`, recording stats under this class's scope. */
  def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[Seq[InitialCandidate]]] =
    UtegFilterRunner.runSequentialFilters(
      request = request,
      candidates = candidates,
      filters = orderedFilters,
      statsReceiver = scopedStats
    )
}
object UtegFilterRunner {

  /**
   * Records, under the given stage label ("before" or "after"), how many candidate groups
   * are empty and how many candidates each group holds.
   */
  private def recordCandidateStats(
    candidates: Seq[Seq[InitialCandidate]],
    statsReceiver: StatsReceiver,
    stage: String
  ): Unit = {
    statsReceiver.counter("empty_sources", stage).incr(candidates.count(_.isEmpty))
    candidates.foreach { group =>
      statsReceiver.counter("candidates", stage).incr(group.size)
    }
  }

  /**
   * Helper function for running some candidates through a sequence of filters.
   *
   * Each filter receives the output of the previous one. Candidate stats are recorded
   * before and after every filter, under a scope named after that filter.
   */
  private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
    filters: Seq[FilterBase],
    statsReceiver: StatsReceiver
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val unfiltered: Future[Seq[Seq[InitialCandidate]]] = Future.value(candidates)

    filters.foldLeft(unfiltered) { (pending, currentFilter) =>
      pending.flatMap { input =>
        val filterStats = statsReceiver.scope(currentFilter.name)
        recordCandidateStats(input, filterStats, "before")
        currentFilter
          .filter(input, currentFilter.requestToConfig(request))
          .map { output =>
            recordCandidateStats(output, filterStats, "after")
            output
          }
      }
    }
  }
}

View File

@ -0,0 +1,51 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Remove unhealthy candidates.
 *
 * Currently Timeline Ranker applies a check on the following three scores:
 *  - toxicityScore
 *  - pBlockScore
 *  - pReportedTweetScore
 *
 * Whereas isPassTweetHealthFilterStrict checks two additional scores with the same threshold:
 *  - pSpammyTweetScore
 *  - spammyTweetContentScore
 *
 * Per the original authors, both filters have been verified to behave very similarly.
 */
@Singleton
case class UtegHealthFilter @Inject() () extends FilterBase {
  override def name: String = this.getClass.getCanonicalName
  override type ConfigType = Boolean

  /**
   * Keeps only candidates whose `isPassTweetHealthFilterStrict` is explicitly true; a
   * missing value counts as failing.
   *
   * @param config when false the candidates are returned unchanged
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val kept =
      if (!config) candidates
      else
        candidates.map { group =>
          group.filter(_.tweetInfo.isPassTweetHealthFilterStrict.contains(true))
        }
    Future.value(kept)
  }

  /** Enabled or disabled by `UtegTweetGlobalParams.EnableTLRHealthFilterParam`. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType =
    query.params(UtegTweetGlobalParams.EnableTLRHealthFilterParam)
}

View File

@ -0,0 +1,81 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.filter.VideoTweetFilter.FilterConfig
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.VideoTweetFilterParams
import com.twitter.util.Future
import javax.inject.Singleton
/**
 * When enabled, keeps only candidates that are high-resolution video tweets which are not
 * quote tweets, not replies, have a single media item and carry no URL.
 */
@Singleton
case class VideoTweetFilter() extends FilterBase {
  override val name: String = this.getClass.getCanonicalName
  override type ConfigType = FilterConfig

  /**
   * @param config when `enableVideoTweetFilter` is false the candidates are returned unchanged
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    // The flag is the same for every candidate, so decide once instead of per candidate.
    if (!config.enableVideoTweetFilter) {
      Future.value(candidates)
    } else {
      Future.value(candidates.map(_.filter(isEligibleVideoTweet)))
    }
  }

  /** True when the candidate's tweet satisfies every video-tweet requirement. */
  private def isEligibleVideoTweet(candidate: InitialCandidate): Boolean = {
    val tweetInfo = candidate.tweetInfo
    // if hasVideo is true, hasImage, hasGif should be false
    checkTweetInfoAttribute(tweetInfo.hasVideo) &&
    checkTweetInfoAttribute(tweetInfo.isHighMediaResolution) &&
    !checkTweetInfoAttribute(tweetInfo.isQuoteTweet) &&
    !checkTweetInfoAttribute(tweetInfo.isReply) &&
    !checkTweetInfoAttribute(tweetInfo.hasMultipleMedia) &&
    !checkTweetInfoAttribute(tweetInfo.hasUrl)
  }

  /**
   * Reads an optional boolean attribute, treating a missing value as false.
   *
   * Taking Quoted Tweet (TweetInfo.isQuoteTweet) as an example: if the attribute is None we
   * by default say it is not a quoted tweet; similarly, if TweetInfo.hasVideo is None we say
   * the tweet does not have video.
   *
   * The by-name argument is evaluated exactly once.
   */
  def checkTweetInfoAttribute(attributeOpt: => Option[Boolean]): Boolean =
    attributeOpt.getOrElse(false)

  /**
   * The filter can only be enabled (through
   * `VideoTweetFilterParams.EnableVideoTweetFilterParam`) for the Cr, RelatedTweet and
   * RelatedVideoTweet query types; any other query type disables it.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): FilterConfig = {
    val enableVideoTweetFilter = query match {
      case _: CrCandidateGeneratorQuery | _: RelatedTweetCandidateGeneratorQuery |
          _: RelatedVideoTweetCandidateGeneratorQuery =>
        query.params(VideoTweetFilterParams.EnableVideoTweetFilterParam)
      case _ => false // e.g., GetRelatedTweets()
    }
    FilterConfig(
      enableVideoTweetFilter = enableVideoTweetFilter
    )
  }
}

object VideoTweetFilter {
  // extend the filterConfig to add more flags if needed.
  // The individual attribute requirements (video, resolution, quote, reply, media count,
  // URL) are currently hardcoded in the filter according to the prod setting.
  case class FilterConfig(
    enableVideoTweetFilter: Boolean)
}

View File

@ -0,0 +1,139 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult
import com.twitter.cr_mixer.thriftscala.AdsRequest
import com.twitter.cr_mixer.thriftscala.AdsResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribes ads recommendation results: the initial candidates fetched for a query and the
 * top-level API request/response pair.
 *
 * A message is only published when the caller enables scribing and the
 * `adsRecommendationsPerExperimentScribeRate` decider is available for the user.
 */
@Singleton
case class AdsRecommendationsScribeLogger @Inject() (
  @Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger,
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver) {

  private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName)
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")

  /**
   * Scribe first step results after fetching initial ads candidates.
   *
   * @param query        query the candidates were fetched for; source of the scribe metadata
   * @param getResultFn  by-name computation producing the candidates
   * @param enableScribe controlled by feature switch so that we can scribe for certain DDG
   * @return the future produced by `getResultFn`, with the scribing attached as a
   *         success callback
   */
  def scribeInitialAdsCandidates(
    query: AdsCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]],
    enableScribe: Boolean
  ): Future[Seq[Seq[InitialAdsCandidate]]] = {
    val metadata = ScribeMetadata.from(query)
    val elapsed = Stopwatch.start()

    getResultFn.onSuccess { fetched =>
      val latencyMs = elapsed().inMilliseconds
      val result = convertFetchCandidatesResult(fetched, metadata.userId)
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, metadata, latencyMs, traceId)

      if (shouldScribe(enableScribe, metadata.userId)) {
        upperFunnelsStats.counter(metadata.product.originalName).incr()
        scribeResult(scribeMsg)
      }
    }
  }

  /**
   * Scribe top level API results.
   *
   * @param request        the ads request being served
   * @param startTime      value recorded as the scribed result's timestamp
   * @param scribeMetadata metadata attached to the scribed message
   * @param getResultFn    by-name computation producing the response
   * @param enableScribe   whether the caller allows scribing for this request
   * @return the future produced by `getResultFn`, with the scribing attached as a
   *         success callback
   */
  def scribeGetAdsRecommendations(
    request: AdsRequest,
    startTime: Long,
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[AdsResponse],
    enableScribe: Boolean
  ): Future[AdsResponse] = {
    val elapsed = Stopwatch.start()

    getResultFn.onSuccess { response =>
      val latencyMs = elapsed().inMilliseconds
      val topLevelResult = AdsRecommendationTopLevelApiResult(
        timestamp = startTime,
        request = request,
        response = response
      )
      val result = AdsRecommendationsResult.AdsRecommendationTopLevelApiResult(topLevelResult)
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)

      if (shouldScribe(enableScribe, scribeMetadata.userId)) {
        topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
        scribeResult(scribeMsg)
      }
    }
  }

  /** Scribing requires both the caller's opt-in and the per-user decider. */
  private def shouldScribe(enableScribe: Boolean, userId: UserId): Boolean =
    enableScribe && decider.isAvailableForId(
      userId,
      DeciderConstants.adsRecommendationsPerExperimentScribeRate)

  /** Flattens the grouped candidates into the thrift fetch-candidates result. */
  private def convertFetchCandidatesResult(
    candidatesSeq: Seq[Seq[InitialAdsCandidate]],
    requestUserId: UserId
  ): AdsRecommendationsResult = {
    val tweetCandidatesWithMetadata = for {
      candidates <- candidatesSeq
      candidate <- candidates
    } yield TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(
        CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = None // not populated yet
    )

    AdsRecommendationsResult.FetchCandidatesResult(
      FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
  }

  /** Assembles the scribe message, including the impressed buckets. */
  private def buildScribeMessage(
    result: AdsRecommendationsResult,
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetAdsRecommendationsScribe =
    GetAdsRecommendationsScribe(
      uuid = scribeMetadata.requestUUID,
      userId = scribeMetadata.userId,
      result = result,
      traceId = Some(traceId),
      performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )

  /** Publishes the message through the injected ads recommendations logger. */
  private def scribeResult(
    scribeMsg: GetAdsRecommendationsScribe
  ): Unit =
    publish(
      logger = adsRecommendationsScribeLogger,
      codec = GetAdsRecommendationsScribe,
      message = scribeMsg)
}

View File

@ -0,0 +1,34 @@
# Scala library target covering every *.scala file in this directory, compiled with the
# "fatal_warnings" option set and strict dependency checking.
# NOTE(review): presumably this is the BUILD file of the cr_mixer `logging` package (it sits
# between that package's sources in this listing) -- confirm against the repository layout.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"abdecider/src/main/scala",
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"featureswitches/featureswitches-core/src/main/scala:experimentation-settings",
"finagle/finagle-core/src/main",
"frigate/frigate-common:base",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"kafka/finagle-kafka/finatra-kafka/src/main/scala",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
"scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
"scrooge/scrooge-serializer/src/main/scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/ml/api:data-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"timelines/src/main/scala/com/twitter/timelines/clientevent",
"util-internal/scribe/src/main/scala/com/twitter/logging",
],
)

View File

@ -0,0 +1,489 @@
package com.twitter.cr_mixer.logging
import com.google.common.base.CaseFormat
import com.twitter.abdecider.ScribingABDeciderUtil
import com.twitter.scribelib.marshallers.ClientDataProvider
import com.twitter.scribelib.marshallers.ScribeSerialization
import com.twitter.timelines.clientevent.MinimalClientDataProvider
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.scribe.ScribeCategories
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult
import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe
import com.twitter.cr_mixer.thriftscala.InterleaveResult
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.cr_mixer.thriftscala.RankResult
import com.twitter.cr_mixer.thriftscala.Result
import com.twitter.cr_mixer.thriftscala.SourceSignal
import com.twitter.cr_mixer.thriftscala.TopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe
import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.cr_mixer.util.MetricTagUtil
import com.twitter.decider.SimpleRecipient
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.finatra.kafka.producers.KafkaProducerBase
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import scala.util.Random
/**
 * Scribe logger for the cr-mixer tweet recommendation paths.
 *
 * The public `scribe*` methods wrap a by-name future, attach a success callback to it and
 * publish the converted result through the injected loggers (and, for interleaved
 * candidates, the injected Kafka producer). Publishing is gated by `decider`.
 */
@Singleton
case class CrMixerScribeLogger @Inject() (
decider: CrMixerDecider,
statsReceiver: StatsReceiver,
@Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger,
@Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger,
@Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger,
kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) {
// Brings KafkaMaxTweetsPerMessage and BatchSize from the companion object into scope
import CrMixerScribeLogger._
// All stats of this class live under the "CrMixerScribeLogger" scope, one sub-scope per path
private val scopedStats = statsReceiver.scope("CrMixerScribeLogger")
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
private val kafkaMessagesStats = scopedStats.scope("KafkaMessages")
private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics")
private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates")
// Used by publishTopLevelDdgMetrics to serialize client events
private val serialization = new ScribeSerialization {}
/**
 * Scribes the fetched signal sources for `query` once `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertFetchSignalSourcesResult
 * to turn the fetched sources into a scribe result.
 *
 * @param query       candidate generator query that supplies the scribe metadata
 * @param getResultFn by-name computation of the source infos and graph source infos
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeSignalSources(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])]
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchSignalSourcesResult)
}
/**
 * Scribes the initial (fetched) candidates for `query` once `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertFetchCandidatesResult
 * to turn the candidates into a scribe result.
 *
 * @param query       candidate generator query that supplies the scribe metadata
 * @param getResultFn by-name computation of the per-source candidate lists
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeInitialCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult)
}
/**
 * Scribes the candidates that remain after pre-rank filtering once `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertPreRankFilterResult
 * to turn the candidates into a scribe result.
 *
 * @param query       candidate generator query that supplies the scribe metadata
 * @param getResultFn by-name computation of the filtered per-source candidate lists
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribePreRankFilterCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertPreRankFilterResult)
}
/**
 * Scribes the interleaved (blended) candidates for `query` once `getResultFn` succeeds.
 *
 * This is the only step that passes `enableKafkaScribe = true`, so it is the only one whose
 * message may additionally be sent to Kafka by scribeResultsAndPerformanceMetrics.
 *
 * @param query       candidate generator query that supplies the scribe metadata
 * @param getResultFn by-name computation of the blended candidates
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeInterleaveCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[BlendedCandidate]]
): Future[Seq[BlendedCandidate]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertInterleaveResult,
    enableKafkaScribe = true)
}
/**
 * Scribes the ranked candidates for `query` once `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertRankResult
 * to turn the candidates into a scribe result.
 *
 * @param query       candidate generator query that supplies the scribe metadata
 * @param getResultFn by-name computation of the ranked candidates
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeRankedCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertRankResult)
}
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the getTweetRecommendations() endpoint.
 *
 * Scribing runs as a success callback on the future produced by `getResultFn`; nothing is
 * scribed when that future fails. Two independent, user-id keyed decider gates are applied:
 *  - `upperFunnelPerStepScribeRate` gates the request/response scribe;
 *  - `topLevelApiDdgMetricsScribeRate` gates the DDG metrics client event.
 *
 * The scribe message (which includes the impressed-bucket lookup) is only built when the
 * first gate lets the request through, matching the other scribe loggers in this package.
 *
 * @param request        the top-level request, embedded in the scribed result
 * @param startTime      stored as the `timestamp` of the scribed result
 * @param scribeMetadata request UUID, user id and product used for gating and stat names
 * @param getResultFn    by-name computation of the response, evaluated after the latency
 *                       stopwatch has been started
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeGetTweetRecommendations(
  request: CrMixerTweetRequest,
  startTime: Long,
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[CrMixerTweetResponse]
): Future[CrMixerTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    // Latency is captured first so that it is shared by both gates and excludes scribing work
    val latencyMs = timer().inMilliseconds

    // We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs
    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.upperFunnelPerStepScribeRate)) {
      topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
      val result = convertTopLevelAPIResult(request, response, startTime)
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
      scribeResult(scribeMsg)
    }

    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.topLevelApiDdgMetricsScribeRate)) {
      topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr()
      val topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request)
      publishTopLevelDdgMetrics(
        logger = ddgMetricsLogger,
        topLevelDdgMetricsMetadata = topLevelDdgMetricsMetadata,
        latencyMs = latencyMs,
        candidateSize = response.tweets.length)
    }
  }
}
/**
 * Scribe all of the Blue Verified tweets that are candidates from cr-mixer
 * from the getTweetRecommendations() endpoint for stats tracking/debugging purposes.
 *
 * Runs as a success callback on the future produced by `getResultFn` and is gated by the
 * `enableScribeForBlueVerifiedTweetCandidates` decider. A candidate is kept when its
 * `tweetInfo.hasBlueVerifiedAnnotation` contains `true`; a per-author counter is bumped for
 * every kept candidate.
 *
 * @param scribeMetadata request UUID, user id and product written into the scribe
 * @param getResultFn    by-name computation of the ranked candidates
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeGetTweetRecommendationsForBlueVerified(
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  getResultFn.onSuccess { rankedCandidates =>
    val scribeEnabled =
      decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)

    if (scribeEnabled) {
      blueVerifiedTweetCandidatesStats.counter("process_request").incr()

      val impressedBuckets =
        getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil)

      val candidateScribes = rankedCandidates.collect {
        case candidate if candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true) =>
          val authorId = candidate.tweetInfo.authorId
          blueVerifiedTweetCandidatesStats
            .scope(scribeMetadata.product.name)
            .counter(authorId.toString)
            .incr()
          VITTweetCandidateScribe(
            tweetId = candidate.tweetId,
            authorId = authorId,
            score = candidate.predictionScore,
            metricTags = MetricTagUtil.buildMetricTags(candidate)
          )
      }

      publish(
        logger = blueVerifiedTweetRecsScribeLogger,
        codec = VITTweetCandidatesScribe,
        message = VITTweetCandidatesScribe(
          uuid = scribeMetadata.requestUUID,
          userId = scribeMetadata.userId,
          candidates = candidateScribes,
          product = scribeMetadata.product,
          impressedBuckets = impressedBuckets
        )
      )
    }
  }
}
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch signals, fetch candidates, filters, ranker, etc
 *
 * Scribing runs as a success callback on the future produced by `getResultFn`; nothing is
 * scribed when that future fails. The scribe message is built lazily, so the conversion of
 * the step result (and the impressed-bucket lookup done by buildScribeMessage) is skipped
 * entirely for requests that pass neither the scribe gate nor the Kafka gate, and happens
 * at most once when both gates pass.
 *
 * @param scribeMetadata    request UUID, user id and product used for gating and stat names
 * @param getResultFn       by-name computation of the step result, evaluated after the
 *                          latency stopwatch has been started
 * @param convertToResultFn converts the step result and the request user id into a `Result`
 * @param enableKafkaScribe when true, the message may also be sent to Kafka (subject to
 *                          shouldScribeKafkaMessage); defaults to false
 * @tparam T type of the step result
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
private[logging] def scribeResultsAndPerformanceMetrics[T](
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => Result,
  enableKafkaScribe: Boolean = false
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    // Captured before any scribing work so the reported latency only covers the step itself
    val latencyMs = timer().inMilliseconds
    val traceId = Trace.id.traceId.toLong

    // Deferred: only materialized by the first gate below that actually needs it
    lazy val scribeMsg: GetTweetsRecommendationsScribe =
      buildScribeMessage(
        convertToResultFn(input, scribeMetadata.userId),
        scribeMetadata,
        latencyMs,
        traceId)

    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.upperFunnelPerStepScribeRate)) {
      upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
      scribeResult(scribeMsg)
    }

    // forks the scribe as a Kafka message for async feature hydration
    if (enableKafkaScribe && shouldScribeKafkaMessage(
        scribeMetadata.userId,
        scribeMetadata.product)) {
      kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr()

      val batchedKafkaMessages = downsampleKafkaMessage(scribeMsg)
      batchedKafkaMessages.foreach { kafkaMessage =>
        kafkaProducer.send(
          topic = ScribeCategories.TweetsRecs.scribeCategory,
          key = traceId.toString,
          value = kafkaMessage,
          timestamp = Time.now.inMilliseconds
        )
      }
    }
  }
}
/**
 * Wraps a top-level request/response pair into a `Result.TopLevelApiResult`.
 *
 * @param request   the request to embed
 * @param response  the response to embed
 * @param startTime stored as the `timestamp` of the result
 */
private def convertTopLevelAPIResult(
  request: CrMixerTweetRequest,
  response: CrMixerTweetResponse,
  startTime: Long
): Result = {
  val apiResult = TopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  Result.TopLevelApiResult(apiResult)
}
/**
 * Converts the fetched signal sources into a `Result.FetchSignalSourcesResult`.
 *
 * Every source info contributes a signal carrying its internal id. Every entry of the
 * graph-source map contributes a signal carrying the request user id as a placeholder;
 * the entry's key and value are not inspected.
 *
 * @param sourceInfoSetTuple the source infos and the graph-source map
 * @param requestUserId      user id used as the placeholder id for graph sources
 */
private def convertFetchSignalSourcesResult(
  sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]),
  requestUserId: UserId
): Result = {
  val (sourceInfos, graphSourceInfos) = sourceInfoSetTuple

  val signalsFromSources = sourceInfos.map { info =>
    SourceSignal(id = Some(info.internalId))
  }

  // For source graphs, we pass in requestUserId as a placeholder
  val signalsFromGraphs = graphSourceInfos.map { _ =>
    SourceSignal(id = Some(InternalId.UserId(requestUserId)))
  }

  val allSignals = signalsFromSources ++ signalsFromGraphs
  Result.FetchSignalSourcesResult(FetchSignalSourcesResult(signals = Some(allSignals)))
}
/**
 * Flattens the per-source candidate lists into a `Result.FetchCandidatesResult`.
 *
 * Each candidate is scribed with its tweet id, its candidate generation key and its
 * similarity score; `numCandidateGenerationKeys` is left empty.
 *
 * @param candidatesSeq candidates grouped per source
 * @param requestUserId request user id, passed to CandidateGenerationKeyUtil.toThrift
 */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Result = {
  val flattened = for {
    candidates <- candidatesSeq
    candidate <- candidates
  } yield {
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = None // not populated yet
    )
  }
  Result.FetchCandidatesResult(FetchCandidatesResult(Some(flattened)))
}
/**
 * Flattens the per-source, pre-rank-filtered candidate lists into a
 * `Result.PreRankFilterResult`.
 *
 * Each candidate is scribed with its tweet id, its candidate generation key and its
 * similarity score; `numCandidateGenerationKeys` is left empty.
 *
 * @param candidatesSeq filtered candidates grouped per source
 * @param requestUserId request user id, passed to CandidateGenerationKeyUtil.toThrift
 */
private def convertPreRankFilterResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Result = {
  val flattened = for {
    candidates <- candidatesSeq
    candidate <- candidates
  } yield {
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = None // not populated yet
    )
  }
  Result.PreRankFilterResult(PreRankFilterResult(Some(flattened)))
}
/**
 * Converts blended candidates into a `Result.InterleaveResult`.
 *
 * We take InterleaveResult for Unconstrained dataset ML ranker training, so in addition to
 * the tweet id, generation key and similarity score, the author id and the number of
 * potential reasons are hydrated as well.
 *
 * @param blendedCandidates candidates produced by the interleave step
 * @param requestUserId     request user id, passed to CandidateGenerationKeyUtil.toThrift
 */
private def convertInterleaveResult(
  blendedCandidates: Seq[BlendedCandidate],
  requestUserId: UserId
): Result = {
  // hydrate fields for light ranking training data
  def toThriftCandidate(candidate: BlendedCandidate): TweetCandidateWithMetadata = {
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.reasonChosen, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      authorId = Some(candidate.tweetInfo.authorId), // for ML pipeline training
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = Some(candidate.potentialReasons.size)
    )
  }

  val thriftCandidates = blendedCandidates.map(toThriftCandidate)
  Result.InterleaveResult(InterleaveResult(Some(thriftCandidates)))
}
/**
 * Converts ranked candidates into a `Result.RankResult`.
 *
 * Each candidate is scribed with its tweet id, the generation key of its chosen reason,
 * its similarity score and the number of potential reasons.
 *
 * @param rankedCandidates candidates produced by the ranking step
 * @param requestUserId    request user id, passed to CandidateGenerationKeyUtil.toThrift
 */
private def convertRankResult(
  rankedCandidates: Seq[RankedCandidate],
  requestUserId: UserId
): Result = {
  def toThriftCandidate(candidate: RankedCandidate): TweetCandidateWithMetadata = {
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.reasonChosen, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = Some(candidate.potentialReasons.size)
    )
  }

  val thriftCandidates = rankedCandidates.map(toThriftCandidate)
  Result.RankResult(RankResult(Some(thriftCandidates)))
}
/**
 * Assembles the scribe message for one result.
 *
 * Besides the supplied values, the message carries the impressed buckets returned by
 * getImpressedBuckets for this logger's stats scope.
 *
 * @param result         the converted step or top-level result
 * @param scribeMetadata supplies the request UUID and user id
 * @param latencyMs      latency reported in the performance metrics
 * @param traceId        trace id stored on the message
 */
private def buildScribeMessage(
  result: Result,
  scribeMetadata: ScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetTweetsRecommendationsScribe = {
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetTweetsRecommendationsScribe(
    uuid = scribeMetadata.requestUUID,
    userId = scribeMetadata.userId,
    result = result,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
/**
 * Publishes `scribeMsg` through the tweet-recs scribe logger.
 *
 * @param scribeMsg the message to serialize and log
 */
private def scribeResult(scribeMsg: GetTweetsRecommendationsScribe): Unit =
  publish(
    logger = tweetRecsScribeLogger,
    codec = GetTweetsRecommendationsScribe,
    message = scribeMsg
  )
/**
 * Gate for producing messages to Kafka for async feature hydration.
 *
 * The decider is always consulted first; the message is produced only when the user is
 * sampled in by `kafkaMessageScribeSampleRate` and the product is `Product.Home`.
 *
 * @param userId  user the sampling decision is keyed on
 * @param product product of the current request
 * @return true when both conditions hold
 */
private def shouldScribeKafkaMessage(
  userId: UserId,
  product: Product
): Boolean = {
  val recipient = Some(SimpleRecipient(userId))
  val sampledIn =
    decider.isAvailable(DeciderConstants.kafkaMessageScribeSampleRate, recipient)
  sampledIn && product == Product.Home
}
/**
 * Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled.
 *
 * For an interleave result the tweets are shuffled, truncated to KafkaMaxTweetsPerMessage
 * and split into groups of BatchSize; one message is emitted per group. Any other result
 * type bumps the "InvalidKafkaMessageResultType" counter and yields a single message whose
 * interleave result carries no tweets. Emitted messages keep the uuid, user id and trace id
 * of `scribeMsg` but drop performance metrics and impressed buckets.
 *
 * @param scribeMsg the full scribe message
 * @return the downsampled messages to send to Kafka
 */
private[logging] def downsampleKafkaMessage(
  scribeMsg: GetTweetsRecommendationsScribe
): Seq[GetTweetsRecommendationsScribe] = {
  def messageFor(sampledResult: Result): GetTweetsRecommendationsScribe =
    GetTweetsRecommendationsScribe(
      uuid = scribeMsg.uuid,
      userId = scribeMsg.userId,
      result = sampledResult,
      traceId = scribeMsg.traceId,
      performanceMetrics = None,
      impressedBuckets = None
    )

  val sampledResults: Seq[Result] = scribeMsg.result match {
    case Result.InterleaveResult(interleaveResult) =>
      val batches = interleaveResult.tweets match {
        case Some(tweets) =>
          Random
            .shuffle(tweets)
            .take(KafkaMaxTweetsPerMessage)
            .grouped(BatchSize)
            .toSeq
        case None =>
          Seq.empty
      }
      batches.map(batch => Result.InterleaveResult(InterleaveResult(Some(batch))))

    // if it's an unrecognized type, err on the side of sending no candidates
    case _ =>
      kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr()
      Seq(Result.InterleaveResult(InterleaveResult(None)))
  }

  sampledResults.map(messageFor)
}
/**
 * Handles client_event serialization to log data into DDG metrics.
 *
 * The event carries the latency and the candidate count, under a namespace built by
 * getNamespace with the action "candidates".
 *
 * @param logger                     logger the serialized event is written to
 * @param topLevelDdgMetricsMetadata user, product and client information of the request
 * @param candidateSize              reported as "event_value"
 * @param latencyMs                  reported as "latency_ms"
 */
private[logging] def publishTopLevelDdgMetrics(
  logger: Logger,
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
  candidateSize: Long,
  latencyMs: Long,
): Unit = {
  val eventData = Map[Any, Any](
    "latency_ms" -> latencyMs,
    "event_value" -> candidateSize
  )
  val eventNamespace =
    getNamespace(topLevelDdgMetricsMetadata, ("tweetrec", "")) + ("action" -> "candidates")
  val clientData = getClientData(topLevelDdgMetricsMetadata)

  logger.info(serialization.serializeClientEvent(eventNamespace, clientData, eventData))
}
/**
 * Builds the client data provider for a DDG metrics event from the request metadata.
 * The guest id is never set.
 *
 * @param topLevelDdgMetricsMetadata user, client application and country of the request
 */
private def getClientData(
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata
): ClientDataProvider = {
  val metadata = topLevelDdgMetricsMetadata
  MinimalClientDataProvider(
    userId = metadata.userId,
    guestId = None,
    clientApplicationId = metadata.clientApplicationId,
    countryCode = metadata.countryCode
  )
}
/**
 * Builds the client-event namespace for a DDG metrics event.
 *
 * The page is always "cr-mixer"; the section is the product's original name converted from
 * UpperCamel to lower_underscore; component and element come from `label`.
 *
 * @param topLevelDdgMetricsMetadata supplies the product and the client application id
 * @param label                      (component, element) pair
 */
private def getNamespace(
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
  label: (String, String)
): Map[String, String] = {
  val (component, element) = label
  val section = CaseFormat.UPPER_CAMEL.to(
    CaseFormat.LOWER_UNDERSCORE,
    topLevelDdgMetricsMetadata.product.originalName)
  val client =
    ScribingABDeciderUtil.clientForAppId(topLevelDdgMetricsMetadata.clientApplicationId)

  Map(
    "client" -> client,
    "page" -> "cr-mixer",
    "section" -> section,
    "component" -> component,
    "element" -> element
  )
}
}
/** Constants used by downsampleKafkaMessage when splitting a scribe message for Kafka. */
object CrMixerScribeLogger {
// Maximum number of (shuffled) tweets kept from one interleave result
val KafkaMaxTweetsPerMessage: Int = 200
// Group size used when splitting the kept tweets into separate Kafka messages
val BatchSize: Int = 20
}

View File

@ -0,0 +1,193 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedTweetResult
import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribe logger for the related-tweets paths.
 *
 * The public `scribe*` methods wrap a by-name future, attach a success callback to it and
 * publish the converted result through `relatedTweetsScribeLogger`, gated by `decider`.
 * Requests whose client context has no user id are counted but not scribed.
 */
@Singleton
case class RelatedTweetScribeLogger @Inject() (
decider: CrMixerDecider,
statsReceiver: StatsReceiver,
@Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) {
// All stats of this class live under the "RelatedTweetsScribeLogger" scope
private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger")
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
// Counts top-level requests that could not be sampled because no user id was present
private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId")
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
// Counts per-step results that could not be sampled because no user id was present
private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId")
/**
 * Scribes the initial (fetched) related-tweet candidates once `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertFetchCandidatesResult
 * to turn the candidates into a scribe result.
 *
 * @param query       related-tweet query that supplies the scribe metadata
 * @param getResultFn by-name computation of the per-source candidate lists
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeInitialCandidates(
  query: RelatedTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = RelatedTweetScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    relatedTweetScribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult)
}
/**
 * Scribes the related-tweet candidates that remain after pre-rank filtering once
 * `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertPreRankFilterResult
 * to turn the candidates into a scribe result.
 *
 * @param query       related-tweet query that supplies the scribe metadata
 * @param getResultFn by-name computation of the filtered per-source candidate lists
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribePreRankFilterCandidates(
  query: RelatedTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = RelatedTweetScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    relatedTweetScribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertPreRankFilterResult)
}
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the getRelatedTweets endpoint.
 *
 * Runs as a success callback on the future produced by `getResultFn`. When the client
 * context carries a user id, the result is scribed if the `upperFunnelPerStepScribeRate`
 * decider is available for that id; otherwise only the no-user-id counter is bumped.
 *
 * @param request                    the top-level request, embedded in the scribed result
 * @param startTime                  stored as the `timestamp` of the scribed result
 * @param relatedTweetScribeMetadata request metadata used for gating and stat names
 * @param getResultFn                by-name computation of the response
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeGetRelatedTweets(
  request: RelatedTweetRequest,
  startTime: Long,
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  getResultFn: => Future[RelatedTweetResponse]
): Future[RelatedTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    relatedTweetScribeMetadata.clientContext.userId match {
      case Some(userId)
          if decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate) =>
        topLevelApiStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
        val latencyMs = timer().inMilliseconds
        val traceId = Trace.id.traceId.toLong
        scribeResult(
          buildScribeMessage(
            convertTopLevelAPIResult(request, response, startTime),
            relatedTweetScribeMetadata,
            latencyMs,
            traceId))
      case Some(_) =>
        // user id present but not sampled in: nothing to do
        ()
      case _ =>
        topLevelApiNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
    }
  }
}
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch candidates, filters.
 *
 * Runs as a success callback on the future produced by `getResultFn`. When the client
 * context carries a user id, the result is converted and scribed if the
 * `upperFunnelPerStepScribeRate` decider is available for that id; otherwise only the
 * no-user-id counter is bumped.
 *
 * @param relatedTweetScribeMetadata request metadata used for gating and stat names
 * @param getResultFn                by-name computation of the step result
 * @param convertToResultFn          converts the step result and the user id into a result
 * @tparam T type of the step result
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
private def scribeResultsAndPerformanceMetrics[T](
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => RelatedTweetResult
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    relatedTweetScribeMetadata.clientContext.userId match {
      case Some(userId)
          if decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate) =>
        upperFunnelsStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
        val latencyMs = timer().inMilliseconds
        val convertedResult = convertToResultFn(input, userId)
        val traceId = Trace.id.traceId.toLong
        scribeResult(
          buildScribeMessage(convertedResult, relatedTweetScribeMetadata, latencyMs, traceId))
      case Some(_) =>
        // user id present but not sampled in: nothing to do
        ()
      case _ =>
        upperFunnelsNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
    }
  }
}
/**
 * Wraps a related-tweets request/response pair into a
 * `RelatedTweetResult.RelatedTweetTopLevelApiResult`.
 *
 * @param request   the request to embed
 * @param response  the response to embed
 * @param startTime stored as the `timestamp` of the result
 */
private def convertTopLevelAPIResult(
  request: RelatedTweetRequest,
  response: RelatedTweetResponse,
  startTime: Long
): RelatedTweetResult = {
  val apiResult = RelatedTweetTopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  RelatedTweetResult.RelatedTweetTopLevelApiResult(apiResult)
}
/**
 * Flattens the per-source candidate lists into a `RelatedTweetResult.FetchCandidatesResult`.
 *
 * Only the tweet id is scribed for each candidate.
 *
 * @param candidatesSeq candidates grouped per source
 * @param requestUserId unused here; present so the method fits the converter signature
 *                      expected by scribeResultsAndPerformanceMetrics
 */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): RelatedTweetResult = {
  val flattened = for {
    candidates <- candidatesSeq
    candidate <- candidates
  } yield TweetCandidateWithMetadata(
    tweetId = candidate.tweetId,
    candidateGenerationKey = None // do not hydrate candidateGenerationKey to save cost
  )
  RelatedTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(flattened)))
}
/**
 * Flattens the per-source, pre-rank-filtered candidate lists into a
 * `RelatedTweetResult.PreRankFilterResult`.
 *
 * Each candidate is scribed with its tweet id, candidate generation key, author id and
 * similarity score; `numCandidateGenerationKeys` is left empty.
 *
 * @param candidatesSeq filtered candidates grouped per source
 * @param requestUserId request user id, passed to CandidateGenerationKeyUtil.toThrift
 */
private def convertPreRankFilterResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): RelatedTweetResult = {
  val flattened = for {
    candidates <- candidatesSeq
    candidate <- candidates
  } yield {
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      authorId = Some(candidate.tweetInfo.authorId),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = None
    )
  }
  RelatedTweetResult.PreRankFilterResult(PreRankFilterResult(Some(flattened)))
}
/**
 * Assembles the related-tweets scribe message for one result.
 *
 * Besides the supplied values, the message carries the requester and guest ids from the
 * client context and the impressed buckets returned by getImpressedBuckets.
 *
 * @param relatedTweetResult         the converted step or top-level result
 * @param relatedTweetScribeMetadata supplies the request UUID, internal id and client context
 * @param latencyMs                  latency reported in the performance metrics
 * @param traceId                    trace id stored on the message
 */
private def buildScribeMessage(
  relatedTweetResult: RelatedTweetResult,
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetRelatedTweetsScribe = {
  val clientContext = relatedTweetScribeMetadata.clientContext
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetRelatedTweetsScribe(
    uuid = relatedTweetScribeMetadata.requestUUID,
    internalId = relatedTweetScribeMetadata.internalId,
    relatedTweetResult = relatedTweetResult,
    requesterId = clientContext.userId,
    guestId = clientContext.guestId,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
/**
 * Publishes `scribeMsg` through the related-tweets scribe logger.
 *
 * @param scribeMsg the message to serialize and log
 */
private def scribeResult(scribeMsg: GetRelatedTweetsScribe): Unit =
  publish(
    logger = relatedTweetsScribeLogger,
    codec = GetRelatedTweetsScribe,
    message = scribeMsg
  )
}

View File

@ -0,0 +1,43 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.logging.Logger
import com.twitter.scrooge.BinaryThriftStructSerializer
import com.twitter.scrooge.ThriftStruct
import com.twitter.scrooge.ThriftStructCodec
/** Helpers shared by the scribe loggers of the `logging` package. */
object ScribeLoggerUtils {

  /**
   * Handles base64-encoding, serialization, and publish.
   *
   * @param logger  logger the serialized message is written to at info level
   * @param codec   thrift codec used to build the serializer
   * @param message the thrift struct to publish
   * @tparam T thrift struct type of the message
   */
  private[logging] def publish[T <: ThriftStruct](
    logger: Logger,
    codec: ThriftStructCodec[T],
    message: T
  ): Unit = {
    val serializer = BinaryThriftStructSerializer(codec)
    logger.info(serializer.toString(message))
  }

  /**
   * Collects the impressed buckets reported by CrMixerImpressedBuckets.
   *
   * The buckets are de-duplicated, their count is recorded on the "impressed_buckets" stat
   * and each one is converted to its thrift representation. The whole computation is wrapped
   * in StatsUtil.trackNonFutureBlockStats under the "getImpressedBuckets" scope.
   *
   * @param scopedStats stats receiver the tracking stats are recorded on
   * @return the converted buckets, or None when getAllImpressedBuckets yields none
   */
  private[logging] def getImpressedBuckets(
    scopedStats: StatsReceiver
  ): Option[List[ImpressesedBucketInfo]] = {
    val trackingStats = scopedStats.scope("getImpressedBuckets")
    StatsUtil.trackNonFutureBlockStats(trackingStats) {
      CrMixerImpressedBuckets.getAllImpressedBuckets.map { listBuckets =>
        val uniqueBuckets = listBuckets.toSet
        scopedStats.stat("impressed_buckets").add(uniqueBuckets.size)

        // Mapping on the set (not a list) keeps the result de-duplicated after conversion too
        val bucketInfos = uniqueBuckets.map { bucket =>
          val settings = bucket.experiment.settings
          ImpressesedBucketInfo(
            experimentId = settings.experimentId.getOrElse(-1L),
            bucketName = bucket.name,
            version = settings.version
          )
        }
        bucketInfos.toList
      }
    }
  }
}

View File

@ -0,0 +1,45 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
/**
 * Request-level information attached to scribed messages.
 *
 * @param requestUUID UUID of the request
 * @param userId      id of the requesting user
 * @param product     product the request was made for
 */
case class ScribeMetadata(
  requestUUID: Long,
  userId: UserId,
  product: Product)

/** Factory methods that copy the metadata fields from the supported query types. */
object ScribeMetadata {

  /** Builds the metadata from a [[CrCandidateGeneratorQuery]]. */
  def from(query: CrCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Builds the metadata from a [[UtegTweetCandidateGeneratorQuery]]. */
  def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Builds the metadata from an [[AdsCandidateGeneratorQuery]]. */
  def from(query: AdsCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)
}
/**
 * Request-level information attached to scribed related-tweet messages.
 *
 * @param requestUUID   UUID of the request
 * @param internalId    internal id carried by the query
 * @param clientContext client context of the request (the user id in it is optional)
 * @param product       product the request was made for
 */
case class RelatedTweetScribeMetadata(
  requestUUID: Long,
  internalId: InternalId,
  clientContext: ClientContext,
  product: Product)

/** Factory for [[RelatedTweetScribeMetadata]]. */
object RelatedTweetScribeMetadata {

  /** Builds the metadata by copying the corresponding fields of `query`. */
  def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata =
    RelatedTweetScribeMetadata(
      requestUUID = query.requestUUID,
      internalId = query.internalId,
      clientContext = query.clientContext,
      product = query.product
    )
}

View File

@ -0,0 +1,22 @@
package com.twitter.cr_mixer
package logging
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.Product
/**
 * Request information needed to publish top-level DDG metrics.
 *
 * @param userId              id of the requesting user, if present
 * @param product             product the request was made for
 * @param clientApplicationId application id of the client, if present
 * @param countryCode         country code of the client, if present
 */
case class TopLevelDdgMetricsMetadata(
  userId: Option[Long],
  product: Product,
  clientApplicationId: Option[Long],
  countryCode: Option[String])

/** Factory for [[TopLevelDdgMetricsMetadata]]. */
object TopLevelDdgMetricsMetadata {

  /** Builds the metadata from the request's product and client context. */
  def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = {
    val clientContext = request.clientContext
    TopLevelDdgMetricsMetadata(
      userId = clientContext.userId,
      product = request.product,
      clientApplicationId = clientContext.appId,
      countryCode = clientContext.countryCode
    )
  }
}

View File

@ -0,0 +1,147 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.UtegTweetResult
import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribe logger for the UTEG tweet recommendation paths.
 *
 * The public `scribe*` methods wrap a by-name future, attach a success callback to it and
 * publish the converted result through `utegTweetScribeLogger`, gated by `decider`.
 */
@Singleton
case class UtegTweetScribeLogger @Inject() (
decider: CrMixerDecider,
statsReceiver: StatsReceiver,
@Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) {
// All stats of this class live under the "UtegTweetScribeLogger" scope
private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger")
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
/**
 * Scribes the initial (fetched) UTEG candidates once `getResultFn` succeeds.
 *
 * Delegates to scribeResultsAndPerformanceMetrics, using convertFetchCandidatesResult
 * to turn the candidates into a scribe result.
 *
 * @param query       UTEG query that supplies the scribe metadata
 * @param getResultFn by-name computation of the candidates
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeInitialCandidates(
  query: UtegTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]]
): Future[Seq[TweetWithScoreAndSocialProof]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult)
}
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the GetUtegTweetRecommendations() endpoint.
 *
 * Runs as a success callback on the future produced by `getResultFn`; the result is only
 * converted and scribed when the `upperFunnelPerStepScribeRate` decider is available for
 * the request's user id.
 *
 * @param request        the top-level request, embedded in the scribed result
 * @param startTime      stored as the `timestamp` of the scribed result
 * @param scribeMetadata request UUID, user id and product used for gating and stat names
 * @param getResultFn    by-name computation of the response
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
def scribeGetUtegTweetRecommendations(
  request: UtegTweetRequest,
  startTime: Long,
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[UtegTweetResponse]
): Future[UtegTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    val sampledIn = decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.upperFunnelPerStepScribeRate)

    if (sampledIn) {
      topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
      val latencyMs = timer().inMilliseconds
      val apiResult = convertTopLevelAPIResult(request, response, startTime)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(apiResult, scribeMetadata, latencyMs, traceId))
    }
  }
}
/**
 * Wraps a UTEG request/response pair into a `UtegTweetResult.UtegTweetTopLevelApiResult`.
 *
 * @param request   the request to embed
 * @param response  the response to embed
 * @param startTime stored as the `timestamp` of the result
 */
private def convertTopLevelAPIResult(
  request: UtegTweetRequest,
  response: UtegTweetResponse,
  startTime: Long
): UtegTweetResult = {
  val apiResult = UtegTweetTopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  UtegTweetResult.UtegTweetTopLevelApiResult(apiResult)
}
/**
 * Assembles the UTEG scribe message for one result.
 *
 * Besides the supplied values, the message carries the impressed buckets returned by
 * getImpressedBuckets for this logger's stats scope.
 *
 * @param utegTweetResult the converted step or top-level result
 * @param scribeMetadata  supplies the request UUID and user id
 * @param latencyMs       latency reported in the performance metrics
 * @param traceId         trace id stored on the message
 */
private def buildScribeMessage(
  utegTweetResult: UtegTweetResult,
  scribeMetadata: ScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetUtegTweetsScribe = {
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetUtegTweetsScribe(
    uuid = scribeMetadata.requestUUID,
    userId = scribeMetadata.userId,
    utegTweetResult = utegTweetResult,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
/**
 * Publishes `scribeMsg` through the UTEG scribe logger.
 *
 * @param scribeMsg the message to serialize and log
 */
private def scribeResult(scribeMsg: GetUtegTweetsScribe): Unit =
  publish(
    logger = utegTweetScribeLogger,
    codec = GetUtegTweetsScribe,
    message = scribeMsg
  )
/**
 * Converts the fetched UTEG candidates into a `UtegTweetResult.FetchCandidatesResult`.
 *
 * Only the tweet id is scribed for each candidate.
 *
 * @param candidates    the fetched candidates
 * @param requestUserId unused here; present so the method fits the converter signature
 *                      expected by scribeResultsAndPerformanceMetrics
 */
private def convertFetchCandidatesResult(
  candidates: Seq[TweetWithScoreAndSocialProof],
  requestUserId: UserId
): UtegTweetResult = {
  def toThriftCandidate(candidate: TweetWithScoreAndSocialProof): TweetCandidateWithMetadata =
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = None // do not hydrate candidateGenerationKey to save cost
    )

  val thriftCandidates = candidates.map(toThriftCandidate)
  UtegTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(thriftCandidates)))
}
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch candidates, filters.
 *
 * Runs as a success callback on the future produced by `getResultFn`; the step result is
 * only converted and scribed when the `upperFunnelPerStepScribeRate` decider is available
 * for the request's user id.
 *
 * @param scribeMetadata    request UUID, user id and product used for gating and stat names
 * @param getResultFn       by-name computation of the step result
 * @param convertToResultFn converts the step result and the user id into a result
 * @tparam T type of the step result
 * @return the future produced by `getResultFn`, with the scribing callback attached
 */
private def scribeResultsAndPerformanceMetrics[T](
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => UtegTweetResult
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    val sampledIn = decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.upperFunnelPerStepScribeRate)

    if (sampledIn) {
      upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
      val latencyMs = timer().inMilliseconds
      val convertedResult = convertToResultFn(input, scribeMetadata.userId)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(convertedResult, scribeMetadata, latencyMs, traceId))
    }
  }
}
}

View File

@ -0,0 +1,16 @@
# Scala library target built from every *.scala file matched by the glob below.
scala_library(
sources = ["*.scala"],
# Uses the "fatal_warnings" compiler option set
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
# Targets this library depends on (config API, thrift bindings, simclusters common types)
dependencies = [
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -0,0 +1,200 @@
package com.twitter.cr_mixer.model
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.simclusters_v2.common.TweetId
/**
 * Base type of every tweet candidate model in this file.
 *
 * The hash code is derived from the tweet id only, so candidates for the same tweet hash
 * alike regardless of their other fields.
 */
sealed trait Candidate {
// Id of the tweet this candidate refers to
val tweetId: TweetId
// NOTE(review): presumably TweetId is a 64-bit Long, in which case toInt keeps only the low
// 32 bits of the id — confirm that the resulting hash distribution is acceptable.
override def hashCode: Int = tweetId.toInt
}
/**
 * A tweet id paired with the information about how the candidate was generated.
 *
 * @param tweetId                 id of the candidate tweet
 * @param candidateGenerationInfo how the candidate was generated
 */
case class TweetWithCandidateGenerationInfo(
  tweetId: TweetId,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /** Score from the similarity engine info, or 0.0 when no score is present. */
  def getSimilarityScore: Double = {
    val engineScore = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * A candidate as produced by candidate generation, before blending and ranking.
 *
 * @param tweetId                 id of the candidate tweet
 * @param tweetInfo               tweet information attached to the candidate
 * @param candidateGenerationInfo how the candidate was generated
 */
case class InitialCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
   * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score.
   * Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineScore = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * The same candidate can be generated by multiple algorithms.
   * During blending, candidate deduping happens. In order to retain the candidateGenerationInfo
   * from different algorithms, we attach them to a list of potentialReasons.
   *
   * @param potentialReasons generation infos of every algorithm that produced this tweet
   * @return a blended candidate whose chosen reason is this candidate's own generation info
   */
  def toBlendedCandidate(
    potentialReasons: Seq[CandidateGenerationInfo]
  ): BlendedCandidate =
    BlendedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedCandidate(): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      // prediction score is default to 0.0 to help differentiate that it is a no-op
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
/**
 * An ads candidate as produced by candidate generation, before blending and ranking.
 *
 * @param tweetId                 id of the candidate tweet
 * @param lineItemInfo            line items attached to the candidate
 * @param candidateGenerationInfo how the candidate was generated
 */
case class InitialAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
   * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score.
   * Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineScore = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * The same candidate can be generated by multiple algorithms.
   * During blending, candidate deduping happens. In order to retain the candidateGenerationInfo
   * from different algorithms, we attach them to a list of potentialReasons.
   *
   * @param potentialReasons generation infos of every algorithm that produced this tweet
   * @return a blended ads candidate whose chosen reason is this candidate's own generation info
   */
  def toBlendedAdsCandidate(
    potentialReasons: Seq[CandidateGenerationInfo]
  ): BlendedAdsCandidate =
    BlendedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedAdsCandidate(): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      // prediction score is default to 0.0 to help differentiate that it is a no-op
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
/**
 * A candidate after blending: one chosen generation reason plus every potential reason.
 * Construction asserts that the chosen reason is one of the potential reasons.
 *
 * @param tweetId          id of the candidate tweet
 * @param tweetInfo        tweet information attached to the candidate
 * @param reasonChosen     generation info selected for this candidate
 * @param potentialReasons generation infos of every algorithm that produced this tweet
 */
case class BlendedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  assert(potentialReasons.contains(reasonChosen))

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
   * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score.
   * Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineScore = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * Promotes this candidate to a ranked candidate, keeping its reasons.
   *
   * @param predictionScore score assigned by the ranker
   */
  def toRankedCandidate(predictionScore: Double): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
/**
 * An ads candidate that records which CandidateGenerationInfo was chosen for it, together
 * with every CG info that could have produced it.
 *
 * Construction fails with an AssertionError unless potentialReasons contains reasonChosen.
 *
 * @param tweetId id of the candidate tweet
 * @param lineItemInfo line item info carried along with the candidate
 * @param reasonChosen the CG info selected for this candidate
 * @param potentialReasons all CG infos attached to this candidate; must include reasonChosen
 */
case class BlendedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Invariant checked at construction time.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score recorded in the chosen CG info, or 0.0 when none is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * Attaches a prediction score, keeping the chosen and potential reasons unchanged.
   *
   * @param predictionScore score to store on the resulting RankedAdsCandidate
   */
  def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
/**
 * A tweet candidate carrying a prediction score in addition to its chosen and potential
 * CandidateGenerationInfos.
 *
 * Construction fails with an AssertionError unless potentialReasons contains reasonChosen.
 *
 * @param tweetId id of the candidate tweet
 * @param tweetInfo tweet info carried along with the candidate
 * @param predictionScore score attached to this candidate
 * @param reasonChosen the CG info selected for this candidate
 * @param potentialReasons all CG infos attached to this candidate; must include reasonChosen
 */
case class RankedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Invariant checked at construction time.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score recorded in the chosen CG info, or 0.0 when none is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * An ads candidate carrying a prediction score in addition to its chosen and potential
 * CandidateGenerationInfos.
 *
 * Construction fails with an AssertionError unless potentialReasons contains reasonChosen.
 *
 * @param tweetId id of the candidate tweet
 * @param lineItemInfo line item info carried along with the candidate
 * @param predictionScore score attached to this candidate
 * @param reasonChosen the CG info selected for this candidate
 * @param potentialReasons all CG infos attached to this candidate; must include reasonChosen
 */
case class RankedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Invariant checked at construction time.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score recorded in the chosen CG info, or 0.0 when none is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * A Candidate consisting only of a tweet id and a score.
 * NOTE(review): presumably produced by the Trip candidate source - confirm against callers.
 *
 * @param tweetId id of the candidate tweet
 * @param score score bound to the tweet
 */
case class TripTweetWithScore(
  tweetId: TweetId,
  score: Double)
    extends Candidate

View File

@ -0,0 +1,67 @@
package com.twitter.cr_mixer.model
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Time
/**
 * Tweet-level attributes. Represents the source used in candidate generation.
 *
 * For legacy reasons, SourceType used to represent both SourceType and SimilarityEngineType.
 * Moving forward, SourceType is used for SourceType ONLY (e.g. TweetFavorite, UserFollow,
 * TwiceUserId), and the separate SimilarityEngineType covers engines (e.g. SimClustersANN).
 *
 * Currently, one special case is TwiceUserId, which is used as a source although it is not
 * necessarily a "signal".
 *
 * @param sourceType e.g. SourceType.TweetFavorite, SourceType.UserFollow, SourceType.TwiceUserId
 * @param internalId e.g. UserId(0L), TweetId(0L)
 * @param sourceEventTime optional time attached to the source
 *                        (presumably when the source event happened - confirm against callers)
 */
case class SourceInfo(
  sourceType: SourceType,
  internalId: InternalId,
  sourceEventTime: Option[Time])
/**
 * Tweet-level attributes. Represents the source User Graph used in candidate generation.
 *
 * It is an intermediate product and, unlike SourceInfo, will not be stored.
 * Essentially, CrMixer queries a graph, and the graph returns a list of users to be used as
 * sources - for instance RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity engines
 * such as UTG or UTEG leverage these sources to build candidates.
 *
 * The definition of SourceType was extended to cover both "Source Signal" and "Source Graph".
 * See "[CrMixer] Graph Based Source Fetcher Abstraction Proposal" (link not included here).
 *
 * TODO: consider making both SourceInfo and GraphSourceInfo extend the same trait to have a
 * unified interface.
 *
 * @param sourceType the type of this graph source
 * @param seedWithScores the seed users, each mapped to a score
 */
case class GraphSourceInfo(
  sourceType: SourceType,
  seedWithScores: Map[UserId, Double])
/**
 * Tweet-level attributes. Represents the similarity engine (the algorithm) used for candidate
 * generation, along with its metadata.
 *
 * @param similarityEngineType e.g. SimClustersANN, UserTweetGraph
 * @param modelId e.g. UserTweetGraphConsumerEmbedding_ALL_20210708; can be None
 *                (e.g. for UTEG, UnifiedTweetBasedSE, etc.)
 * @param score a score generated by this similarity engine, if any
 */
case class SimilarityEngineInfo(
  similarityEngineType: SimilarityEngineType,
  modelId: Option[String],
  score: Option[Double])
/**
 * Tweet-level attributes. A combination of both SourceInfo and SimilarityEngineInfo.
 *
 * A SimilarityEngine is a composition and can be composed of many leaf Similarity Engines.
 * For instance, the TweetBasedUnified SE could be a composition of both the UserTweetGraph SE
 * and the SimClustersANN SE. Note that a (composite) SimilarityEngine may call other (atomic,
 * contributing) SimilarityEngines to contribute to its final candidate list; these contributing
 * SEs are tracked in the contributingSimilarityEngines list.
 *
 * @param sourceInfoOpt optional, as many consumer-based CGs do not have a source
 * @param similarityEngineInfo the similarity engine used in candidate generation
 *                             (e.g. TweetBasedUnifiedSE); can be an atomic or a composite SE
 * @param contributingSimilarityEngines only a composite SE has these (e.g. SANN, UTG);
 *                                      otherwise it is an empty Seq. All contributing SEs
 *                                      must be atomic
 */
case class CandidateGenerationInfo(
  sourceInfoOpt: Option[SourceInfo],
  similarityEngineInfo: SimilarityEngineInfo,
  contributingSimilarityEngines: Seq[SimilarityEngineInfo])

View File

@ -0,0 +1,96 @@
package com.twitter.cr_mixer.model
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.timelines.configapi.Params
/**
 * Fields common to every candidate generator query. The trait is sealed, so all
 * implementations live in this file.
 */
sealed trait CandidateGeneratorQuery {

  /** The product this query is issued for. */
  val product: Product

  /** Number-of-results setting carried by the query. */
  val maxNumResults: Int

  /** Tweet ids listed as impressed for this query. */
  val impressedTweetList: Set[TweetId]

  /** The configapi Params attached to the query. */
  val params: Params

  /** Identifier of the request. */
  val requestUUID: Long
}
/** Mixed into queries that expose a user id. Sealed: implementations live in this file. */
sealed trait HasUserId {

  /** The user id carried by the query. */
  val userId: UserId
}
/**
 * A CandidateGeneratorQuery with a user id. Besides the common query fields it carries the
 * user's UserState and an optional language code.
 *
 * @param languageCode optional; defaults to None when not supplied
 */
case class CrCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  userState: UserState,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long,
  languageCode: Option[String] = None)
    extends CandidateGeneratorQuery
    with HasUserId
/**
 * A CandidateGeneratorQuery with a user id and the user's UserState.
 * It has the same fields as CrCandidateGeneratorQuery except for the language code.
 */
case class UtegTweetCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  userState: UserState,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long)
    extends CandidateGeneratorQuery
    with HasUserId
/**
 * A CandidateGeneratorQuery keyed by an InternalId instead of a user id.
 *
 * @param internalId the id this query is keyed on
 * @param clientContext kept in order to scribe LogIn/LogOut requests
 */
case class RelatedTweetCandidateGeneratorQuery(
  internalId: InternalId,
  clientContext: ClientContext,
  product: Product,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long)
    extends CandidateGeneratorQuery
/**
 * A CandidateGeneratorQuery keyed by an InternalId instead of a user id.
 * It has the same fields as RelatedTweetCandidateGeneratorQuery.
 *
 * @param internalId the id this query is keyed on
 * @param clientContext kept in order to scribe LogIn/LogOut requests
 */
case class RelatedVideoTweetCandidateGeneratorQuery(
  internalId: InternalId,
  clientContext: ClientContext,
  product: Product,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long)
    extends CandidateGeneratorQuery
/**
 * A CandidateGeneratorQuery that, besides impressed tweets, also carries a set of impressed
 * users plus optional language and country codes. It has a userId but does not mix in
 * HasUserId.
 *
 * Note on construction: requestUUID has no default and comes after the two defaulted
 * parameters, so positional callers must still pass the language and country codes;
 * use named arguments to rely on the defaults.
 *
 * @param impressedUserList user ids listed as impressed for this query
 * @param languageCodeOpt optional; defaults to None
 * @param countryCodeOpt optional; defaults to None
 */
case class FrsTweetCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  maxNumResults: Int,
  impressedUserList: Set[UserId],
  impressedTweetList: Set[TweetId],
  params: Params,
  languageCodeOpt: Option[String] = None,
  countryCodeOpt: Option[String] = None,
  requestUUID: Long)
    extends CandidateGeneratorQuery
/**
 * Query used for ads candidate generation.
 *
 * Unlike the other query classes in this file, it does not extend CandidateGeneratorQuery
 * and has no impressedTweetList.
 */
case class AdsCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  userState: UserState,
  maxNumResults: Int,
  params: Params,
  requestUUID: Long)
/**
 * A CandidateGeneratorQuery that carries a set of topic ids. It has a userId but does not
 * mix in HasUserId.
 *
 * @param topicIds the topics this query is about
 * @param isVideoOnly flag carried by the query
 *                    (presumably restricts results to video tweets - confirm against callers)
 */
case class TopicTweetCandidateGeneratorQuery(
  userId: UserId,
  topicIds: Set[TopicId],
  product: Product,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long,
  isVideoOnly: Boolean)
    extends CandidateGeneratorQuery

View File

@ -0,0 +1,6 @@
package com.twitter.cr_mixer.model
/**
 * Closed set of Earlybird similarity engine kinds. The trait is sealed; its three members are
 * the singleton objects declared below.
 */
sealed trait EarlybirdSimilarityEngineType

/** The recency-based member of [[EarlybirdSimilarityEngineType]]. */
object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType

/** The model-based member of [[EarlybirdSimilarityEngineType]]. */
object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType

/** The Tensorflow-based member of [[EarlybirdSimilarityEngineType]]. */
object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType

View File

@ -0,0 +1,11 @@
package com.twitter.cr_mixer.model
/** Namespace for the health threshold enumeration. */
object HealthThreshold {

  /**
   * Five named levels with ids 1 through 5, in declaration order:
   * Off = 1, Moderate = 2, Strict = 3, Stricter = 4, StricterPlus = 5.
   *
   * The enumeration starts numbering at 1 and each value takes the next id.
   */
  object Enum extends Enumeration(1) {
    val Off, Moderate, Strict, Stricter, StricterPlus = Value
  }
}

View File

@ -0,0 +1,77 @@
package com.twitter.cr_mixer.model
/**
 * A configuration object for all Model Based Candidate Sources (MBCG).
 *
 * Model name guideline: please name your modelId as "Algorithm_Product_Date".
 * If your model is used for multiple product surfaces, use "all" as the product.
 * Don't name your algorithm "MBCG" - all the algorithms here are MBCG.
 *
 * Don't forget to add your new models to the allHnswANNSimilarityEngineModelIds list.
 */
object ModelConfig {

  // ---- Offline SimClusters CG experiment related model ids ----
  val OfflineInterestedInFromKnownFor2020: String =
    "OfflineIIKF_ALL_20220414"
  val OfflineInterestedInFromKnownFor2020Hl0El15: String =
    "OfflineIIKF_ALL_20220414_Hl0_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El15: String =
    "OfflineIIKF_ALL_20220414_Hl2_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El50: String =
    "OfflineIIKF_ALL_20220414_Hl2_El50"
  val OfflineInterestedInFromKnownFor2020Hl8El50: String =
    "OfflineIIKF_ALL_20220414_Hl8_El50"
  val OfflineMTSConsumerEmbeddingsFav90P20M: String =
    "OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414"

  // ---- TwHIN model ids ----
  val ConsumerBasedTwHINRegularUpdateAll20221024: String =
    "ConsumerBasedTwHINRegularUpdate_All_20221024"

  // ---- Averaged TwHIN model ids ----
  val TweetBasedTwHINRegularUpdateAll20221024: String =
    "TweetBasedTwHINRegularUpdate_All_20221024"

  // ---- Collaborative filtering TwHIN model ids ----
  val TwhinCollabFilterForFollow: String = "TwhinCollabFilterForFollow"
  val TwhinCollabFilterForEngagement: String = "TwhinCollabFilterForEngagement"
  val TwhinMultiClusterForFollow: String = "TwhinMultiClusterForFollow"
  val TwhinMultiClusterForEngagement: String = "TwhinMultiClusterForEngagement"

  // ---- Two Tower model ids ----
  val TwoTowerFavALL20220808: String = "TwoTowerFav_ALL_20220808"

  // ---- Debugger demo-only model ids ----
  val DebuggerDemo: String = "DebuggerDemo"

  // ColdStartLookalike - this is not really a model name; it is a placeholder that indicates
  // the ColdStartLookalike candidate source, which is currently plugged into
  // CustomizedRetrievalCandidateGeneration temporarily.
  val ColdStartLookalikeModelName: String =
    "ConsumersBasedUtgColdStartLookalike20220707"

  // consumersBasedUTG-RealGraphOon model id
  val ConsumersBasedUtgRealGraphOon20220705: String =
    "ConsumersBasedUtgRealGraphOon_All_20220705"

  // consumersBasedUAG-RealGraphOon model id
  val ConsumersBasedUagRealGraphOon20221205: String =
    "ConsumersBasedUagRealGraphOon_All_20221205"

  // ---- FTR ----
  val OfflineFavDecayedSum: String = "OfflineFavDecayedSum"
  val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11"
  val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11"

  // All model ids of HnswANNSimilarityEngines.
  // Declared after the ids it references: vals in an object initialize in declaration order.
  val allHnswANNSimilarityEngineModelIds: Seq[String] = Seq(
    ConsumerBasedTwHINRegularUpdateAll20221024,
    TwoTowerFavALL20220808,
    DebuggerDemo
  )

  val ConsumerLogFavBasedInterestedInEmbedding: String =
    "ConsumerLogFavBasedInterestedIn_ALL_20221228"
  val ConsumerFollowBasedInterestedInEmbedding: String =
    "ConsumerFollowBasedInterestedIn_ALL_20221228"

  val RetweetBasedDiffusion: String = "RetweetBasedDiffusion"
}

View File

@ -0,0 +1,122 @@
package com.twitter.cr_mixer.model
/**
 * Names used with name-annotated module bindings, e.g. `@Named(ModuleNames.UnifiedCache)`.
 *
 * Every entry is a `final val` with no type annotation on purpose: that makes it a constant
 * value definition, which is what allows it to be used as an annotation argument.
 */
object ModuleNames {

  // ---- Stores ----
  final val FrsStore = "FrsStore"
  final val UssStore = "UssStore"
  final val UssStratoColumn = "UssStratoColumn"
  final val RsxStore = "RsxStore"
  final val RmsTweetLogFavLongestL2EmbeddingStore =
    "RmsTweetLogFavLongestL2EmbeddingStore"
  final val RmsUserFavBasedProducerEmbeddingStore =
    "RmsUserFavBasedProducerEmbeddingStore"
  final val RmsUserLogFavInterestedInEmbeddingStore =
    "RmsUserLogFavInterestedInEmbeddingStore"
  final val RmsUserFollowInterestedInEmbeddingStore =
    "RmsUserFollowInterestedInEmbeddingStore"
  final val StpStore = "StpStore"
  final val TwiceClustersMembersStore = "TwiceClustersMembersStore"
  final val TripCandidateStore = "TripCandidateStore"

  // ---- Consumer / tweet embedding based similarity engines ----
  final val ConsumerEmbeddingBasedTripSimilarityEngine =
    "ConsumerEmbeddingBasedTripSimilarityEngine"
  final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine =
    "ConsumerEmbeddingBasedTwHINANNSimilarityEngine"
  final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine =
    "ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine"
  final val ConsumersBasedUserAdGraphSimilarityEngine =
    "ConsumersBasedUserAdGraphSimilarityEngine"
  final val ConsumersBasedUserVideoGraphSimilarityEngine =
    "ConsumersBasedUserVideoGraphSimilarityEngine"
  final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine"
  final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine"
  final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine"

  // ---- SimClusters ANN service clients ----
  final val ProdSimClustersANNServiceClientName =
    "ProdSimClustersANNServiceClient"
  final val ExperimentalSimClustersANNServiceClientName =
    "ExperimentalSimClustersANNServiceClient"
  final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1"
  final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2"
  final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3"
  final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4"
  final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5"

  // ---- Caches ----
  final val UnifiedCache = "unifiedCache"
  final val MLScoreCache = "mlScoreCache"
  final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache"
  final val EarlybirdTweetsCache = "earlybirdTweetsCache"
  final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache =
    "earlybirdTweetsWithoutRetweetsRepliesCacheStore"
  final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache =
    "earlybirdTweetsWithRetweetsRepliesCacheStore"

  // ---- Loggers ----
  final val AbDeciderLogger = "abDeciderLogger"
  final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger"
  final val TweetRecsLogger = "tweetRecsLogger"
  final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger"
  final val RelatedTweetsLogger = "relatedTweetsLogger"
  final val UtegTweetsLogger = "utegTweetsLogger"
  final val AdsRecommendationsLogger = "adsRecommendationLogger"

  // ---- Offline SimClusters, RealGraph and offline candidate stores ----
  final val OfflineSimClustersANNInterestedInSimilarityEngine =
    "OfflineSimClustersANNInterestedInSimilarityEngine"
  final val RealGraphOonStore = "RealGraphOonStore"
  final val RealGraphInStore = "RealGraphInStore"
  final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore"
  final val OfflineTweet2020Hl0El15CandidateStore =
    "OfflineTweet2020Hl0El15CandidateStore"
  final val OfflineTweet2020Hl2El15CandidateStore =
    "OfflineTweet2020Hl2El15CandidateStore"
  final val OfflineTweet2020Hl2El50CandidateStore =
    "OfflineTweet2020Hl2El50CandidateStore"
  final val OfflineTweet2020Hl8El50CandidateStore =
    "OfflineTweet2020Hl8El50CandidateStore"
  final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore"
  final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore"
  final val OfflineFtrAt5Pop1000RankDecay11CandidateStore =
    "OfflineFtrAt5Pop1000RankDecay11CandidateStore"
  final val OfflineFtrAt5Pop10000RankDecay11CandidateStore =
    "OfflineFtrAt5Pop10000RankDecay11CandidateStore"

  // ---- TwHIN Strato stores ----
  final val TwhinCollabFilterStratoStoreForFollow =
    "TwhinCollabFilterStratoStoreForFollow"
  final val TwhinCollabFilterStratoStoreForEngagement =
    "TwhinCollabFilterStratoStoreForEngagement"
  final val TwhinMultiClusterStratoStoreForFollow =
    "TwhinMultiClusterStratoStoreForFollow"
  final val TwhinMultiClusterStratoStoreForEngagement =
    "TwhinMultiClusterStratoStoreForEngagement"

  // ---- Producer / tweet based similarity engines ----
  final val ProducerBasedUserAdGraphSimilarityEngine =
    "ProducerBasedUserAdGraphSimilarityEngine"
  final val ProducerBasedUserTweetGraphSimilarityEngine =
    "ProducerBasedUserTweetGraphSimilarityEngine"
  final val ProducerBasedUnifiedSimilarityEngine =
    "ProducerBasedUnifiedSimilarityEngine"
  final val TweetBasedUserAdGraphSimilarityEngine =
    "TweetBasedUserAdGraphSimilarityEngine"
  final val TweetBasedUserTweetGraphSimilarityEngine =
    "TweetBasedUserTweetGraphSimilarityEngine"
  final val TweetBasedUserVideoGraphSimilarityEngine =
    "TweetBasedUserVideoGraphSimilarityEngine"
  final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine"
  final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine"
  final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine"

  // ---- Consumer based graph stores ----
  final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore"
  final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore"
  final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore"

  final val UserTweetEntityGraphSimilarityEngine =
    "UserTweetEntityGraphSimilarityEngine"

  // ---- Topic tweet engines and their Strato stores ----
  final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine"
  final val CertoStratoStoreName = "CertoStratoStore"
  final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine"
  final val SkitHighPrecisionTopicTweetSimilarityEngine =
    "SkitHighPrecisionTopicTweetSimilarityEngine"
  final val SkitStratoStoreName = "SkitStratoStore"

  // ---- Navi gRPC clients ----
  final val HomeNaviGRPCClient = "HomeNaviGRPCClient"
  final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient"
  final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient"

  // ---- Diffusion ----
  final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore"
  final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine"

  final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore"
}

View File

@ -0,0 +1,13 @@
package com.twitter.cr_mixer.model
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.simclusters_v2.common.TweetId
/**
 * Binds a tweetId to a raw score generated by one single Similarity Engine.
 *
 * @param tweetId id of the tweet
 * @param score the raw score
 * @param similarityEngineType which underlying topic source the topic tweet is from
 */
case class TopicTweetWithScore(
  tweetId: TweetId,
  score: Double,
  similarityEngineType: SimilarityEngineType)

View File

@ -0,0 +1,6 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
/**
 * Pairs a tweet id with an author's user id.
 *
 * @param tweetId id of the tweet
 * @param authorId user id of the author
 */
case class TweetWithAuthor(
  tweetId: TweetId,
  authorId: UserId)

View File

@ -0,0 +1,8 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
/**
 * Binds a tweetId to a raw score generated by one single Similarity Engine.
 *
 * @param tweetId id of the tweet
 * @param score the raw score
 */
case class TweetWithScore(
  tweetId: TweetId,
  score: Double)

View File

@ -0,0 +1,12 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.recos.recos_common.thriftscala.SocialProofType
/**
 * Binds a tweetId to a raw score and to social proofs grouped by type.
 *
 * @param tweetId id of the tweet
 * @param score the raw score
 * @param socialProofByType for each SocialProofType, a sequence of Long ids
 *                          (presumably user ids - confirm against the producer)
 */
case class TweetWithScoreAndSocialProof(
  tweetId: TweetId,
  score: Double,
  socialProofByType: Map[SocialProofType, Seq[Long]])

View File

@ -0,0 +1,135 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.thrift.CompactThriftCodec
import com.twitter.ads.entities.db.thriftscala.LineItemObjective
import com.twitter.bijection.Injection
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.DataRecord
import com.twitter.ml.api.DataType
import com.twitter.ml.api.Feature
import com.twitter.ml.api.GeneralTensor
import com.twitter.ml.api.RichDataRecord
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.manhattan.Revenue
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.util.Future
import javax.inject.Named
import scala.collection.JavaConverters._
/**
 * Provides the ReadableStore that maps a tweet id to the line items of an active promoted
 * tweet, backed by Manhattan and fronted by a memcached layer and an in-memory cache.
 */
object ActivePromotedTweetStoreModule extends TwitterModule {

  /**
   * Adapts the raw Manhattan store (string key to DataRecord) into a store from TweetId to
   * Seq[LineItemInfo].
   *
   * Each DataRecord is expected to carry two INT64 tensor features,
   * "active_promoted_tweets.line_item_ids" and "active_promoted_tweets.line_item_objectives",
   * which are paired up position by position.
   *
   * @param activePromotedTweetMHStore the underlying store, keyed by the tweet id as a string
   * @param statsReceiver not used by this class at present
   */
  case class ActivePromotedTweetStore(
    activePromotedTweetMHStore: ReadableStore[String, DataRecord],
    statsReceiver: StatsReceiver)
      extends ReadableStore[TweetId, Seq[LineItemInfo]] {

    // The feature handles are constant, so they are built once per store instance instead of
    // once per lookup.
    private val lineItemIdsFeature: Feature[GeneralTensor] =
      new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64)
    private val lineItemObjectivesFeature: Feature[GeneralTensor] =
      new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64)

    /** Returns the longs held by an INT64 tensor, or an empty Seq for any other tensor. */
    private def int64Values(tensor: GeneralTensor): Seq[Long] =
      if (tensor.getSetField == GeneralTensor._Fields.INT64_TENSOR &&
        tensor.getInt64Tensor.isSetLongs) {
        tensor.getInt64Tensor.getLongs.asScala.map(_.toLong)
      } else Seq.empty

    /**
     * Looks up the line items of a tweet.
     *
     * @param tweetId the tweet to look up; its string form is the key in the underlying store
     * @return None when the underlying store has no record for the tweet; otherwise the line
     *         items, or an empty Seq when the id and objective tensors differ in length
     */
    override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] = {
      activePromotedTweetMHStore.get(tweetId.toString).map {
        _.map { dataRecord =>
          val richDataRecord = new RichDataRecord(dataRecord)

          // NOTE(review): assumes getFeatureValue does not return null when the record lacks
          // the feature - confirm against RichDataRecord.
          val lineItemIdsTensor: GeneralTensor =
            richDataRecord.getFeatureValue(lineItemIdsFeature)
          val lineItemObjectivesTensor: GeneralTensor =
            richDataRecord.getFeatureValue(lineItemObjectivesFeature)

          val lineItemIds: Seq[Long] = int64Values(lineItemIdsTensor)
          val lineItemObjectives: Seq[LineItemObjective] =
            int64Values(lineItemObjectivesTensor).map(objective =>
              LineItemObjective(objective.toInt))

          // The two tensors are parallel arrays; a length mismatch means the record cannot be
          // paired up reliably, so nothing is returned for it.
          if (lineItemIds.size == lineItemObjectives.size) {
            lineItemIds.zip(lineItemObjectives).map {
              case (lineItemId, lineItemObjective) =>
                LineItemInfo(
                  lineItemId = lineItemId,
                  lineItemObjective = lineItemObjective
                )
            }
          } else Seq.empty
        }
      }
    }
  }

  /**
   * Builds the layered store, from the source of truth outwards:
   *  1. a read-only Manhattan store (application "ads_bigquery_features", dataset
   *     "active_promoted_tweets", Revenue cluster),
   *  2. the ActivePromotedTweetStore adapter,
   *  3. a memcached layer with a 60 minute TTL and keys of the form "apt/<tweetId>",
   *  4. an in-memory cache with a 30 minute TTL and at most 250000 keys.
   *
   * @param manhattanKVClientMtlsParams mTLS parameters for the Manhattan client
   * @param crMixerUnifiedCacheClient the memcached client bound to ModuleNames.UnifiedCache
   * @param crMixerStatsReceiver base stats receiver; each layer reports under its own scope
   */
  @Provides
  @Singleton
  def providesActivePromotedTweetStore(
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    crMixerStatsReceiver: StatsReceiver
  ): ReadableStore[TweetId, Seq[LineItemInfo]] = {

    val mhConfig = new ManhattanROConfig {
      val hdfsPath = HDFSPath("")
      val applicationID = ApplicationID("ads_bigquery_features")
      val datasetName = DatasetName("active_promoted_tweets")
      val cluster = Revenue

      override def statsReceiver: StatsReceiver =
        crMixerStatsReceiver.scope("active_promoted_tweets_mh")
    }

    val mhStore: ReadableStore[String, DataRecord] =
      ManhattanRO
        .getReadableStoreWithMtls[String, DataRecord](
          mhConfig,
          manhattanKVClientMtlsParams
        )(
          implicitly[Injection[String, Array[Byte]]],
          CompactThriftCodec[DataRecord]
        )

    val underlyingStore =
      ActivePromotedTweetStore(mhStore, crMixerStatsReceiver.scope("ActivePromotedTweetStore"))

    val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 60.minutes,
      asyncUpdate = false
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()),
      statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"),
      keyToString = { k: TweetId => s"apt/$k" }
    )

    ObservedCachedReadableStore.from(
      memcachedStore,
      ttl = 30.minutes,
      maxKeys = 250000, // size of promoted tweet is around 200,000
      windowSize = 10000L,
      cacheName = "active_promoted_tweet_cache",
      maxMultiGetSize = 20
    )(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore"))
  }
}

View File

@ -0,0 +1,130 @@
# Scala library target. Its sources are the *.scala files in this directory plus those in the
# core/, grpc_client/, similarity_engine/, source_signal/ and thrift_client/ sub-directories.
scala_library(
sources = [
"*.scala",
"core/*.scala",
"grpc_client/*.scala",
"similarity_engine/*.scala",
"source_signal/*.scala",
"thrift_client/*.scala",
],
# NOTE(review): presumably makes compiler warnings fail the build - confirm against the
# build tool's option-set definitions.
compiler_option_sets = ["fatal_warnings"],
# NOTE(review): presumably requires every directly used dependency to be listed below rather
# than picked up transitively - confirm.
strict_deps = True,
tags = ["bazel-compatible"],
# Dependencies: third-party jars first, then in-repo targets.
dependencies = [
"3rdparty/jvm/com/twitter/bijection:core",
"3rdparty/jvm/com/twitter/bijection:scrooge",
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/com/twitter/storehaus:memcache",
"3rdparty/jvm/io/grpc:grpc-api",
"3rdparty/jvm/io/grpc:grpc-auth",
"3rdparty/jvm/io/grpc:grpc-core",
"3rdparty/jvm/io/grpc:grpc-netty",
"3rdparty/jvm/io/grpc:grpc-protobuf",
"3rdparty/jvm/io/grpc:grpc-stub",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/org/scalanlp:breeze",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"abdecider/src/main/scala",
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
"configapi/configapi-abdecider",
"configapi/configapi-core",
"configapi/configapi-featureswitches:v2",
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
"featureswitches/featureswitches-core",
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
"finagle-internal/finagle-grpc/src/main/scala",
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"finatra/inject/inject-modules/src/main/scala",
"finatra/inject/inject-thrift-client",
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"hydra/partition/thrift/src/main/thrift:thrift-scala",
"hydra/root/thrift/src/main/thrift:thrift-scala",
"mediaservices/commons/src/main/scala:futuretracker",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"qig-ranker/thrift/src/main/thrift:thrift-scala",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
"relevance-platform/thrift/src/main/thrift:thrift-scala",
"representation-manager/client/src/main/scala/com/twitter/representation_manager",
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
"representation-manager/server/src/main/thrift:thrift-scala",
"representation-scorer/server/src/main/thrift:thrift-scala",
"servo/decider",
"servo/util/src/main/scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/java/com/twitter/ml/api:api-base",
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
"src/java/com/twitter/search/queryparser/query/search:search-query-nodes",
"src/scala/com/twitter/algebird_internal/injection",
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
"src/scala/com/twitter/ml/api/embedding",
"src/scala/com/twitter/ml/featurestore/lib",
"src/scala/com/twitter/scalding_internal/multiformat/format",
"src/scala/com/twitter/simclusters_v2/candidate_source",
"src/scala/com/twitter/simclusters_v2/common",
"src/scala/com/twitter/storehaus_internal/manhattan",
"src/scala/com/twitter/storehaus_internal/manhattan/config",
"src/scala/com/twitter/storehaus_internal/memcache",
"src/scala/com/twitter/storehaus_internal/memcache/config",
"src/scala/com/twitter/storehaus_internal/offline",
"src/scala/com/twitter/storehaus_internal/util",
"src/scala/com/twitter/topic_recos/stores",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
"src/thrift/com/twitter/frigate:frigate-thrift-scala",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/hermit/stp:hermit-stp-scala",
"src/thrift/com/twitter/ml/api:data-java",
"src/thrift/com/twitter/ml/api:embedding-scala",
"src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala",
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
"src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala",
"src/thrift/com/twitter/search:earlybird-scala",
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala",
"src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala",
"src/thrift/com/twitter/tweetypie:service-scala",
"src/thrift/com/twitter/twistly:twistly-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"stitch/stitch-storehaus",
"stitch/stitch-tweetypie/src/main/scala",
"strato/src/main/scala/com/twitter/strato/client",
"user-signal-service/thrift/src/main/thrift:thrift-scala",
"util-internal/scribe/src/main/scala/com/twitter/logging",
"util/util-hashing",
],
)

View File

@ -0,0 +1,52 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Athena
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.hermit.store.common.ObservedCachedReadableStore
/**
 * Guice module binding the read-only Blue Verified annotation store, keyed by `String`,
 * under the name [[ModuleNames.BlueVerifiedAnnotationStore]].
 */
object BlueVerifiedAnnotationStoreModule extends TwitterModule {

  /**
   * Builds the Manhattan read-only store for the `blue_verified_annotations` dataset and wraps
   * it in an [[ObservedCachedReadableStore]] (24 hour TTL, at most 100000 keys).
   *
   * @param statsReceiver               root receiver; the cache reports under the
   *                                    `inMemoryCachedBlueVerifiedAnnotationStore` scope
   * @param manhattanKVClientMtlsParams mTLS parameters handed to the Manhattan client
   * @return the cached store of [[BlueVerifiedAnnotationsV2]] values
   */
  @Provides
  @Singleton
  @Named(ModuleNames.BlueVerifiedAnnotationStore)
  def providesBlueVerifiedAnnotationStore(
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
  ): ReadableStore[String, BlueVerifiedAnnotationsV2] = {
    // Not referenced by name below: it is resolved implicitly when the Manhattan store is built.
    implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2)

    val datasetConfig = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("content_recommender_athena"),
      DatasetName("blue_verified_annotations"),
      Athena
    )
    val manhattanStore =
      ManhattanRO.getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2](
        datasetConfig,
        manhattanKVClientMtlsParams
      )

    ObservedCachedReadableStore.from(
      manhattanStore,
      ttl = 24.hours,
      maxKeys = 100000,
      windowSize = 10000L,
      cacheName = "blue_verified_annotation_cache"
    )(statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore"))
  }
}

View File

@ -0,0 +1,57 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore
import com.twitter.topic_recos.thriftscala.TweetWithScores
/**
 * Guice module binding the Certo topic -> top tweets store under
 * [[ModuleNames.CertoStratoStoreName]].
 */
object CertoStratoStoreModule extends TwitterModule {

  /**
   * Layers three stores, outermost first:
   *  1. an [[ObservedCachedReadableStore]] (5 minute TTL, cache name `certo_in_memory_cache`),
   *  2. an [[ObservedMemcachedReadableStore]] on the unified cache client (10 minute TTL),
   *  3. the observed Certo Strato store, projected onto
   *     `topTweetsByFollowerL2NormalizedCosineSimilarityScore`.
   *
   * @param crMixerUnifiedCacheClient memcached client used for the middle layer
   * @param stratoClient              Strato client backing the Certo store
   * @param statsReceiver             root receiver; every layer reports under its own scope
   */
  @Provides
  @Singleton
  @Named(ModuleNames.CertoStratoStoreName)
  def providesCertoStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicId, Seq[TweetWithScores]] = {
    val observedCertoStore =
      ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))(
        statsReceiver.scope(ModuleNames.CertoStratoStoreName))

    // Keep only the tweet list ranked by follower L2-normalized cosine similarity.
    val topTweetsStore =
      observedCertoStore.mapValues(_.topTweetsByFollowerL2NormalizedCosineSimilarityScore)

    val memcachedTopTweetsStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = topTweetsStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()),
        statsReceiver = statsReceiver.scope("memcached_certo_store"),
        // The memcache key is the hash of the topic id's string form, prefixed with "certo:".
        keyToString = { topicId => s"certo:${keyHasher.hashKey(topicId.toString.getBytes)}" }
      )

    ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]](
      memcachedTopTweetsStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "certo_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("certo_in_memory_cache"))
  }
}

View File

@ -0,0 +1,30 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest
import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse
import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module exposing the UserAdGraph `consumersBasedRelatedAds` endpoint as a
 * [[ReadableStore]] bound under [[ModuleNames.ConsumerBasedUserAdGraphStore]].
 */
object ConsumersBasedUserAdGraphStoreModule extends TwitterModule {

  /**
   * @param userAdGraphService thrift client for the user ad graph service
   * @return a store whose `get` forwards the request to `consumersBasedRelatedAds`
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserAdGraphStore)
  def providesConsumerBasedUserAdGraphStore(
    userAdGraphService: UserAdGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] =
    new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] {
      // A completed call is always wrapped in Some; this store never yields None itself.
      override def get(
        request: ConsumersBasedRelatedAdRequest
      ): Future[Option[RelatedAdResponse]] =
        userAdGraphService
          .consumersBasedRelatedAds(request)
          .map(response => Some(response))
    }
}

View File

@ -0,0 +1,30 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest
import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse
import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module exposing the UserTweetGraph `consumersBasedRelatedTweets` endpoint as a
 * [[ReadableStore]] bound under [[ModuleNames.ConsumerBasedUserTweetGraphStore]].
 */
object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule {

  /**
   * @param userTweetGraphService thrift client for the user tweet graph service
   * @return a store whose `get` forwards the request to `consumersBasedRelatedTweets`
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserTweetGraphStore)
  def providesConsumerBasedUserTweetGraphStore(
    userTweetGraphService: UserTweetGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      // A completed call is always wrapped in Some; this store never yields None itself.
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        userTweetGraphService
          .consumersBasedRelatedTweets(request)
          .map(response => Some(response))
    }
}

View File

@ -0,0 +1,30 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest
import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse
import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module exposing the UserVideoGraph `consumersBasedRelatedTweets` endpoint as a
 * [[ReadableStore]] bound under [[ModuleNames.ConsumerBasedUserVideoGraphStore]].
 */
object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule {

  /**
   * @param userVideoGraphService thrift client for the user video graph service
   * @return a store whose `get` forwards the request to `consumersBasedRelatedTweets`
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserVideoGraphStore)
  def providesConsumerBasedUserVideoGraphStore(
    userVideoGraphService: UserVideoGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      // A completed call is always wrapped in Some; this store never yields None itself.
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        userVideoGraphService
          .consumersBasedRelatedTweets(request)
          .map(response => Some(response))
    }
}

View File

@ -0,0 +1,16 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.timelines.configapi.Config
import com.twitter.cr_mixer.param.CrMixerParamConfig
import com.twitter.inject.TwitterModule
import javax.inject.Singleton
/** Guice module binding the configapi [[Config]] used by cr-mixer. */
object CrMixerParamConfigModule extends TwitterModule {

  /** @return the config exposed by `CrMixerParamConfig.config` */
  @Provides
  @Singleton
  def provideConfig(): Config =
    CrMixerParamConfig.config
}

View File

@ -0,0 +1,54 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.TweetsWithScore
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module binding Manhattan read-only stores of diffusion-based tweet recommendations,
 * keyed by `Long`.
 */
object DiffusionStoreModule extends TwitterModule {
  type UserId = Long

  // NOTE(review): keep this val without a type annotation. With an explicit type,
  // `implicitly` on the right-hand side could resolve to this very val -- confirm before changing.
  implicit val longCodec = implicitly[Injection[Long, Array[Byte]]]
  implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] =
    BinaryScalaCodec(TweetsWithScore)

  /**
   * Store over the `diffusion_retweet_tweet_recs` dataset of the `cr_mixer_apollo` application.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RetweetBasedDiffusionRecsMhStore)
  def retweetBasedDiffusionRecsMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[Long, TweetsWithScore] =
    buildTweetRecsStore(
      serviceIdentifier,
      ManhattanROConfig(
        HDFSPath(""), // not needed
        ApplicationID("cr_mixer_apollo"),
        DatasetName("diffusion_retweet_tweet_recs"),
        Apollo
      )
    )

  /** Creates the Manhattan store for `manhattanROConfig`, passing both codecs explicitly. */
  private def buildTweetRecsStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[Long, TweetsWithScore] = {
    val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
    ManhattanRO.getReadableStoreWithMtls[Long, TweetsWithScore](
      manhattanROConfig,
      mtlsParams
    )(longCodec, tweetRecsInjection)
  }
}

View File

@ -0,0 +1,189 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams
import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode
import com.twitter.search.earlybird.thriftscala.EarlybirdService
import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery
import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import javax.inject.Named
/**
 * Guice module providing memcache-backed stores that map a user id to tweet ids returned by an
 * Earlybird search in recency ranking mode, restricted to that user via `fromUserIDFilter64`.
 *
 * Two variants are bound: one built with `FilterOutRetweetsAndReplies` and one with
 * `NotFilterOutRetweetsAndReplies`. Both share the same memcache client and use distinct key
 * prefixes.
 */
object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule {

  /**
   * Store variant whose Earlybird query is built with `FilterOutRetweetsAndReplies`.
   *
   * @param statsReceiver                    root receiver for search and memcache stats
   * @param earlybirdSearchClient            Earlybird thrift client
   * @param earlybirdRecencyBasedTweetsCache memcached client holding the cached tweet id lists
   * @param timeoutConfig                    supplies `earlybirdServerTimeout` for the request
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        // Home based EB filters out retweets and replies
        val earlybirdRequest =
          buildEarlybirdRequest(
            userId,
            FilterOutRetweetsAndReplies,
            DefaultMaxNumTweetPerUser,
            processingTimeout = timeoutConfig.earlybirdServerTimeout
          )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"),
      keyToString = { k =>
        f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe
      }
    )
  }

  /**
   * Store variant whose Earlybird query is built with `NotFilterOutRetweetsAndReplies`.
   *
   * @param statsReceiver                    root receiver for search and memcache stats
   * @param earlybirdSearchClient            Earlybird thrift client
   * @param earlybirdRecencyBasedTweetsCache memcached client holding the cached tweet id lists
   * @param timeoutConfig                    supplies `earlybirdServerTimeout` for the request
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        val earlybirdRequest = buildEarlybirdRequest(
          userId,
          // Notifications based EB keeps retweets and replies
          NotFilterOutRetweetsAndReplies,
          DefaultMaxNumTweetPerUser,
          processingTimeout = timeoutConfig.earlybirdServerTimeout
        )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"),
      keyToString = { k =>
        f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications
      }
    )
  }

  /** Hash applied to the user id when building memcache keys. */
  private val keyHasher: KeyHasher = KeyHasher.FNV1A_64

  /**
   * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry.
   * If the value changes, it will increase the size of the memcache.
   */
  private val DefaultMaxNumTweetPerUser: Int = 100
  private val FilterOutRetweetsAndReplies = true
  private val NotFilterOutRetweetsAndReplies = false
  private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15)

  /**
   * Builds the Earlybird request for one seed user.
   *
   * @param seedUserId                  user the search is restricted to
   * @param filterOutRetweetsAndReplies forwarded to the query builder
   * @param maxNumTweetsPerSeedUser     requested number of results
   * @param processingTimeout           used both as the request `timeoutMs` and for the
   *                                    collector termination params
   */
  private def buildEarlybirdRequest(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): EarlybirdRequest =
    EarlybirdRequest(
      searchQuery = getThriftSearchQuery(
        seedUserId = seedUserId,
        filterOutRetweetsAndReplies = filterOutRetweetsAndReplies,
        maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser,
        processingTimeout = processingTimeout
      ),
      clientId = Some(EarlybirdClientId),
      timeoutMs = processingTimeout.inMilliseconds.intValue(),
      getOlderResults = Some(false),
      adjustedProtectedRequestParams = None,
      adjustedFullArchiveRequestParams = None,
      getProtectedTweetsOnly = Some(false),
      skipVeryRecentTweets = true,
    )

  /** Builds the recency-ranked search query embedded in the Earlybird request. */
  private def getThriftSearchQuery(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): ThriftSearchQuery = ThriftSearchQuery(
    serializedQuery = GetEarlybirdQuery(
      None,
      None,
      Set.empty,
      filterOutRetweetsAndReplies
    ).map(_.serialize),
    fromUserIDFilter64 = Some(Seq(seedUserId)),
    numResults = maxNumTweetsPerSeedUser,
    rankingMode = ThriftSearchRankingMode.Recency,
    collectorParams = Some(
      CollectorParams(
        // numResultsToReturn defines how many results each EB shard will return to search root
        numResultsToReturn = maxNumTweetsPerSeedUser,
        // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching.
        terminationParams =
          GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout)
      )),
    facetFieldNames = Some(FacetsToFetch),
    resultMetadataOptions = Some(MetadataOptions),
    searchStatusIds = None
  )

  /**
   * Runs the search and extracts the tweet ids from the response.
   *
   * On a `Success` response code the ids of `searchResults` are returned (`None` when the
   * response carries no `searchResults`) and the number of ids is recorded in the
   * `result/size` stat. Any other response code increments a counter under `failures` and
   * yields `Some(Seq.empty)`.
   *
   * NOTE(review): the empty fallback is an ordinary value, so the memcache layer in front of
   * this call presumably caches it for the full TTL -- confirm that this is intended.
   */
  private def getEarlybirdSearchResult(
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    request: EarlybirdRequest,
    statsReceiver: StatsReceiver
  ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient
    .search(request)
    .map { response =>
      response.responseCode match {
        case EarlybirdResponseCode.Success =>
          val earlybirdSearchResult =
            response.searchResults
              .map {
                _.results
                  .map(searchResult => searchResult.id)
              }
          // Record the number of tweet ids. Calling `.size` on the Option itself would only
          // ever report 0 or 1.
          val numTweetIds = earlybirdSearchResult.map(_.size).getOrElse(0)
          statsReceiver.scope("result").stat("size").add(numTweetIds.toFloat)
          earlybirdSearchResult
        case e =>
          statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr()
          Some(Seq.empty)
      }
    }
}

View File

@ -0,0 +1,195 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.{thriftscala => api}
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module binding Manhattan read-only embedding stores keyed by [[InternalId]].
 *
 * Every store reads a dataset of the `cr_mixer_apollo` application on the Apollo cluster and
 * decodes values with a binary thrift codec. User stores accept only `InternalId.UserId` keys
 * and tweet stores only `InternalId.TweetId` keys; any other key variant makes the lookup throw
 * an `UnsupportedOperationException`.
 */
object EmbeddingStoreModule extends TwitterModule {
  type UserId = Long
  implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
    CompactScalaCodec(api.Embedding)
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  // The *StoreName constants carry no type annotation so that they stay constant value
  // definitions usable inside the @Named annotations.
  final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore"

  /** Tweet embedding store over `twhin_regular_update_tweet_embedding_apollo`. */
  @Provides
  @Singleton
  @Named(TwHINEmbeddingRegularUpdateMhStoreName)
  def twHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildTweetEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("twhin_regular_update_tweet_embedding_apollo")
    )

  final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName =
    "ConsumerBasedTwHINEmbeddingRegularUpdateMhStore"

  /** User embedding store over `twhin_user_embedding_regular_update_apollo`. */
  @Provides
  @Singleton
  @Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName)
  def consumerBasedTwHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("twhin_user_embedding_regular_update_apollo")
    )

  final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore"

  /** User embedding store over `two_tower_fav_user_embedding_apollo`. */
  @Provides
  @Singleton
  @Named(TwoTowerFavConsumerEmbeddingMhStoreName)
  def twoTowerFavConsumerEmbeddingMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("two_tower_fav_user_embedding_apollo")
    )

  // NOTE(review): unlike the sibling constants, this value ends in "Name". It is kept as-is
  // because the string is the binding name.
  final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName"

  /** User embedding store over `experimental_user_embedding`. */
  @Provides
  @Singleton
  @Named(DebuggerDemoUserEmbeddingMhStoreName)
  def debuggerDemoUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    buildUserEmbeddingStore(serviceIdentifier, crMixerApolloConfig("experimental_user_embedding"))
  }

  final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore"

  /** Tweet embedding store over `experimental_tweet_embedding`. */
  @Provides
  @Singleton
  @Named(DebuggerDemoTweetEmbeddingMhStoreName)
  def debuggerDemoTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    buildTweetEmbeddingStore(serviceIdentifier, crMixerApolloConfig("experimental_tweet_embedding"))
  }

  /** Read-only config for `datasetName` in the `cr_mixer_apollo` application on Apollo. */
  private def crMixerApolloConfig(datasetName: String): ManhattanROConfig =
    ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName(datasetName),
      Apollo
    )

  /**
   * Builds a store keyed by user id and exposes it under [[InternalId]] keys.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   * @param manhattanROConfig dataset to read
   */
  private def buildUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] = {
    // Both codecs are passed explicitly, so the object's implicit compact codec is not used here.
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)
    val longCodec = implicitly[Injection[Long, Array[Byte]]]
    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  /**
   * Builds a store keyed by tweet id and exposes it under [[InternalId]] keys.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   * @param manhattanROConfig dataset to read
   */
  private def buildTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] = {
    // Both codecs are passed explicitly, so the object's implicit compact codec is not used here.
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)
    val longCodec = implicitly[Injection[Long, Array[Byte]]]
    ManhattanRO
      .getReadableStoreWithMtls[TweetId, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.TweetId(tweetId) =>
          tweetId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }
}

View File

@ -0,0 +1,29 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
/** Guice module binding the follow-recommendations store under [[ModuleNames.FrsStore]]. */
object FrsStoreModule extends TwitterModule {

  /**
   * Creates an [[FrsStore]] and wraps it in an [[ObservedReadableStore]] reporting under the
   * `follow_recommendations_store` scope.
   *
   * @param frsClient     thrift client for the follow recommendations service
   * @param statsReceiver root receiver, also handed to the underlying [[FrsStore]]
   * @param decider       decider handed to the underlying [[FrsStore]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.FrsStore)
  def providesFrsStore(
    frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
    statsReceiver: StatsReceiver,
    decider: CrMixerDecider
  ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = {
    val frsStore = FrsStore(frsClient, statsReceiver, decider)
    ObservedReadableStore(frsStore)(statsReceiver.scope("follow_recommendations_store"))
  }
}

View File

@ -0,0 +1,17 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import javax.inject.Singleton
/** Guice module binding the Manhattan mTLS client parameters. */
object MHMtlsParamsModule extends TwitterModule {

  /**
   * @param serviceIdentifier identity the parameters are created from
   * @return mTLS parameters for Manhattan KV clients
   */
  @Provides
  @Singleton
  def providesManhattanMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams =
    ManhattanKVClientMtlsParams(serviceIdentifier)
}

View File

@ -0,0 +1,150 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module binding Manhattan read-only stores of offline tweet recommendations.
 *
 * Every provider reads one dataset of the `multi_type_simclusters` application and returns a
 * store from user id to [[CandidateTweetsList]]; the providers differ only in the dataset name
 * and the binding name.
 */
object OfflineCandidateStoreModule extends TwitterModule {
  type UserId = Long

  // Resolved implicitly as the value codec when the Manhattan stores are created.
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  /** Store over `offline_tweet_recommendations_from_interestedin_2020`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020CandidateStore)
  def offlineTweet2020CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_interestedin_2020")

  /** Store over `offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore)
  def offlineTweet2020Hl0El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15")

  /** Store over `offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore)
  def offlineTweet2020Hl2El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15")

  /** Store over `offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore)
  def offlineTweet2020Hl2El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50")

  /** Store over `offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore)
  def offlineTweet2020Hl8El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50")

  /** Store over `offline_tweet_recommendations_from_mts_consumer_embeddings`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweetMTSCandidateStore)
  def offlineTweetMTSCandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_mts_consumer_embeddings")

  /** Store over `offline_tweet_recommendations_from_decayed_sum`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFavDecayedSumCandidateStore)
  def offlineFavDecayedSumCandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_decayed_sum")

  /** Store over `offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore)
  def offlineFtrAt5Pop1000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1")

  /** Store over `offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1`. */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore)
  def offlineFtrAt5Pop10000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1")

  /**
   * Creates the Manhattan read-only store for one offline recommendation dataset.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   * @param datasetName       dataset within the `multi_type_simclusters` application
   */
  private def buildOfflineCandidateStore(
    serviceIdentifier: ServiceIdentifier,
    datasetName: String
  ): ReadableStore[UserId, CandidateTweetsList] = {
    val datasetConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("multi_type_simclusters"),
      DatasetName(datasetName),
      Apollo
    )
    ManhattanRO.getReadableStoreWithMtls[Long, CandidateTweetsList](
      datasetConfig,
      ManhattanKVClientMtlsParams(serviceIdentifier)
    )
  }
}

View File

@ -0,0 +1,39 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
import javax.inject.Singleton
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
/**
 * Guice module binding the Strato-backed user real graph OON store under
 * [[ModuleNames.RealGraphOonStore]].
 */
object RealGraphOonStoreModule extends TwitterModule {

  // Strato column to read; overridable through the `crMixer.userRealGraphOonColumnPath` flag.
  private val userRealGraphOonColumnPath: Flag[String] = flag[String](
    name = "crMixer.userRealGraphOonColumnPath",
    default = "recommendations/twistly/userRealgraphOon",
    help = "Strato column path for user real graph OON Store"
  )

  /**
   * Creates a unit-view fetchable store on the configured column and wraps it in an
   * [[ObservedReadableStore]] reporting under the `user_real_graph_oon_store` scope.
   *
   * @param stratoClient  Strato client used to fetch from the column
   * @param statsReceiver root receiver for the store stats
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphOonStore)
  def providesRealGraphOonStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): ReadableStore[UserId, CandidateSeq] = {
    val columnPath = userRealGraphOonColumnPath()
    val fetchableStore =
      StratoFetchableStore.withUnitView[UserId, CandidateSeq](stratoClient, columnPath)

    ObservedReadableStore(fetchableStore)(statsReceiver.scope("user_real_graph_oon_store"))
  }
}

View File

@ -0,0 +1,67 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
/**
 * Guice module binding the Manhattan-backed real graph store under
 * [[ModuleNames.RealGraphInStore]].
 */
object RealGraphStoreMhModule extends TwitterModule {

  /**
   * Layers, outermost first: a [[DeciderableReadableStore]] gated on
   * `DeciderKey.enableRealGraphMhStoreDeciderKey`, an [[ObservedMemcachedReadableStore]] with a
   * 24 hour TTL, and the Manhattan store over the `real_graph_scores_apollo` dataset.
   *
   * @param decider                     supplies the gate for the outer store
   * @param statsReceiver               root receiver; each layer reports under its own scope
   * @param manhattanKVClientMtlsParams mTLS parameters handed to the Manhattan client
   * @param crMixerUnifiedCacheClient   memcached client used for the cache layer
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphInStore)
  def providesRealGraphStoreMh(
    decider: CrMixerDecider,
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
  ): ReadableStore[UserId, CandidateSeq] = {
    // Used implicitly by the Manhattan store and explicitly as the memcache value injection.
    implicit val valueCodec = new BinaryScalaCodec(CandidateSeq)

    val datasetConfig = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("cr_mixer_apollo"),
      DatasetName("real_graph_scores_apollo"),
      Apollo
    )
    val manhattanStore = ManhattanRO.getReadableStoreWithMtls[UserId, CandidateSeq](
      datasetConfig,
      manhattanKVClientMtlsParams
    )

    val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = manhattanStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 24.hours,
    )(
      valueInjection = valueCodec,
      statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"),
      keyToString = { userId: UserId => s"uRGraph/$userId" }
    )

    val realGraphGate =
      decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey)

    DeciderableReadableStore(
      memcachedStore,
      realGraphGate,
      statsReceiver.scope("RealGraphMh")
    )
  }
}

View File

@ -0,0 +1,107 @@
package com.twitter.cr_mixer.module
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.google.inject.Provides
import com.google.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
/**
 * Guice module exposing the SimClusters embedding stores served from the Representation
 * Manager Strato columns.
 *
 * Every provider fetches a Thrift embedding with a [[SimClustersEmbeddingView]] pinned to
 * `ModelVersion.Model20m145k2020`, converts it with `SimClustersEmbedding(_)` and wraps the
 * result in an [[ObservedReadableStore]].
 */
object RepresentationManagerModule extends TwitterModule {
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"

  /**
   * Shared construction path for all providers in this module.
   *
   * @param stratoClient  client used to fetch from the Strato column
   * @param columnPath    Strato column to read (tweet or user column)
   * @param embeddingType embedding type requested through the view
   * @param statsReceiver root receiver from which the store's scope is derived
   * @param statsScope    name of the stats scope for the observed store
   */
  private def observedEmbeddingStore(
    stratoClient: StratoClient,
    columnPath: String,
    embeddingType: EmbeddingType,
    statsReceiver: StatsReceiver,
    statsScope: String
  ): ReadableStore[Long, SimClustersEmbedding] = {
    val view = SimClustersEmbeddingView(embeddingType, ModelVersion.Model20m145k2020)
    val embeddingStore = StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        columnPath,
        view)
      .mapValues(SimClustersEmbedding(_))
    ObservedReadableStore(embeddingStore)(statsReceiver.scope(statsScope))
  }

  /** Tweet embeddings of type `LogFavLongestL2EmbeddingTweet`, read from the tweet column. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore)
  def providesRepresentationManagerTweetStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, SimClustersEmbedding] =
    observedEmbeddingStore(
      stratoClient,
      SimclustersTweetColPath,
      EmbeddingType.LogFavLongestL2EmbeddingTweet,
      statsReceiver,
      "rms_tweet_log_fav_longest_l2_store")

  /** User embeddings of type `FavBasedProducer`, read from the user column. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore)
  def providesRepresentationManagerUserFavBasedProducerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    observedEmbeddingStore(
      stratoClient,
      SimclustersUserColPath,
      EmbeddingType.FavBasedProducer,
      statsReceiver,
      "rms_user_fav_based_producer_store")

  /** User embeddings of type `LogFavBasedUserInterestedIn`, read from the user column. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore)
  def providesRepresentationManagerUserLogFavConsumerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    observedEmbeddingStore(
      stratoClient,
      SimclustersUserColPath,
      EmbeddingType.LogFavBasedUserInterestedIn,
      statsReceiver,
      "rms_user_log_fav_interestedin_store")

  /** User embeddings of type `FollowBasedUserInterestedIn`, read from the user column. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore)
  def providesRepresentationManagerUserFollowInterestedInEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    observedEmbeddingStore(
      stratoClient,
      SimclustersUserColPath,
      EmbeddingType.FollowBasedUserInterestedIn,
      statsReceiver,
      "rms_user_follow_interestedin_store")
}

View File

@ -0,0 +1,56 @@
package com.twitter.cr_mixer.module
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.storehaus.ReadableStore
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.hermit.store.common.ObservedReadableStore
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.representationscorer.thriftscala.ListScoreId
/**
 * Guice module providing the Representation Scorer list-score store bound to
 * [[ModuleNames.RsxStore]].
 */
object RepresentationScorerModule extends TwitterModule {
  private val rsxColumnPath = "recommendations/representation_scorer/listScore"
  private final val SimClusterModelVersion = ModelVersion.Model20m145k2020
  private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet

  /**
   * Builds an observed store that accepts an id pair, translates it into a [[ListScoreId]]
   * and fetches the score from the `listScore` Strato column.
   *
   * NOTE(review): the key is declared as `(UserId, TweetId)`, yet the key mapping below wraps
   * both components in `InternalId.TweetId` and uses the tweet embedding type for both sides.
   * Confirm with callers whether the first component is really a tweet id.
   *
   * @param statsReceiver root receiver; the store reports under the "rsx_store" scope
   * @param stratoClient  client used to fetch from the Strato column
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RsxStore)
  def providesRepresentationScorerStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[(UserId, TweetId), Double] = {
    val listScoreStore = StratoFetchableStore
      .withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath)

    val pairKeyedStore = listScoreStore.composeKeyMapping[(UserId, TweetId)] {
      case (targetId, candidateId) =>
        representationScorerStoreKeyMapping(targetId, candidateId)
    }

    ObservedReadableStore(pairKeyedStore)(statsReceiver.scope("rsx_store"))
  }

  /**
   * Converts an id pair into the scoring request: a single-candidate list scored with
   * `PairEmbeddingLogCosineSimilarity`, using the same tweet embedding type for target and
   * candidate.
   */
  private def representationScorerStoreKeyMapping(
    targetTweetId: TweetId,
    candidateTweetId: TweetId
  ): ListScoreId =
    ListScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity,
      modelVersion = SimClusterModelVersion,
      targetEmbeddingType = TweetEmbeddingType,
      targetId = InternalId.TweetId(targetTweetId),
      candidateEmbeddingType = TweetEmbeddingType,
      candidateIds = Seq(InternalId.TweetId(candidateTweetId))
    )
}

View File

@ -0,0 +1,90 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton
/**
 * Example module: wraps an in-memory dummy store in a [[StandardSimilarityEngine]].
 */
object SimpleSimilarityEngineModule extends TwitterModule {

  /**
   * Builds the example engine over a fixed three-entry map.
   *
   * @param timeoutConfig supplies `similarityEngineTimeout` for the engine config
   * @param globalStats   stats receiver handed to the engine
   * @return an engine with no decider and no feature-switch gating configured
   */
  @Provides
  @Singleton
  def providesSimpleSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): StandardSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyCandidates = Map(
      1L -> Seq((100L, 1.0), (101L, 1.0)),
      2L -> Seq((200L, 2.0), (201L, 2.0)),
      3L -> Seq((300L, 3.0), (301L, 3.0))
    )
    val inMemoryStore = ReadableStore.fromMap(dummyCandidates)

    // Neither gate is set, so the engine is built with both gating options as None.
    val ungated = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)

    new StandardSimilarityEngine[UserId, (TweetId, Double)](
      implementingStore = inMemoryStore,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997),
      globalStats = globalStats,
      engineConfig = SimilarityEngineConfig(
        timeout = timeoutConfig.similarityEngineTimeout,
        gatingConfig = ungated
      )
    )
  }
}
/**
 * Example module: wraps two versions ("V1" and "V2") of an in-memory dummy store in a
 * [[LookupSimilarityEngine]].
 */
object LookupSimilarityEngineModule extends TwitterModule {

  /**
   * Builds the example engine; both versions hold the same two dummy entries but are
   * separate store instances.
   *
   * @param timeoutConfig supplies `similarityEngineTimeout` for the engine config
   * @param globalStats   stats receiver handed to the engine
   * @return an engine with no decider and no feature-switch gating configured
   */
  @Provides
  @Singleton
  def providesLookupSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): LookupSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    // Each call yields a new store instance backed by the same dummy contents.
    def freshDummyStore() = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0))
      ))

    val storeV1 = freshDummyStore()
    val storeV2 = freshDummyStore()

    val ungated = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)

    new LookupSimilarityEngine[UserId, (TweetId, Double)](
      versionedStoreMap = Map("V1" -> storeV1, "V2" -> storeV2),
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998),
      globalStats = globalStats,
      engineConfig = SimilarityEngineConfig(
        timeout = timeoutConfig.similarityEngineTimeout,
        gatingConfig = ungated
      )
    )
  }
}

View File

@ -0,0 +1,33 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import javax.inject.Named
/**
 * Guice module that gathers the individually named SimClusters ANN clients into one lookup
 * table keyed by service name.
 */
object SimClustersANNServiceNameToClientMapper extends TwitterModule {

  /**
   * Collects the seven injected clients into a map from service name
   * (for example "simclusters-ann" or "simclusters-ann-1") to the matching client.
   *
   * @return an immutable map with one entry per injected client
   */
  @Provides
  @Singleton
  def providesSimClustersANNServiceNameToClientMapping(
    @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint
  ): Map[String, SimClustersANNService.MethodPerEndpoint] = {
    val clientsByServiceName = Seq[(String, SimClustersANNService.MethodPerEndpoint)](
      ("simclusters-ann", simClustersANNServiceProd),
      ("simclusters-ann-experimental", simClustersANNServiceExperimental),
      ("simclusters-ann-1", simClustersANNService1),
      ("simclusters-ann-2", simClustersANNService2),
      ("simclusters-ann-3", simClustersANNService3),
      ("simclusters-ann-5", simClustersANNService5),
      ("simclusters-ann-4", simClustersANNService4)
    )
    clientsByServiceName.toMap
  }
}

View File

@ -0,0 +1,65 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.thriftscala.TopicTopTweets
import com.twitter.topic_recos.thriftscala.TopicTweet
import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey
/**
 * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job.
 *
 * Reads go through three layers: in-memory cache (5 minute TTL) -> memcached
 * (10 minute TTL) -> observed Strato column.
 */
object SkitStratoStoreModule extends TwitterModule {
  val column = "recommendations/topic_recos/topicTopTweets"

  /**
   * Builds the cached topic-tweet store.
   *
   * @param crMixerUnifiedCacheClient memcached client backing the middle cache layer
   * @param stratoClient              client used to fetch from the Strato column
   * @param statsReceiver             root receiver from which each layer's scope is derived
   * @return a store mapping a topic partition key to its top tweets
   */
  @Provides
  @Singleton
  @Named(ModuleNames.SkitStratoStoreName)
  def providesSkitStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = {
    val observedStratoStore = ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column)
    )(statsReceiver.scope(ModuleNames.SkitStratoStoreName))

    // Only the tweet list of the fetched struct is exposed downstream.
    val topTweetsStore = observedStratoStore.mapValues(_.topTweets)

    val memcachedTopTweetsStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = topTweetsStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()),
        statsReceiver = statsReceiver.scope("memcached_skit_store"),
        // The cache key is the hash of the key's string form, prefixed with "skit:".
        keyToString = { key => s"skit:${keyHasher.hashKey(key.toString.getBytes)}" }
      )

    ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]](
      memcachedTopTweetsStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "skit_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("skit_in_memory_cache"))
  }
}

View File

@ -0,0 +1,39 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.hermit.stp.thriftscala.STPResult
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Named
/**
 * Guice module providing the Strong Tie Prediction store bound to [[ModuleNames.StpStore]].
 */
object StrongTiePredictionStoreModule extends TwitterModule {

  // Column path is configurable through the `crMixer.strongTiePredictionColumnPath` flag.
  private val strongTiePredictionColumnPath: Flag[String] = flag[String](
    name = "crMixer.strongTiePredictionColumnPath",
    default = "onboarding/userrecs/strong_tie_prediction_big",
    help = "Strato column path for StrongTiePredictionStore"
  )

  /**
   * Builds an observed Strato unit-view store keyed by user id.
   *
   * @param statsReceiver root receiver; the store reports under
   *                      "strong_tie_prediction_big_store"
   * @param stratoClient  client used to fetch from the Strato column
   * @return a store mapping a user id to its [[STPResult]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.StpStore)
  def providesStrongTiePredictionStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, STPResult] =
    ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[UserId, STPResult](stratoClient, strongTiePredictionColumnPath())
    )(statsReceiver.scope("strong_tie_prediction_big_store"))
}

View File

@ -0,0 +1,34 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
import javax.inject.Named
/**
 * Guice module providing the Trip candidate store bound to
 * [[ModuleNames.TripCandidateStore]].
 */
object TripCandidateStoreModule extends TwitterModule {
  private val stratoColumn = "trends/trip/tripTweetsDataflowProd"

  /**
   * Builds an observed Strato unit-view store that exposes only the `tweets` field of the
   * fetched [[TripTweets]] value.
   *
   * The binding is singleton-scoped, like the other store providers in this package, so
   * the observed store is built once instead of once per injection point. The annotation
   * is written fully qualified because this file does not import a `Singleton` annotation.
   *
   * @param statsReceiver root receiver; the store reports under
   *                      "simclusters_trip_candidate_store"
   * @param stratoClient  client used to fetch from the Strato column
   * @return a store mapping a [[TripDomain]] to its trip tweets
   */
  @Provides
  @javax.inject.Singleton
  @Named(ModuleNames.TripCandidateStore)
  def providesSimClustersTripCandidateStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient
  ): ReadableStore[TripDomain, Seq[TripTweet]] = {
    val tripCandidateStratoFetchableStore =
      StratoFetchableStore
        .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn)
        .mapValues(_.tweets)

    ObservedReadableStore(
      tripCandidateStratoFetchableStore
    )(statsReceiver.scope("simclusters_trip_candidate_store"))
  }
}

View File

@ -0,0 +1,205 @@
package com.twitter.cr_mixer.module
import com.google.inject.Module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.frigate.common.store.health.TweetHealthModelStore
import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig
import com.twitter.frigate.common.store.health.UserHealthModelStore
import com.twitter.frigate.thriftscala.TweetHealthScores
import com.twitter.frigate.thriftscala.UserAgathaScores
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.contentrecommender.store.TweetInfoStore
import com.twitter.contentrecommender.store.TweetyPieFieldsStore
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus
import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores
import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore
import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore
import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores
import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.tweetypie.thriftscala.TweetService
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer
import javax.inject.Named
/**
 * Guice module providing the `ReadableStore[TweetId, TweetInfo]` used to hydrate tweets.
 *
 * The provider wires several signal stores (engagement scores, user media representation
 * health, MagicRecs real-time aggregates, blue-verified annotations) into a
 * [[TweetInfoStore]] and fronts it with memcached and an in-memory cache.
 */
object TweetInfoStoreModule extends TwitterModule {
// Implicit timer in scope for the store builders below.
// NOTE(review): the `true` argument is presumably the daemon-thread flag — confirm.
implicit val timer: Timer = new JavaTimer(true)
// Installs UnifiedCacheClient, which supplies the ModuleNames.UnifiedCache memcached client.
override def modules: Seq[Module] = Seq(UnifiedCacheClient)
/**
 * Builds the layered TweetInfo store.
 *
 * @param statsReceiver               root receiver from which every layer's scope is derived
 * @param serviceIdentifier           passed to the MagicRecs store builder; its `environment`
 *                                    also decides `asyncUpdate` for the memcached layer
 * @param stratoClient                client handed to the health-model store builders
 * @param crMixerUnifiedCacheClient   memcached client shared by all cached layers here
 * @param manhattanKVClientMtlsParams mTLS parameters for the user media representation store
 * @param tweetyPieService            TweetyPie client used for tweet fields
 * @param userTweetGraphPlusService   service queried for tweet engagement scores
 * @param blueVerifiedAnnotationStore injected store passed through to [[TweetInfoStore]]
 * @param decider                     source of the decider gates wrapping individual stores
 * @return the in-memory cached, memcached TweetInfo store
 */
@Provides
@Singleton
def providesTweetInfoStore(
statsReceiver: StatsReceiver,
serviceIdentifier: ServiceIdentifier,
stratoClient: StratoClient,
@Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
tweetyPieService: TweetService.MethodPerEndpoint,
userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint,
@Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[
String,
BlueVerifiedAnnotationsV2
],
decider: CrMixerDecider
): ReadableStore[TweetId, TweetInfo] = {
// Adapts the UserTweetGraphPlus `tweetEngagementScore` endpoint to a ReadableStore and
// gates it behind a decider. Every successful response is wrapped in Some, so the adapter
// itself never produces None.
val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] = {
val underlyingStore =
ObservedReadableStore(new ReadableStore[TweetId, TweetEngagementScores] {
override def get(
k: TweetId
): Future[Option[TweetEngagementScores]] = {
userTweetGraphPlusService.tweetEngagementScore(k).map {
Some(_)
}
}
})(statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore"))
// The gate reuses the same stats scope name as the observed store above.
DeciderableReadableStore(
underlyingStore,
decider.deciderGateBuilder.idGate(
DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey),
statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")
)
}
// Tweet health scores: Strato-backed store with every config flag enabled, memcached for
// 2 hours and gated by a decider.
// NOTE(review): this value is not referenced anywhere else in this method (it is not passed
// to TweetInfoStore below) — confirm whether it is still needed.
val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] = {
val underlyingStore = TweetHealthModelStore.buildReadableStore(
stratoClient,
Some(
TweetHealthModelStoreConfig(
enablePBlock = true,
enableToxicity = true,
enablePSpammy = true,
enablePReported = true,
enableSpammyTweetContent = true,
enablePNegMultimodal = true,
))
)(statsReceiver.scope("UnderlyingTweetHealthModelStore"))
DeciderableReadableStore(
ObservedMemcachedReadableStore.fromCacheClient(
backingStore = underlyingStore,
cacheClient = crMixerUnifiedCacheClient,
ttl = 2.hours
)(
valueInjection = BinaryScalaCodec(TweetHealthScores),
statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"),
keyToString = { k: TweetId => s"tHMS/$k" }
),
decider.deciderGateBuilder.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey),
statsReceiver.scope("TweetHealthModelStore")
) // use s"tHMS/$k" instead of s"tweetHealthModelStore/$k" to differentiate from CR cache
}
// User health (Agatha) scores: Strato-backed store, memcached for 18 hours and gated by a
// decider.
// NOTE(review): like tweetHealthModelStore, this value is not referenced later in this
// method — confirm whether it is still needed.
val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] = {
val underlyingStore = UserHealthModelStore.buildReadableStore(stratoClient)(
statsReceiver.scope("UnderlyingUserHealthModelStore"))
DeciderableReadableStore(
ObservedMemcachedReadableStore.fromCacheClient(
backingStore = underlyingStore,
cacheClient = crMixerUnifiedCacheClient,
ttl = 18.hours
)(
valueInjection = BinaryScalaCodec(UserAgathaScores),
statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"),
keyToString = { k: UserId => s"uHMS/$k" }
),
decider.deciderGateBuilder.idGate(DeciderKey.enableUserAgathaScoreDeciderKey),
statsReceiver.scope("UserHealthModelStore")
)
}
// User media representation scores: built from the Manhattan mTLS params, memcached for
// 12 hours and gated by a decider.
val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] = {
val underlyingStore =
UserMediaRepresentationHealthStore.buildReadableStore(
manhattanKVClientMtlsParams,
statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore")
)
DeciderableReadableStore(
ObservedMemcachedReadableStore.fromCacheClient(
backingStore = underlyingStore,
cacheClient = crMixerUnifiedCacheClient,
ttl = 12.hours
)(
valueInjection = BinaryScalaCodec(UserMediaRepresentationScores),
statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"),
keyToString = { k: UserId => s"uMRHS/$k" }
),
decider.deciderGateBuilder.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey),
statsReceiver.scope("UserMediaRepresentationHealthStore")
)
}
// MagicRecs real-time aggregates: no memcached layer here, only a decider gate around the
// store built from the service identifier.
val magicRecsRealTimeAggregatesStore: ReadableStore[
TweetId,
MagicRecsRealTimeAggregatesScores
] = {
val underlyingStore =
MagicRecsRealTimeAggregatesStore.buildReadableStore(
serviceIdentifier,
statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores")
)
DeciderableReadableStore(
underlyingStore,
decider.deciderGateBuilder.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore),
statsReceiver.scope("MagicRecsRealTimeAggregatesStore")
)
}
// Final store: TweetInfoStore over the signal stores above, then memcached (15 minutes),
// then an in-memory cache (15 minutes).
val tweetInfoStore: ReadableStore[TweetId, TweetInfo] = {
val underlyingStore = TweetInfoStore(
TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService),
userMediaRepresentationHealthStore,
magicRecsRealTimeAggregatesStore,
tweetEngagementScoreStore,
blueVerifiedAnnotationStore
)(statsReceiver.scope("tweetInfoStore"))
val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
backingStore = underlyingStore,
cacheClient = crMixerUnifiedCacheClient,
ttl = 15.minutes,
// Hydrating tweetInfo is now a required step for all candidates,
// hence we needed to tune these thresholds.
// asyncUpdate is switched on only when the service environment is exactly "prod".
asyncUpdate = serviceIdentifier.environment == "prod"
)(
valueInjection = BinaryScalaCodec(TweetInfo),
statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"),
keyToString = { k: TweetId => s"tIS/$k" }
)
ObservedCachedReadableStore.from(
memcachedStore,
ttl = 15.minutes,
maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB
windowSize = 10000L,
cacheName = "tweet_info_cache",
maxMultiGetSize = 20
)(statsReceiver.scope("inMemoryCachedTweetInfoStore"))
}
tweetInfoStore
}
}

View File

@ -0,0 +1,42 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers
/**
 * Guice module providing the store of users who recently engaged with a tweet.
 */
object TweetRecentEngagedUserStoreModule extends TwitterModule {
  private type Version = Long

  // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion)
  private val tweetRecentEngagedUsersStoreDefaultVersion: Version = 0L

  // Column path is configurable through the `crMixer.tweetRecentEngagedUsersColumnPath` flag.
  private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String](
    name = "crMixer.tweetRecentEngagedUsersColumnPath",
    default = "recommendations/twistly/tweetRecentEngagedUsers",
    help = "Strato column path for TweetRecentEngagedUsersStore"
  )

  /**
   * Builds an observed store keyed by tweet id. The underlying Strato column is keyed by
   * `(tweetId, version)`; every lookup is issued with the default version.
   *
   * @param statsReceiver root receiver; the store reports under
   *                      "tweet_recent_engaged_users_store"
   * @param stratoClient  client used to fetch from the Strato column
   * @return a store mapping a tweet id to its [[TweetRecentEngagedUsers]]
   */
  @Provides
  @Singleton
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, TweetRecentEngagedUsers] = {
    val versionedStore = StratoFetchableStore
      .withUnitView[(TweetId, Version), TweetRecentEngagedUsers](
        stratoClient,
        tweetRecentEngagedUsersColumnPath())

    val tweetKeyedStore = versionedStore.composeKeyMapping[TweetId] { tweetId =>
      (tweetId, tweetRecentEngagedUsersStoreDefaultVersion)
    }

    ObservedReadableStore(tweetKeyedStore)(
      statsReceiver.scope("tweet_recent_engaged_users_store"))
  }
}

View File

@ -0,0 +1,32 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore
import com.twitter.simclusters_v2.common.UserId
import javax.inject.Named
/**
 * Guice module providing the memcached-backed store of tweet recommendation responses.
 */
object TweetRecommendationResultsStoreModule extends TwitterModule {

  /**
   * Builds a readable and writable memcached store keyed by user id with a 24 hour TTL.
   * The cache key is the user id's plain string form.
   *
   * @param tweetRecommendationResultsCacheClient memcached client dedicated to these results
   * @param statsReceiver                         root receiver; the store reports under
   *                                              "TweetRecommendationResultsMemcacheStore"
   * @return a store mapping a user id to its cached [[CrMixerTweetResponse]]
   */
  @Provides
  @Singleton
  def providesTweetRecommendationResultsStore(
    @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient,
    statsReceiver: StatsReceiver
  ): ReadableWritableStore[UserId, CrMixerTweetResponse] = {
    val scopedStats = statsReceiver.scope("TweetRecommendationResultsMemcacheStore")

    ObservedReadableWritableMemcacheStore.fromCacheClient(
      cacheClient = tweetRecommendationResultsCacheClient,
      ttl = 24.hours
    )(
      valueInjection = BinaryScalaCodec(CrMixerTweetResponse),
      statsReceiver = scopedStats,
      keyToString = { userId: UserId => userId.toString }
    )
  }
}

View File

@ -0,0 +1,67 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
/**
 * Guice module providing the TwHIN collaborative-filter candidate stores.
 *
 * All four stores read the same Strato column and differ only in the view name passed
 * through [[TwhinCollabFilterView]].
 */
object TwhinCollabFilterStratoStoreModule extends TwitterModule {
  val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User"

  /** Creates the Strato store for `stratoColumnPath` with the given view name. */
  private def candidateStoreFor(
    stratoClient: StratoClient,
    viewName: String
  ): ReadableStore[Long, Seq[TweetId]] =
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView(viewName)
    )

  /** Collab-filter candidates using the "follow_2022_03_10_c_500K" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  def providesTwhinCollabFilterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    candidateStoreFor(stratoClient, "follow_2022_03_10_c_500K")

  /** Collab-filter candidates using the "engagement_2022_04_10_c_500K" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement)
  def providesTwhinCollabFilterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    candidateStoreFor(stratoClient, "engagement_2022_04_10_c_500K")

  /** Multi-cluster candidates using the "multiclusterFollow20220921" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow)
  def providesTwhinMultiClusterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    candidateStoreFor(stratoClient, "multiclusterFollow20220921")

  /** Multi-cluster candidates using the "multiclusterEng20220921" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement)
  def providesTwhinMultiClusterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    candidateStoreFor(stratoClient, "multiclusterEng20220921")
}

View File

@ -0,0 +1,42 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.thriftscala.OrderedClustersAndMembers
import javax.inject.Named
/**
 * Guice module providing the store bound to [[ModuleNames.TwiceClustersMembersStore]].
 */
object TwiceClustersMembersStoreModule extends TwitterModule {

  // Column path is configurable through the `crMixer.twiceClustersMembersColumnPath` flag.
  // The help text previously named TweetRecentEngagedUsersStore (a copy-paste slip); it now
  // names the store this flag actually configures.
  private val twiceClustersMembersColumnPath: Flag[String] = flag[String](
    name = "crMixer.twiceClustersMembersColumnPath",
    default =
      "recommendations/simclusters_v2/embeddings/TwiceClustersMembersLargestDimApeSimilarity",
    help = "Strato column path for TwiceClustersMembersStore"
  )

  /**
   * Builds an observed Strato unit-view store keyed by user id.
   *
   * NOTE(review): the method name mentions "TweetRecentEngagedUserStore" although it provides
   * the Twice clusters-members store. It is left unchanged here to keep the module's
   * interface stable; consider renaming it in a dedicated change.
   *
   * @param statsReceiver root receiver; the store reports under
   *                      "twice_clusters_members_largestDimApe_similarity_store"
   * @param stratoClient  client used to fetch from the Strato column
   * @return a store mapping a user id to its [[OrderedClustersAndMembers]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.TwiceClustersMembersStore)
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, OrderedClustersAndMembers] = {
    val twiceClustersMembersStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, OrderedClustersAndMembers](
        stratoClient,
        twiceClustersMembersColumnPath())

    ObservedReadableStore(
      twiceClustersMembersStratoFetchableStore
    )(statsReceiver.scope("twice_clusters_members_largestDimApe_similarity_store"))
  }
}

View File

@ -0,0 +1,83 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.storehaus_internal.memcache.MemcacheStore
import com.twitter.storehaus_internal.util.ClientName
import com.twitter.storehaus_internal.util.ZkEndPoint
import javax.inject.Named
/**
 * Guice module providing the memcached clients used by cr-mixer: the unified cache, the
 * tweet recommendation results cache and the Earlybird tweets cache.
 *
 * Each destination is configurable through its own flag; all clients share the same
 * 20 millisecond timeout and the "cache_client" stats scope.
 */
object UnifiedCacheClient extends TwitterModule {
  private val ClientTimeout = 20.milliseconds

  val crMixerUnifiedCacheDest: Flag[String] = flag[String](
    name = "crMixer.unifiedCacheDest",
    default = "/s/cache/content_recommender_unified_v2",
    help = "Wily path to Content Recommender unified cache"
  )

  val tweetRecommendationResultsCacheDest: Flag[String] = flag[String](
    name = "tweetRecommendationResults.CacheDest",
    default = "/s/cache/tweet_recommendation_results",
    help = "Wily path to CrMixer getTweetRecommendations() results cache"
  )

  val earlybirdTweetsCacheDest: Flag[String] = flag[String](
    name = "earlybirdTweets.CacheDest",
    default = "/s/cache/crmixer_earlybird_tweets",
    help = "Wily path to CrMixer Earlybird Recency Based Similarity Engine result cache"
  )

  /**
   * Shared construction path for all three clients.
   *
   * @param clientName        name given to the memcached client
   * @param destination       cache destination path, already read from its flag
   * @param serviceIdentifier service identity forwarded to the client builder
   * @param statsReceiver     root receiver; stats go under the "cache_client" scope
   */
  private def newMemcachedClient(
    clientName: String,
    destination: String,
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): Client =
    MemcacheStore.memcachedClient(
      name = ClientName(clientName),
      dest = ZkEndPoint(destination),
      statsReceiver = statsReceiver.scope("cache_client"),
      serviceIdentifier = serviceIdentifier,
      timeout = ClientTimeout
    )

  /** Client for the unified cache, bound to [[ModuleNames.UnifiedCache]]. */
  @Provides
  @Singleton
  @Named(ModuleNames.UnifiedCache)
  def provideUnifiedCacheClient(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    newMemcachedClient(
      "memcache-content-recommender-unified",
      crMixerUnifiedCacheDest(),
      serviceIdentifier,
      statsReceiver)

  /** Client for the results cache, bound to [[ModuleNames.TweetRecommendationResultsCache]]. */
  @Provides
  @Singleton
  @Named(ModuleNames.TweetRecommendationResultsCache)
  def providesTweetRecommendationResultsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    newMemcachedClient(
      "memcache-tweet-recommendation-results",
      tweetRecommendationResultsCacheDest(),
      serviceIdentifier,
      statsReceiver)

  /** Client for the Earlybird tweets cache, bound to [[ModuleNames.EarlybirdTweetsCache]]. */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdTweetsCache)
  def providesEarlybirdTweetsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    newMemcachedClient(
      "memcache-crmixer-earlybird-tweets",
      earlybirdTweetsCacheDest(),
      serviceIdentifier,
      statsReceiver)
}

Some files were not shown because too many files have changed in this diff Show More