Twitter Recommendation Algorithm

Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
This commit is contained in:
twitter-team
2023-03-31 17:36:31 -05:00
commit ef4c5eb65e
5364 changed files with 460239 additions and 0 deletions

View File

@ -0,0 +1,7 @@
# Resources target: bundles the XML files in this directory and the YAML files under config/.
resources(
sources = [
"*.xml",
"config/*.yml",
],
tags = ["bazel-compatible"],
)

View File

@ -0,0 +1,95 @@
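# SimClusters embedding store enable / disable decider values.
# NOTE(review): default_availability appears to be on a 0-10000 scale (10000 = 100%, 0 = off) - confirm against the decider library.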
# ---------- Dark Traffic Proxy ----------
dark_traffic_filter:
comment: Proportion of the requests that are forwarded as dark traffic to the proxy
default_availability: 0
# Tweet embeddings
enable_LogFavBasedTweet_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavLongestL2EmbeddingTweet_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
# Entity embeddings
enable_FavTfgTopic_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedKgoApeTopic_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
# KnownFor embeddings
enable_FavBasedProducer_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_FollowBasedProducer_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
# InterestedIn embeddings
enable_LogFavBasedUserInterestedInFromAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_FollowBasedUserInterestedInFromAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_FavBasedUserInterestedIn_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_FollowBasedUserInterestedIn_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedIn_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_FilteredUserInterestedIn_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_UnfilteredUserInterestedIn_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedAverageAddressBookFromIIAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_UserNextInterestedIn_Model20m145k2020:
comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000

View File

@ -0,0 +1,167 @@
<configuration>
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
<!-- ===================================================== -->
<!-- Service Config -->
<!-- ===================================================== -->
<property name="DEFAULT_SERVICE_PATTERN"
value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
<property name="DEFAULT_ACCESS_PATTERN"
value="%msg"/>
<!-- ===================================================== -->
<!-- Common Config -->
<!-- ===================================================== -->
<!-- JUL/JDK14 to Logback bridge -->
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
<resetJUL>true</resetJUL>
</contextListener>
<!-- ====================================================================================== -->
<!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
<!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
<!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
<!-- ====================================================================================== -->
<!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
<appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.service.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
<!-- the maximum total size of all the log files -->
<totalSizeCap>3GB</totalSizeCap>
<!-- keep maximum 21 days' worth of history -->
<maxHistory>21</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- Access Log (rollover daily, keep maximum of 7 days of gzip compressed logs) -->
<appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.access.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
<!-- the maximum total size of all the log files -->
<totalSizeCap>100MB</totalSizeCap>
<!-- keep maximum 7 days' worth of history -->
<maxHistory>7</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
</encoder>
</appender>
<!--LogLens -->
<appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/service</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- LogLens Access -->
<appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/access</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- Pipeline Execution Logs -->
<appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>allow_listed_pipeline_executions.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
<!-- the maximum total size of all the log files -->
<totalSizeCap>100MB</totalSizeCap>
<!-- keep maximum 7 days' worth of history -->
<maxHistory>7</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- ===================================================== -->
<!-- Primary Async Appenders -->
<!-- ===================================================== -->
<property name="async_queue_size" value="${queue.size:-50000}"/>
<property name="async_max_flush_time" value="${max.flush.time:-0}"/>
<appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="SERVICE"/>
</appender>
<appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ACCESS"/>
</appender>
<appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
</appender>
<appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS"/>
</appender>
<appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS-ACCESS"/>
</appender>
<!-- ===================================================== -->
<!-- Package Config -->
<!-- ===================================================== -->
<!-- Per-Package Config -->
<logger name="com.twitter" level="INHERITED"/>
<logger name="com.twitter.wilyns" level="INHERITED"/>
<logger name="com.twitter.configbus.client.file" level="INHERITED"/>
<logger name="com.twitter.finagle.mux" level="INHERITED"/>
<logger name="com.twitter.finagle.serverset2" level="INHERITED"/>
<logger name="com.twitter.logging.ScribeHandler" level="INHERITED"/>
<logger name="com.twitter.zookeeper.client.internal" level="INHERITED"/>
<!-- Disable deadline exceeded logs by default. This can be overridden dynamically in the admin panel of individual instances. -->
<logger name="com.twitter.relevance_platform.common.exceptions.DeadlineExceededExceptionMapper" level="OFF"/>
<!-- Root Config -->
<!-- For all logs except access logs, disable logging below the log_level level by default. This can be overridden in the per-package loggers, and dynamically in the admin panel of individual instances. -->
<root level="${log_level:-INFO}">
<appender-ref ref="ASYNC-SERVICE"/>
<appender-ref ref="ASYNC-LOGLENS"/>
</root>
<!-- Access Logging -->
<!-- Access logs are turned off by default -->
<logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter" level="OFF" additivity="false">
<appender-ref ref="ASYNC-ACCESS"/>
<appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
</logger>
</configuration>

View File

@ -0,0 +1,31 @@
# Scala library target compiled with the "fatal_warnings" compiler option set.
# Dependencies cover dependency injection (Guice), Finagle / Finatra server pieces,
# relevance-platform exception mappers and filters, and the simclusters-ann resources,
# controllers, modules and Thrift definitions.
scala_library(
compiler_option_sets = ["fatal_warnings"],
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/net/codingwell:scala-guice",
"finagle/finagle-core/src/main",
"finagle/finagle-http/src/main/scala",
"finagle/finagle-thriftmux/src/main/scala",
"finatra-internal/decider/src/main/scala",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra/inject/inject-app/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"finatra/inject/inject-server/src/main/scala",
"finatra/inject/inject-thrift-client/src/main/scala",
"finatra/inject/inject-utils/src/main/scala",
"finatra/utils/src/main/java/com/twitter/finatra/annotations",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/exceptions",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters",
"simclusters-ann/server/src/main/resources",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/controllers",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/thrift/com/twitter/search:earlybird-scala",
"thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view",
"twitter-server/server/src/main/scala",
"util/util-app/src/main/scala",
"util/util-core:scala",
],
)

View File

@ -0,0 +1,70 @@
package com.twitter.simclustersann
import com.google.inject.Module
import com.twitter.finatra.decider.modules.DeciderModule
import com.twitter.finatra.mtls.thriftmux.Mtls
import com.twitter.finatra.thrift.ThriftServer
import com.twitter.finatra.thrift.filters._
import com.twitter.finatra.thrift.routing.ThriftRouter
import com.twitter.inject.thrift.modules.ThriftClientIdModule
import com.twitter.relevance_platform.common.exceptions._
import com.twitter.simclustersann.controllers.SimClustersANNController
import com.twitter.simclustersann.exceptions.InvalidRequestForSimClustersAnnVariantExceptionMapper
import com.twitter.simclustersann.modules._
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import com.twitter.finagle.Filter
import com.twitter.finatra.annotations.DarkTrafficFilterType
import com.twitter.inject.annotations.Flags
import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule
import com.twitter.relevance_platform.common.filters.ClientStatsFilter
import com.twitter.simclustersann.common.FlagNames.DisableWarmup
/** Entry-point object: a singleton instance of [[SimClustersAnnServer]]. */
object SimClustersAnnServerMain extends SimClustersAnnServer
/**
 * Thrift server for the SimClusters ANN service, with `Mtls` mixed in.
 *
 * It declares the warmup-disabling flag, lists the modules to install, wires the Thrift
 * filter / exception-mapper chain in front of [[SimClustersANNController]], and optionally
 * runs [[SimclustersAnnWarmupHandler]] during warmup.
 */
class SimClustersAnnServer extends ThriftServer with Mtls {
// Boolean flag, false by default; when set to true the warmup handler below is skipped.
flag(
name = DisableWarmup,
default = false,
help = "If true, no warmup will be run."
)
// Name of this server.
override val name = "simclusters-ann-server"
// Modules installed for this server. The last two are instantiated per server and are
// parameterized by the service's generated Thrift interfaces.
override val modules: Seq[Module] = Seq(
CacheModule,
ServiceNameMapperModule,
ClusterConfigMapperModule,
ClusterConfigModule,
ClusterTweetIndexProviderModule,
DeciderModule,
EmbeddingStoreModule,
FlagsModule,
FuturePoolProvider,
RateLimiterModule,
SimClustersANNCandidateSourceModule,
StratoClientProviderModule,
ThriftClientIdModule,
new CustomMtlsThriftWebFormsModule[SimClustersANNService.MethodPerEndpoint](this),
new DarkTrafficFilterModule[SimClustersANNService.ReqRepServicePerEndpoint]()
)
// Registers filters, then exception mappers, then the controller that serves the endpoints.
// NOTE(review): registration order is presumably significant for filters and mappers -
// confirm against the Finatra ThriftRouter documentation before reordering.
def configureThrift(router: ThriftRouter): Unit = {
router
.filter[LoggingMDCFilter]
.filter[TraceIdMDCFilter]
.filter[ThriftMDCFilter]
.filter[ClientStatsFilter]
.filter[ExceptionMappingFilter]
.filter[Filter.TypeAgnostic, DarkTrafficFilterType]
.exceptionMapper[InvalidRequestForSimClustersAnnVariantExceptionMapper]
.exceptionMapper[DeadlineExceededExceptionMapper]
.exceptionMapper[UnhandledExceptionMapper]
.add[SimClustersANNController]
}
// Runs the warmup handler unless the DisableWarmup flag was set to true.
override protected def warmup(): Unit = {
if (!injector.instance[Boolean](Flags.named(DisableWarmup))) {
handle[SimclustersAnnWarmupHandler]()
}
}
}

View File

@ -0,0 +1,73 @@
package com.twitter.simclustersann
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import javax.inject.Inject
import scala.util.control.NonFatal
import com.google.common.util.concurrent.RateLimiter
import com.twitter.conversions.DurationOps.richDurationFromInt
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.ClusterId
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Await
import com.twitter.util.ExecutorServiceFuturePool
import com.twitter.util.Future
/**
 * Warmup handler that reads the tweet candidates of every cluster id in
 * `[1, SimclustersNumber)` from `clusterTweetCandidatesStore`.
 *
 * Each read runs on `futurePool`, is throttled by `rateLimiter`, and is bounded by a
 * 10 second timeout; the whole warmup is bounded by 10 minutes. Progress is reported through
 * counters under the `fetchFromCache` stats scope:
 *  - `clusters`: reads scheduled
 *  - `success`: store reads that completed successfully
 *  - `failures`: store reads that failed or timed out
 *  - `keys`: reads that finished (successfully or with a handled store failure) in time
 *
 * @param clusterTweetCandidatesStore store queried once per cluster id
 * @param futurePool pool on which the blocking per-key reads run; its executor is shut down
 *                   when warmup ends. NOTE(review): this assumes the pool is dedicated to
 *                   warmup and not shared with request serving - confirm in the provider.
 * @param rateLimiter limiter acquired before every read
 * @param statsReceiver receiver under which the warmup counters are scoped
 */
class SimclustersAnnWarmupHandler @Inject() (
  clusterTweetCandidatesStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]],
  futurePool: ExecutorServiceFuturePool,
  rateLimiter: RateLimiter,
  statsReceiver: StatsReceiver)
    extends Handler
    with Logging {

  private val stats = statsReceiver.scope(this.getClass.getName)
  private val scopedStats = stats.scope("fetchFromCache")
  private val clusters = scopedStats.counter("clusters")
  private val fetchedKeys = scopedStats.counter("keys")
  private val failures = scopedStats.counter("failures")
  private val success = scopedStats.counter("success")

  // NOTE(review): List.range excludes its upper bound, so cluster id 144428 itself is never
  // warmed - confirm whether this value is a count or the largest cluster id.
  private val SimclustersNumber = 144428

  /**
   * Schedules one read per cluster id, waits (up to 10 minutes) for all of them, then shuts
   * down the pool's executor. Never throws for non-fatal errors: they are logged instead.
   */
  override def handle(): Unit = {
    try {
      val clusterIds = List.range(1, SimclustersNumber)
      val futures: Seq[Future[Unit]] = clusterIds
        .map { clusterId =>
          clusters.incr()
          futurePool {
            rateLimiter.acquire()
            try {
              Await.result(
                clusterTweetCandidatesStore
                  .get(clusterId)
                  .onSuccess { _ =>
                    success.incr()
                  }
                  .handle {
                    case NonFatal(_) =>
                      failures.incr()
                  },
                timeout = 10.seconds
              )
              fetchedKeys.incr()
            } catch {
              // A per-key timeout is thrown by Await.result itself, so the `.handle` above
              // never sees it. Count it here instead of failing the pooled task: a failed
              // task would make Future.collect fail fast and end the warmup wait while most
              // keys are still queued.
              case NonFatal(_) =>
                failures.incr()
            }
          }
        }
      Await.result(Future.collect(futures), timeout = 10.minutes)
    } catch {
      case NonFatal(e) => error(e.getMessage, e)
    } finally {
      try {
        futurePool.executor.shutdown()
      } catch {
        // Best effort: a failure to shut the executor down must not fail warmup.
        case NonFatal(_) =>
      }
      info("Warmup done.")
    }
  }
}

View File

@ -0,0 +1,129 @@
package com.twitter.simclustersann.candidate_source
import com.twitter.simclusters_v2.common.ClusterId
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.util.Duration
import com.twitter.util.Time
import scala.collection.mutable
/**
 * Scoring strategy that ranks tweet candidates against a source SimClusters embedding.
 *
 * The implementations in this package follow the same outline:
 * 1. For each cluster present in the source embedding, walk that cluster's top tweets
 * (at most `config.maxTopTweetsPerCluster` per cluster).
 * 2. Keep tweets inside the configured age window, excluding the source tweet itself.
 * 3. Accumulate, per tweet, the dot product with the source embedding and the sum of squared
 * tweet scores, then normalize according to `config.annAlgorithm`.
 * 4. Drop candidates scoring below `config.minScore` and return the highest-scoring ones.
 */
trait ApproximateCosineSimilarity {
// A (tweet id, score) pair.
type ScoredTweet = (Long, Double)
// sourceEmbedding: embedding of the query source; supplies per-cluster scores and norms.
// sourceEmbeddingId: id of the source; when it identifies a tweet, that tweet is excluded.
// config: age window, per-cluster and overall limits, scoring algorithm and minimum score.
// candidateScoresStat: callback invoked with the number of distinct candidates considered.
// clusterTweetsMap: for each cluster, the optional list of (tweet id, score) pairs.
// clusterTweetsMapArray: array-based variant of the same data; defaults to empty.
// Returns the selected (tweet id, score) pairs.
def apply(
sourceEmbedding: SimClustersEmbedding,
sourceEmbeddingId: SimClustersEmbeddingId,
config: SimClustersANNConfig,
candidateScoresStat: Int => Unit,
clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]],
clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
): Seq[ScoredTweet]
}
/**
 * Baseline implementation of the [[ApproximateCosineSimilarity]] trait, built on two Scala
 * mutable hash maps: one accumulating each candidate's dot product with the source embedding
 * and one accumulating the sum of its squared per-cluster scores.
 */
object ApproximateCosineSimilarity extends ApproximateCosineSimilarity {

  /** Initial capacity used for the per-request candidate maps. */
  final val InitialCandidateMapSize = 16384

  /** Hard cap on the number of results returned, whatever the config asks for. */
  val MaxNumResultsUpperBound = 1000

  /** A configured max age (in hours) at or above this value disables the max-age filter. */
  final val MaxTweetCandidateAgeUpperBound = 175200

  /** A `mutable.HashMap` whose initial table size is `initSize` rather than the default. */
  private class PresizedHashMap[A, B](initSize: Int) extends mutable.HashMap[A, B] {
    override def initialSize: Int = initSize // 16 - by default
  }

  /** Returns the tweet id carried by `embeddingId`, if its internal id is a tweet id. */
  private def parseTweetId(embeddingId: SimClustersEmbeddingId): Option[TweetId] =
    embeddingId.internalId match {
      case InternalId.TweetId(tweetId) => Some(tweetId)
      case _ => None
    }

  /**
   * Scores the tweets found in `clusterTweetsMap` against `sourceEmbedding`.
   *
   * Only clusters contained in the source embedding are considered, and only the first
   * `config.maxTopTweetsPerCluster` tweets of each. Tweets outside the configured age window
   * and the source tweet itself are skipped. `clusterTweetsMapArray` is not read here.
   *
   * @return candidates with a score of at least `config.minScore`, highest score first, capped
   *         at `min(config.maxNumResults, MaxNumResultsUpperBound)` entries
   */
  override def apply(
    sourceEmbedding: SimClustersEmbedding,
    sourceEmbeddingId: SimClustersEmbeddingId,
    config: SimClustersANNConfig,
    candidateScoresStat: Int => Unit,
    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]] = Map.empty,
    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
  ): Seq[ScoredTweet] = {
    val now = Time.now

    // Tweet ids are compared against Snowflake ids derived from the age bounds.
    val ageFilterDisabled = config.maxTweetCandidateAgeHours >= MaxTweetCandidateAgeUpperBound
    val earliestTweetId =
      if (ageFilterDisabled) 0L // Disable max tweet age filter
      else SnowflakeId.firstIdFor(now - Duration.fromHours(config.maxTweetCandidateAgeHours))
    val latestTweetId =
      SnowflakeId.firstIdFor(now - Duration.fromHours(config.minTweetCandidateAgeHours))

    // The source tweet (if the source is a tweet) must never be returned as a candidate.
    val sourceTweetId: Option[TweetId] = parseTweetId(sourceEmbeddingId)

    // Mutable maps are used for performance; they are local to this call, so concurrent calls
    // do not share mutable state. They are pre-sized to roughly the p75 of the observed map
    // size to avoid repeated copying as the tables grow.
    val candidateScoresMap = new PresizedHashMap[TweetId, Double](InitialCandidateMapSize)
    val candidateNormalizationMap = new PresizedHashMap[TweetId, Double](InitialCandidateMapSize)

    clusterTweetsMap.foreach {
      case (clusterId, Some(tweetScores)) if sourceEmbedding.contains(clusterId) =>
        val sourceClusterScore = sourceEmbedding.getOrElse(clusterId)
        tweetScores.iterator.take(config.maxTopTweetsPerCluster).foreach {
          case (tweetId, score) =>
            val isSourceTweet = sourceTweetId.contains(tweetId)
            if (!isSourceTweet && tweetId >= earliestTweetId && tweetId <= latestTweetId) {
              val dotProduct =
                candidateScoresMap.getOrElse(tweetId, 0.0) + score * sourceClusterScore
              candidateScoresMap.put(tweetId, dotProduct)
              val squaredNorm =
                candidateNormalizationMap.getOrElse(tweetId, 0.0) + score * score
              candidateNormalizationMap.put(tweetId, squaredNorm)
            }
        }
      case _ => ()
    }

    candidateScoresStat(candidateScoresMap.size)

    // Re-rank the candidates according to the configured algorithm (partial normalization).
    val processedCandidateScores: Seq[(TweetId, Double)] = candidateScoresMap.map {
      case (candidateId, score) =>
        def squaredNorm: Double = candidateNormalizationMap(candidateId)
        val processedScore = config.annAlgorithm match {
          // The "log" flavour of partial normalization pairs with log cosine similarity.
          case ScoringAlgorithm.LogCosineSimilarity =>
            score / sourceEmbedding.logNorm / math.log(1 + squaredNorm)
          case ScoringAlgorithm.CosineSimilarity =>
            score / sourceEmbedding.l2norm / math.sqrt(squaredNorm)
          case ScoringAlgorithm.CosineSimilarityNoSourceEmbeddingNormalization =>
            score / math.sqrt(squaredNorm)
          case ScoringAlgorithm.DotProduct =>
            score
        }
        candidateId -> processedScore
    }.toSeq

    val maxResults = Math.min(config.maxNumResults, MaxNumResultsUpperBound)
    processedCandidateScores
      .filter { case (_, candidateScore) => candidateScore >= config.minScore }
      .sortBy { case (_, candidateScore) => -candidateScore }
      .take(maxResults)
  }
}

View File

@ -0,0 +1,14 @@
# Scala library target; depends on Guava, Storehaus, frigate-common, the simclusters-ann and
# simclusters_v2 Thrift definitions, simclusters_v2 common / stores, and Finagle stats.
scala_library(
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/guava",
"3rdparty/jvm/com/twitter/storehaus:core",
"frigate/frigate-common:base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/scala/com/twitter/simclusters_v2/summingbird/stores",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"util/util-stats/src/main/scala/com/twitter/finagle/stats",
],
)

View File

@ -0,0 +1,131 @@
package com.twitter.simclustersann.candidate_source
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.util.Duration
import com.twitter.util.Time
import com.google.common.collect.Comparators
import com.twitter.simclusters_v2.common.ClusterId
/**
 * A modified version of OptimizedApproximateCosineSimilarity which uses more java streams to
 * avoid materializing intermediate collections. Its performance is still under investigation.
 *
 * Per candidate it keeps one mutable [[ExperimentalApproximateCosineSimilarity.Scores]] holder
 * (dot product and squared norm) in a single `java.util.HashMap`, and selects the top results
 * with Guava's `Comparators.greatest` collector instead of sorting every candidate.
 */
object ExperimentalApproximateCosineSimilarity extends ApproximateCosineSimilarity {

  /** Initial capacity used for the per-request candidate map. */
  final val InitialCandidateMapSize = 16384

  /** Hard cap on the number of results returned, whatever the config asks for. */
  val MaxNumResultsUpperBound = 1000

  /** A configured max age (in hours) at or above this value disables the max-age filter. */
  final val MaxTweetCandidateAgeUpperBound = 175200

  /** Returns the tweet id carried by `embeddingId`, if its internal id is a tweet id. */
  private def parseTweetId(embeddingId: SimClustersEmbeddingId): Option[TweetId] = {
    embeddingId.internalId match {
      case InternalId.TweetId(tweetId) =>
        Some(tweetId)
      case _ =>
        None
    }
  }

  /** Orders (tweet id, score) pairs by ascending score. */
  private val CompareByScore: java.util.Comparator[(Long, Double)] =
    new java.util.Comparator[(Long, Double)] {
      override def compare(o1: (Long, Double), o2: (Long, Double)): Int = {
        java.lang.Double.compare(o1._2, o2._2)
      }
    }

  /**
   * Mutable accumulator for one candidate.
   *
   * @param score running dot product with the source embedding
   * @param norm  running sum of the candidate's squared per-cluster scores
   */
  class Scores(var score: Double, var norm: Double)

  /**
   * Scores the tweets found in `clusterTweetsMap` against `sourceEmbedding`.
   *
   * Only clusters contained in the source embedding are considered, and only the first
   * `config.maxTopTweetsPerCluster` tweets of each. Tweets outside the configured age window
   * and the source tweet itself are skipped. `clusterTweetsMapArray` is not read here.
   *
   * @return candidates with a score of at least `config.minScore`, as selected by the top-k
   *         collector, capped at `min(config.maxNumResults, MaxNumResultsUpperBound)` entries
   *         (empty when that cap is zero or negative)
   */
  override def apply(
    sourceEmbedding: SimClustersEmbedding,
    sourceEmbeddingId: SimClustersEmbeddingId,
    config: SimClustersANNConfig,
    candidateScoresStat: Int => Unit,
    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]] = Map.empty,
    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
  ): Seq[ScoredTweet] = {
    val now = Time.now
    val earliestTweetId =
      if (config.maxTweetCandidateAgeHours >= MaxTweetCandidateAgeUpperBound)
        0L // Disable max tweet age filter
      else
        SnowflakeId.firstIdFor(now - Duration.fromHours(config.maxTweetCandidateAgeHours))
    val latestTweetId =
      SnowflakeId.firstIdFor(now - Duration.fromHours(config.minTweetCandidateAgeHours))

    val candidateScoresMap = new java.util.HashMap[Long, Scores](InitialCandidateMapSize)

    // 0L stands for "the source is not a tweet".
    val sourceTweetId = parseTweetId(sourceEmbeddingId).getOrElse(0L)

    clusterTweetsMap.foreach {
      // Same guard as the other ApproximateCosineSimilarity implementations: clusters missing
      // from the source embedding must not contribute to a candidate's norm, nor introduce
      // zero-score candidates.
      case (clusterId, Some(tweetScores)) if sourceEmbedding.contains(clusterId) =>
        val sourceClusterScore = sourceEmbedding.getOrElse(clusterId)

        for (i <- 0 until Math.min(tweetScores.size, config.maxTopTweetsPerCluster)) {
          val (tweetId, score) = tweetScores(i)

          if (tweetId >= earliestTweetId &&
            tweetId <= latestTweetId &&
            tweetId != sourceTweetId) {

            // java.util.HashMap returns null for a missing key; checked directly to avoid
            // allocating a wrapper on this hot path.
            val scores = candidateScoresMap.get(tweetId)
            if (scores == null) {
              val scorePair = new Scores(
                score = score * sourceClusterScore,
                norm = score * score
              )
              candidateScoresMap.put(tweetId, scorePair)
            } else {
              scores.score = scores.score + (score * sourceClusterScore)
              scores.norm = scores.norm + (score * score)
            }
          }
        }
      case _ => ()
    }

    candidateScoresStat(candidateScoresMap.size)

    // Normalization is chosen once per request rather than once per candidate.
    val normFn: (Long, Scores) => (Long, Double) = config.annAlgorithm match {
      case ScoringAlgorithm.LogCosineSimilarity =>
        (candidateId: Long, score: Scores) =>
          (
            candidateId,
            score.score / sourceEmbedding.logNorm / math.log(1 + score.norm)
          )
      case ScoringAlgorithm.CosineSimilarity =>
        (candidateId: Long, score: Scores) =>
          (
            candidateId,
            score.score / sourceEmbedding.l2norm / math.sqrt(score.norm)
          )
      case ScoringAlgorithm.CosineSimilarityNoSourceEmbeddingNormalization =>
        (candidateId: Long, score: Scores) =>
          (
            candidateId,
            score.score / math.sqrt(score.norm)
          )
      case ScoringAlgorithm.DotProduct =>
        (candidateId: Long, score: Scores) =>
          (
            candidateId,
            score.score
          )
    }

    import scala.collection.JavaConverters._

    // Comparators.greatest rejects a negative k with IllegalArgumentException, whereas the
    // other implementations' `take(k)` simply returns nothing; clamp to keep them aligned.
    val maxResults = math.max(0, Math.min(config.maxNumResults, MaxNumResultsUpperBound))
    val topKCollector = Comparators.greatest(
      maxResults,
      CompareByScore
    )

    candidateScoresMap
      .entrySet().stream()
      .map[(Long, Double)]((e: java.util.Map.Entry[Long, Scores]) => normFn(e.getKey, e.getValue))
      .filter((s: (Long, Double)) => s._2 >= config.minScore)
      .collect(topKCollector)
      .asScala
  }
}

View File

@ -0,0 +1,112 @@
package com.twitter.simclustersann.candidate_source
import com.twitter.simclusters_v2.common.ClusterId
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.util.Duration
import com.twitter.util.Time
/**
 * Compared with ApproximateCosineSimilarity, this implementation:
 * - moves some computation around to reduce allocations
 * - uses a single hashmap to store both scores and normalization coefficients
 * - uses some java collections in place of scala ones
 * Testing is still in progress, but this implementation shows significant (> 2x) improvements in
 * CPU utilization and allocations with 800 tweets per cluster.
 */
object OptimizedApproximateCosineSimilarity extends ApproximateCosineSimilarity {

  /** Initial capacity used for the per-request candidate map. */
  final val InitialCandidateMapSize = 16384

  /** Hard cap on the number of results returned, whatever the config asks for. */
  val MaxNumResultsUpperBound = 1000

  /** A configured max age (in hours) at or above this value disables the max-age filter. */
  final val MaxTweetCandidateAgeUpperBound = 175200

  /** Returns the tweet id carried by `embeddingId`, if its internal id is a tweet id. */
  private def parseTweetId(embeddingId: SimClustersEmbeddingId): Option[TweetId] =
    embeddingId.internalId match {
      case InternalId.TweetId(id) => Some(id)
      case _ => None
    }

  /**
   * Scores the tweets found in `clusterTweetsMap` against `sourceEmbedding`.
   *
   * Only clusters contained in the source embedding are considered, and only the first
   * `config.maxTopTweetsPerCluster` tweets of each. Tweets outside the configured age window
   * and the source tweet itself are skipped. `clusterTweetsMapArray` is not read here.
   *
   * @return candidates with a score of at least `config.minScore`, highest score first, capped
   *         at `min(config.maxNumResults, MaxNumResultsUpperBound)` entries
   */
  override def apply(
    sourceEmbedding: SimClustersEmbedding,
    sourceEmbeddingId: SimClustersEmbeddingId,
    config: SimClustersANNConfig,
    candidateScoresStat: Int => Unit,
    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]] = Map.empty,
    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
  ): Seq[ScoredTweet] = {
    val now = Time.now

    // Age bounds are expressed as Snowflake ids so tweet ids can be compared directly.
    val maxAgeFilterDisabled = config.maxTweetCandidateAgeHours >= MaxTweetCandidateAgeUpperBound
    val earliestTweetId =
      if (maxAgeFilterDisabled) 0L // Disable max tweet age filter
      else SnowflakeId.firstIdFor(now - Duration.fromHours(config.maxTweetCandidateAgeHours))
    val latestTweetId =
      SnowflakeId.firstIdFor(now - Duration.fromHours(config.minTweetCandidateAgeHours))

    // tweet id -> (dot product with the source embedding, sum of squared tweet scores)
    val candidateScoresMap = new java.util.HashMap[Long, (Double, Double)](InitialCandidateMapSize)

    // 0L stands for "the source is not a tweet".
    val sourceTweetId = parseTweetId(sourceEmbeddingId).getOrElse(0L)

    clusterTweetsMap.foreach {
      case (clusterId, Some(tweetScores)) if sourceEmbedding.contains(clusterId) =>
        val sourceClusterScore = sourceEmbedding.getOrElse(clusterId)
        tweetScores.iterator.take(config.maxTopTweetsPerCluster).foreach {
          case (tweetId, score) =>
            val withinAgeWindow = tweetId >= earliestTweetId && tweetId <= latestTweetId
            if (withinAgeWindow && tweetId != sourceTweetId) {
              val previous = candidateScoresMap.getOrDefault(tweetId, (0.0, 0.0))
              val updated = (
                previous._1 + score * sourceClusterScore,
                previous._2 + score * score
              )
              candidateScoresMap.put(tweetId, updated)
            }
        }
      case _ => ()
    }

    candidateScoresStat(candidateScoresMap.size)

    // The normalization function is picked once per request, not once per candidate.
    val normFn: (Long, (Double, Double)) => (Long, Double) = config.annAlgorithm match {
      case ScoringAlgorithm.LogCosineSimilarity =>
        (candidateId: Long, acc: (Double, Double)) =>
          (candidateId, acc._1 / sourceEmbedding.logNorm / math.log(1 + acc._2))
      case ScoringAlgorithm.CosineSimilarity =>
        (candidateId: Long, acc: (Double, Double)) =>
          (candidateId, acc._1 / sourceEmbedding.l2norm / math.sqrt(acc._2))
      case ScoringAlgorithm.CosineSimilarityNoSourceEmbeddingNormalization =>
        (candidateId: Long, acc: (Double, Double)) =>
          (candidateId, acc._1 / math.sqrt(acc._2))
      case ScoringAlgorithm.DotProduct =>
        (candidateId: Long, acc: (Double, Double)) => (candidateId, acc._1)
    }

    // Normalize every candidate and keep those reaching the minimum score.
    val scoredTweets: java.util.ArrayList[(Long, Double)] =
      new java.util.ArrayList(candidateScoresMap.size)
    val entries = candidateScoresMap.entrySet().iterator()
    while (entries.hasNext) {
      val entry = entries.next()
      val scored = normFn(entry.getKey, entry.getValue)
      if (scored._2 >= config.minScore) {
        scoredTweets.add(scored)
      }
    }

    import scala.collection.JavaConverters._

    val maxResults = Math.min(config.maxNumResults, MaxNumResultsUpperBound)
    scoredTweets.asScala
      .sortBy { case (_, candidateScore) => -candidateScore }
      .take(maxResults)
  }
}

View File

@ -0,0 +1,102 @@
package com.twitter.simclustersann.candidate_source
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.base.Stats
import com.twitter.simclusters_v2.common.ClusterId
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
import com.twitter.simclustersann.thriftscala.SimClustersANNTweetCandidate
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
/**
 * Candidate source returning tweets that are similar to a source SimClustersEmbeddingId.
 *
 * The lookup is split across the two methods of this class:
 *  1. `get` retrieves the SimClusters embedding for the query's SimClustersEmbeddingId.
 *  2. `fetchCandidates` reads, from the clusterTweetCandidatesStore (TopTweetsPerCluster index),
 *     the tweets of the clusters kept by `truncate(config.maxScanClusters)`.
 *  3. The fetched tweets are scored against the source embedding (dot-product or approximate
 *     cosine similarity).
 *  4. The top tweet candidates by that score are kept.
 *
 * Steps 3 and 4 are delegated to the injected `approximateCosineSimilarity` implementation.
 */
case class SimClustersANNCandidateSource(
  approximateCosineSimilarity: ApproximateCosineSimilarity,
  clusterTweetCandidatesStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]],
  simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding],
  statsReceiver: StatsReceiver) {

  private val stats = statsReceiver.scope(this.getClass.getName)
  private val fetchSourceEmbeddingStat = stats.scope("fetchSourceEmbedding")
  private val fetchCandidatesStat = stats.scope("fetchCandidates")
  private val candidateScoresStat = stats.stat("candidateScoresMap")

  /**
   * Looks up the source embedding and, when it exists, the scored tweet candidates for it.
   *
   * @param query the source embedding id and the ANN configuration to apply
   * @return `None` when the embedding store has no embedding for the id, otherwise the
   *         candidates produced by `fetchCandidates`
   */
  def get(
    query: SimClustersANNCandidateSource.Query
  ): Future[Option[Seq[SimClustersANNTweetCandidate]]] = {
    val embeddingId = query.sourceEmbeddingId
    val annConfig = query.config

    // Records the result size under a stat keyed by (embeddingType, modelVersion).
    def recordResultSize(size: Int): Unit =
      fetchCandidatesStat
        .stat(embeddingId.embeddingType.name, embeddingId.modelVersion.name)
        .add(size)

    Stats
      .track(fetchSourceEmbeddingStat) {
        simClustersEmbeddingStore.get(embeddingId)
      }.flatMap {
        case Some(sourceEmbedding) =>
          Stats
            .trackSeq(fetchCandidatesStat) {
              fetchCandidates(embeddingId, sourceEmbedding, annConfig)
            }.map { candidates =>
              recordResultSize(candidates.size)
              Some(candidates)
            }
        case None =>
          // No source embedding: record an empty result and return None.
          recordResultSize(0)
          Future.None
      }
  }

  /**
   * Fetches the tweets of the source embedding's clusters and scores them.
   *
   * @param sourceEmbeddingId id of the source embedding, forwarded to the scoring function
   * @param sourceEmbedding   the embedding whose clusters are scanned
   * @param config            ANN parameters; `maxScanClusters` bounds the embedding truncation
   * @return the scored (tweetId, score) pairs wrapped as SimClustersANNTweetCandidate
   */
  private def fetchCandidates(
    sourceEmbeddingId: SimClustersEmbeddingId,
    sourceEmbedding: SimClustersEmbedding,
    config: SimClustersANNConfig
  ): Future[Seq[SimClustersANNTweetCandidate]] = {
    val clusterIds = sourceEmbedding.truncate(config.maxScanClusters).getClusterIds().toSet
    val clusterTweetsFuture = Future.collect(clusterTweetCandidatesStore.multiGet(clusterIds))

    clusterTweetsFuture.map { clusterTweetsMap =>
      val scoredTweets = approximateCosineSimilarity(
        sourceEmbedding = sourceEmbedding,
        sourceEmbeddingId = sourceEmbeddingId,
        config = config,
        candidateScoresStat = (i: Int) => candidateScoresStat.add(i),
        clusterTweetsMap = clusterTweetsMap
      )
      scoredTweets.map {
        case (tweetId, score) =>
          SimClustersANNTweetCandidate(tweetId = tweetId, score = score)
      }
    }
  }
}
object SimClustersANNCandidateSource {
/**
 * Request accepted by `SimClustersANNCandidateSource.get`.
 *
 * @param sourceEmbeddingId id of the source embedding to look up in the embedding store
 * @param config            ANN configuration forwarded to candidate fetching and scoring
 */
case class Query(
sourceEmbeddingId: SimClustersEmbeddingId,
config: SimClustersANNConfig)
}

View File

@ -0,0 +1,5 @@
# Scala library target built with the `fatal_warnings` compiler option set and tagged
# `bazel-compatible`. It declares no dependencies.
scala_library(
compiler_option_sets = ["fatal_warnings"],
tags = ["bazel-compatible"],
dependencies = [],
)

View File

@ -0,0 +1,31 @@
package com.twitter.simclustersann.common
/**
 * Names of the flags used by the SimClusters ANN server.
 *
 * These constants are referenced from `@Flag(...)` annotations (e.g. on the controller and the
 * Guice modules), so they are declared as `final val` string literals without a type
 * annotation to stay usable as constant expressions.
 */
object FlagNames {
/**
* Global Settings
*/
// Request timeout flag; the controller reads it as an Int and applies it in milliseconds.
final val ServiceTimeout = "service.timeout"
final val DarkTrafficFilterDeciderKey = "thrift.dark.traffic.filter.decider_key"
/**
* Cache Setting
*/
// Destination of the memcache service, passed to ZkEndPoint by CacheModule.
final val CacheDest = "cache_module.dest"
// Memcache client timeout; CacheModule reads it as an Int and applies it in milliseconds.
final val CacheTimeout = "cache_module.timeout"
// Only turn on the async update when the SANN Cluster has the production traffic.
final val CacheAsyncUpdate = "cache_module.async_update"
/**
* Warmup Settings
*/
final val DisableWarmup = "warmup.disable"
// Size of the fixed thread pool created by FuturePoolProvider.
final val NumberOfThreads = "warmup.thread_number"
// QPS given to the Guava RateLimiter created by RateLimiterModule.
final val RateLimiterQPS = "warmup.rate_limiter_qps"
/**
* Algorithm Parameters
*/
final val MaxTopTweetPerCluster = "sim_clusters.ann.max_top_tweets_per_cluster"
}

View File

@ -0,0 +1,29 @@
# Scala library target built with the `fatal_warnings` compiler option set.
# Its dependencies cover Guice / javax.inject, decider, Finagle, Finatra (inject + thrift),
# Scrooge, twitter-server, util, the representation-manager migration package, and the
# SimClusters ANN candidate_source / common / filters targets plus the service's Thrift
# definitions.
scala_library(
compiler_option_sets = ["fatal_warnings"],
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/net/codingwell:scala-guice",
"decider/src/main/scala",
"finagle/finagle-core/src/main",
"finatra/inject/inject-core/src/main/scala",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift/exceptions",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift/filters",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift/modules",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift/response",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift/routing",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
"scrooge/scrooge-core/src/main/scala",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/scala/com/twitter/simclusters_v2/candidate_source",
"twitter-server/server/src/main/scala",
"util/util-core:scala",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -0,0 +1,80 @@
package com.twitter.simclustersann.controllers
import com.twitter.conversions.DurationOps._
import com.twitter.finatra.thrift.Controller
import com.twitter.simclustersann.thriftscala.SimClustersANNService.GetTweetCandidates
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import com.twitter.simclustersann.thriftscala.Query
import com.twitter.simclustersann.thriftscala.SimClustersANNTweetCandidate
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import javax.inject.Inject
import com.twitter.finagle.Service
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.annotations.Flag
import com.twitter.simclustersann.candidate_source.{
SimClustersANNCandidateSource => SANNSimClustersANNCandidateSource
}
import com.twitter.simclustersann.common.FlagNames
import com.twitter.simclustersann.filters.GetTweetCandidatesResponseStatsFilter
import com.twitter.simclustersann.filters.SimClustersAnnVariantFilter
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer
/**
 * Thrift controller serving `SimClustersANNService.GetTweetCandidates`.
 *
 * Requests pass through `variantFilter` and `getTweetCandidatesResponseStatsFilter` before
 * reaching `handler`, which queries the candidate source. A failed or timed-out candidate
 * lookup is counted under the "failures" scope and answered with an empty response.
 *
 * @param serviceTimeout timeout for the candidate lookup, applied in milliseconds
 * @param variantFilter first filter in the request chain
 * @param getTweetCandidatesResponseStatsFilter second filter in the request chain
 * @param sannCandidateSource source of the tweet candidates
 * @param globalStats root stats receiver; scoped by this class's canonical name
 */
class SimClustersANNController @Inject() (
  @Flag(FlagNames.ServiceTimeout) serviceTimeout: Int,
  variantFilter: SimClustersAnnVariantFilter,
  getTweetCandidatesResponseStatsFilter: GetTweetCandidatesResponseStatsFilter,
  sannCandidateSource: SANNSimClustersANNCandidateSource,
  globalStats: StatsReceiver)
    extends Controller(SimClustersANNService) {

  import SimClustersANNController._

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  // Scoped once here instead of on every failure.
  private val failureStats: StatsReceiver = stats.scope("failures")
  private val timer: Timer = new JavaTimer(true)

  val filteredService: Service[Request[GetTweetCandidates.Args], Response[
    Seq[SimClustersANNTweetCandidate]
  ]] = {
    variantFilter
      .andThen(getTweetCandidatesResponseStatsFilter)
      .andThen(Service.mk(handler))
  }

  handle(GetTweetCandidates).withService(filteredService)

  /**
   * Queries the candidate source for the request's embedding id and config.
   *
   * @return the candidates wrapped in a `Response`, or `DefaultResponse` (empty) when the source
   *         returns `None`, the lookup fails, or it exceeds `serviceTimeout` milliseconds
   */
  private def handler(
    request: Request[GetTweetCandidates.Args]
  ): Future[Response[Seq[SimClustersANNTweetCandidate]]] = {
    val query: Query = request.args.query
    val simClustersANNCandidateSourceQuery = SANNSimClustersANNCandidateSource.Query(
      sourceEmbeddingId = query.sourceEmbeddingId,
      config = query.config
    )

    val result = sannCandidateSource
      .get(simClustersANNCandidateSourceQuery).map {
        case Some(tweetCandidatesSeq) =>
          Response(tweetCandidatesSeq.map { tweetCandidate =>
            SimClustersANNTweetCandidate(
              tweetId = tweetCandidate.tweetId,
              score = tweetCandidate.score
            )
          })
        case None =>
          DefaultResponse
      }

    result.raiseWithin(serviceTimeout.milliseconds)(timer).rescue {
      case e: Throwable =>
        // getCanonicalName returns null for anonymous and local classes; fall back to the
        // binary class name so the counter never receives a null name.
        val exceptionClass = e.getClass
        val counterName =
          Option(exceptionClass.getCanonicalName).getOrElse(exceptionClass.getName)
        failureStats.counter(counterName).incr()
        Future.value(DefaultResponse)
    }
  }
}
object SimClustersANNController {
// Empty candidate list returned by the controller when the candidate source yields None or
// the lookup fails / times out.
val DefaultResponse: Response[Seq[SimClustersANNTweetCandidate]] = Response(Seq.empty)
}

View File

@ -0,0 +1,12 @@
# Scala library target covering every `*.scala` file in its directory, built with the
# `fatal_warnings` compiler option set and `strict_deps` enabled.
# Depends on Finagle core, the internal Finatra mTLS ThriftMux / Thrift targets and the
# simclusters_v2 Thrift definitions.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"finagle/finagle-core/src/main",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra-internal/thrift/src/main/thrift:thrift-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -0,0 +1,16 @@
package com.twitter.simclustersann.exceptions
import com.twitter.finagle.RequestException
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
/**
 * Raised when a request's (model version, embedding type) pair cannot be served by the service
 * variant that received it.
 *
 * @param modelVersion        model version taken from the request
 * @param embeddingType       embedding type taken from the request
 * @param actualServiceName   name of the service variant that received the request
 * @param expectedServiceName service variant expected for the pair, if one is known
 */
case class InvalidRequestForSimClustersAnnVariantException(
  modelVersion: ModelVersion,
  embeddingType: EmbeddingType,
  actualServiceName: String,
  expectedServiceName: Option[String])
    extends RequestException(
      s"Request with model version ($modelVersion) and embedding type ($embeddingType) " +
        s"cannot be processed by service variant ($actualServiceName). " +
        s"Expected service variant: $expectedServiceName.",
      null)

View File

@ -0,0 +1,27 @@
package com.twitter.simclustersann.exceptions
import com.twitter.finatra.thrift.exceptions.ExceptionMapper
import com.twitter.finatra.thrift.thriftscala.ClientError
import com.twitter.finatra.thrift.thriftscala.ClientErrorCause
import com.twitter.util.Future
import com.twitter.util.logging.Logging
import javax.inject.Singleton
/**
 * Exception mapper for
 * [[com.twitter.simclustersann.exceptions.InvalidRequestForSimClustersAnnVariantException]].
 *
 * The exception is logged at error level and reported to the caller as a Thrift IDL defined
 * `ClientError` with cause `BadRequest`, carrying the original exception message.
 */
@Singleton
class InvalidRequestForSimClustersAnnVariantExceptionMapper
    extends ExceptionMapper[InvalidRequestForSimClustersAnnVariantException, Nothing]
    with Logging {

  /**
   * @param throwable the invalid-request exception to translate
   * @return a failed Future holding the corresponding `ClientError`
   */
  override def handleException(
    throwable: InvalidRequestForSimClustersAnnVariantException
  ): Future[Nothing] = {
    error("Invalid Request For SimClusters Ann Variant Exception", throwable)
    val badRequest = ClientError(ClientErrorCause.BadRequest, throwable.getMessage)
    Future.exception(badRequest)
  }
}

View File

@ -0,0 +1,6 @@
package com.twitter.simclustersann.exceptions
/**
 * Raised when no cluster configuration exists for the given service name
 * (thrown by ClusterConfigModule while providing the ClusterConfig).
 *
 * @param sannServiceName the service name for which the configuration lookup returned nothing
 */
case class MissingClusterConfigForSimClustersAnnVariantException(sannServiceName: String)
extends IllegalStateException(
s"No cluster configuration found for service ($sannServiceName)",
null)

View File

@ -0,0 +1,13 @@
# Scala library target built with the `fatal_warnings` compiler option set.
# Depends on Finagle core, Finatra inject (annotations + core), the relevance-platform
# multicluster package, Scrooge core, and the SimClusters ANN exceptions and Thrift targets.
scala_library(
compiler_option_sets = ["fatal_warnings"],
tags = ["bazel-compatible"],
dependencies = [
"finagle/finagle-core/src/main",
"finatra/inject/inject-app/src/main/java/com/twitter/inject/annotations",
"finatra/inject/inject-core/src/main/scala",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/simclustersann/multicluster",
"scrooge/scrooge-core/src/main/scala",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
],
)

View File

@ -0,0 +1,43 @@
package com.twitter.simclustersann.filters
import com.twitter.finagle.Service
import com.twitter.finagle.SimpleFilter
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Filter that records statistics about successful `getTweetCandidates` responses: whether the
 * response was empty, and each candidate's score multiplied by 1000. The response itself is
 * passed through untouched.
 */
@Singleton
class GetTweetCandidatesResponseStatsFilter @Inject() (
  statsReceiver: StatsReceiver)
    extends SimpleFilter[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
      SimClustersANNService.GetTweetCandidates.SuccessType
    ]] {

  private[this] val stats = statsReceiver.scope("method_response_stats").scope("getTweetCandidates")
  private[this] val candidateScoreStats = stats.stat("candidate_score_x1000")
  private[this] val emptyResponseCounter = stats.counter("empty")
  private[this] val nonEmptyResponseCounter = stats.counter("non_empty")

  /**
   * Invokes the underlying service and registers a success callback that updates the stats.
   *
   * @return the future produced by the underlying service
   */
  override def apply(
    request: Request[SimClustersANNService.GetTweetCandidates.Args],
    service: Service[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
      SimClustersANNService.GetTweetCandidates.SuccessType
    ]]
  ): Future[Response[SimClustersANNService.GetTweetCandidates.SuccessType]] = {
    val response = service(request)

    response.onSuccess { successResponse =>
      val candidates = successResponse.value
      val responseCounter =
        if (candidates.isEmpty) emptyResponseCounter else nonEmptyResponseCounter
      responseCounter.incr()
      candidates.foreach { candidate =>
        candidateScoreStats.add(candidate.score.toFloat * 1000)
      }
    }

    response
  }
}

View File

@ -0,0 +1,53 @@
package com.twitter.simclustersann.filters
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.Service
import com.twitter.finagle.SimpleFilter
import com.twitter.relevance_platform.simclustersann.multicluster.ServiceNameMapper
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import com.twitter.simclustersann.exceptions.InvalidRequestForSimClustersAnnVariantException
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Filter that rejects requests addressed to the wrong SimClusters ANN service variant.
 *
 * The expected service name is looked up from the request's model version and candidate
 * embedding type and compared with the name of the running service.
 */
@Singleton
class SimClustersAnnVariantFilter @Inject() (
  serviceNameMapper: ServiceNameMapper,
  serviceIdentifier: ServiceIdentifier,
) extends SimpleFilter[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
      SimClustersANNService.GetTweetCandidates.SuccessType
    ]] {

  /** Validates the request, then forwards it to the underlying service. */
  override def apply(
    request: Request[SimClustersANNService.GetTweetCandidates.Args],
    service: Service[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
      SimClustersANNService.GetTweetCandidates.SuccessType
    ]]
  ): Future[Response[SimClustersANNService.GetTweetCandidates.SuccessType]] = {
    validateRequest(request)
    service(request)
  }

  /**
   * Throws InvalidRequestForSimClustersAnnVariantException unless the mapper returns a service
   * name equal to this service's own name.
   */
  private def validateRequest(
    request: Request[SimClustersANNService.GetTweetCandidates.Args]
  ): Unit = {
    val query = request.args.query
    val modelVersion = query.sourceEmbeddingId.modelVersion
    val embeddingType = query.config.candidateEmbeddingType
    val actualServiceName = serviceIdentifier.service
    val expectedServiceName = serviceNameMapper.getServiceName(modelVersion, embeddingType)

    val servedByThisVariant = expectedServiceName.contains(actualServiceName)
    if (!servedByThisVariant) {
      throw InvalidRequestForSimClustersAnnVariantException(
        modelVersion,
        embeddingType,
        actualServiceName,
        expectedServiceName)
    }
  }
}

View File

@ -0,0 +1,24 @@
# Scala library target built with the `fatal_warnings` compiler option set.
# Depends on Finagle mTLS authentication and stats, Finatra inject core, the frigate / hermit
# store packages, relevance-platform injection / readablestore / multicluster packages, the
# representation-manager client, the SimClusters ANN candidate_source / common / exceptions
# and Thrift targets, and the simclusters_v2 and storehaus_internal packages.
scala_library(
compiler_option_sets = ["fatal_warnings"],
tags = ["bazel-compatible"],
dependencies = [
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"finagle/finagle-stats",
"finatra/inject/inject-core/src/main/scala",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/simclustersann/multicluster",
"representation-manager/client/src/main/scala/com/twitter/representation_manager",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common",
"simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/scala/com/twitter/simclusters_v2/summingbird",
"src/scala/com/twitter/storehaus_internal/memcache",
"src/scala/com/twitter/storehaus_internal/util",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -0,0 +1,34 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.finagle.memcached.Client
import javax.inject.Singleton
import com.twitter.conversions.DurationOps._
import com.twitter.inject.TwitterModule
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.annotations.Flag
import com.twitter.simclustersann.common.FlagNames
import com.twitter.storehaus_internal.memcache.MemcacheStore
import com.twitter.storehaus_internal.util.ClientName
import com.twitter.storehaus_internal.util.ZkEndPoint
/** Guice module providing the memcached client used by the SimClusters ANN server. */
object CacheModule extends TwitterModule {

  /**
   * Builds the memcached client named "memcache_simclusters_ann".
   *
   * @param cacheDest         destination of the cache, wrapped in a ZkEndPoint
   * @param cacheTimeout      client timeout, applied in milliseconds
   * @param serviceIdentifier identifier of the running service, passed to the client builder
   * @param stats             stats receiver; the client reports under the "cache_client" scope
   */
  @Singleton
  @Provides
  def providesCache(
    @Flag(FlagNames.CacheDest) cacheDest: String,
    @Flag(FlagNames.CacheTimeout) cacheTimeout: Int,
    serviceIdentifier: ServiceIdentifier,
    stats: StatsReceiver
  ): Client = {
    val clientName = ClientName("memcache_simclusters_ann")
    val endpoint = ZkEndPoint(cacheDest)
    val clientStats = stats.scope("cache_client")

    MemcacheStore.memcachedClient(
      name = clientName,
      dest = endpoint,
      timeout = cacheTimeout.milliseconds,
      retries = 0,
      statsReceiver = clientStats,
      serviceIdentifier = serviceIdentifier
    )
  }
}

View File

@ -0,0 +1,15 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfigMapper
import javax.inject.Singleton
/** Guice module exposing the `ClusterConfigMapper` object for injection. */
object ClusterConfigMapperModule extends TwitterModule {

  /** Provides the `ClusterConfigMapper` object as a singleton binding. */
  @Singleton
  @Provides
  def providesClusterConfigMapper(): ClusterConfigMapper = ClusterConfigMapper
}

View File

@ -0,0 +1,25 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfig
import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfigMapper
import com.twitter.simclustersann.exceptions.MissingClusterConfigForSimClustersAnnVariantException
import javax.inject.Singleton
/** Guice module resolving the ClusterConfig of the running SimClusters ANN service variant. */
object ClusterConfigModule extends TwitterModule {

  /**
   * Looks up the cluster configuration registered for this service's name.
   *
   * @throws MissingClusterConfigForSimClustersAnnVariantException when the mapper has no
   *         configuration for the service name
   */
  @Singleton
  @Provides
  def providesClusterConfig(
    serviceIdentifier: ServiceIdentifier,
    clusterConfigMapper: ClusterConfigMapper
  ): ClusterConfig = {
    val serviceName = serviceIdentifier.service
    clusterConfigMapper
      .getClusterConfig(serviceName)
      .getOrElse(throw MissingClusterConfigForSimClustersAnnVariantException(serviceName))
  }
}

View File

@ -0,0 +1,95 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.conversions.DurationOps._
import com.twitter.decider.Decider
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.inject.annotations.Flag
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfig
import com.twitter.relevance_platform.simclustersann.multicluster.ClusterTweetIndexStoreConfig
import com.twitter.simclusters_v2.common.ClusterId
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.summingbird.stores.ClusterKey
import com.twitter.simclusters_v2.summingbird.stores.TopKTweetsForClusterKeyReadableStore
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclustersann.common.FlagNames
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton
/**
 * Guice module providing the cluster-to-top-tweets index store.
 *
 * The provided store is layered: an ObservedCachedReadableStore (10 minute TTL, up to 150000
 * keys) in front of a memcached-backed store (15 minute TTL), in front of the backing store
 * selected by the ClusterConfig.
 */
object ClusterTweetIndexProviderModule extends TwitterModule {
@Singleton
@Provides
// Provides ClusterTweetIndex Store based on different maxResults settings on the same store
// Create a different provider if index is in a different store
// NOTE(review): the `decider` parameter is not referenced in this method body.
def providesClusterTweetIndex(
@Flag(FlagNames.MaxTopTweetPerCluster) maxTopTweetPerCluster: Int,
@Flag(FlagNames.CacheAsyncUpdate) asyncUpdate: Boolean,
clusterConfig: ClusterConfig,
serviceIdentifier: ServiceIdentifier,
stats: StatsReceiver,
decider: Decider,
simClustersANNCacheClient: Client
): ReadableStore[ClusterId, Seq[(TweetId, Double)]] = {
// Build the underlying cluster-to-tweet store
val topTweetsForClusterStore = clusterConfig.clusterTweetIndexStoreConfig match {
// If the config returns Manhattan tweet index config, we read from a RO MH store
case manhattanConfig: ClusterTweetIndexStoreConfig.Manhattan =>
TopKTweetsForClusterKeyReadableStore.getClusterToTopKTweetsStoreFromManhattanRO(
maxTopTweetPerCluster,
manhattanConfig,
serviceIdentifier)
// If the config is a Memcached tweet index config, we read from the memcache-backed store
case memCacheConfig: ClusterTweetIndexStoreConfig.Memcached =>
TopKTweetsForClusterKeyReadableStore.getClusterToTopKTweetsStoreFromMemCache(
maxTopTweetPerCluster,
memCacheConfig,
serviceIdentifier)
case _ =>
// Bad instance
// Any other config type silently falls back to an empty store.
ReadableStore.empty
}
val embeddingType: EmbeddingType = clusterConfig.candidateTweetEmbeddingType
val modelVersion: String = ModelVersions.toKnownForModelVersion(clusterConfig.modelVersion)
// Adapt the store so callers key it by ClusterId alone; the model version and embedding type
// come from the cluster config.
val store: ReadableStore[ClusterId, Seq[(TweetId, Double)]] =
topTweetsForClusterStore.composeKeyMapping { id: ClusterId =>
ClusterKey(id, modelVersion, embeddingType)
}
// Memcached layer: values are serialized with SeqObjectInjection composed with LZ4Injection.
val memcachedTopTweetsForClusterStore =
ObservedMemcachedReadableStore.fromCacheClient(
backingStore = store,
cacheClient = simClustersANNCacheClient,
ttl = 15.minutes,
asyncUpdate = asyncUpdate
)(
valueInjection = LZ4Injection.compose(SeqObjectInjection[(Long, Double)]()),
statsReceiver = stats.scope("cluster_tweet_index_mem_cache"),
keyToString = { k =>
// prod cache key : SimClusters_LZ4/cluster_to_tweet/clusterId_embeddingType_modelVersion
// NOTE(review): the format string below uses the "scz:c2t:" prefix and also appends
// maxTopTweetPerCluster, which differs from the key described above; confirm which is current.
s"scz:c2t:${k}_${embeddingType}_${modelVersion}_$maxTopTweetPerCluster"
}
)
// Outermost cache layer in front of the memcached-backed store.
val cachedStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]] = {
ObservedCachedReadableStore.from[ClusterId, Seq[(TweetId, Double)]](
memcachedTopTweetsForClusterStore,
ttl = 10.minute,
maxKeys = 150000,
cacheName = "cluster_tweet_index_cache",
windowSize = 10000L
)(stats.scope("cluster_tweet_index_store"))
}
cachedStore
}
}

View File

@ -0,0 +1,99 @@
package com.twitter.simclustersann.modules
import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule
import com.twitter.finatra.thrift.ThriftServer
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.thriftwebforms.MethodOptions
import com.twitter.thriftwebforms.view.ServiceResponseView
import com.twitter.util.Future
import com.twitter.simclustersann.thriftscala.SimClustersANNTweetCandidate
import com.twitter.simclustersann.thriftscala.Query
import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.thriftwebforms.MethodOptions.Access
import scala.reflect.ClassTag
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import scala.collection.mutable
/**
 * Thrift web forms module for the SimClusters ANN server.
 *
 * It pre-fills the `getTweetCandidates` form with an example query, restricts access by LDAP
 * group, and renders the returned candidates as embedded tweets with their scores.
 */
class CustomMtlsThriftWebFormsModule[T: ClassTag](server: ThriftServer)
    extends MtlsThriftWebFormsModule[T](server: ThriftServer) {

  private val Nbsp = "&nbsp;"
  private val LdapGroups = Seq("recosplat-sensitive-data-medium", "simclusters-ann-admins")

  // LDAP groups used as the default access list for methods without an override.
  val FullAccessLdapGroups: Seq[String] =
    Seq(
      "recosplat-sensitive-data-medium",
      "simclusters-ann-admins",
      "recos-platform-admins"
    )

  /** Options for `getTweetCandidates`: default request, renderer and access override. */
  override protected def methodOptions: Map[String, MethodOptions] = {
    val exampleTweetId = 1568796529690902529L

    val exampleSourceEmbeddingId = SimClustersEmbeddingId(
      embeddingType = EmbeddingType.LogFavLongestL2EmbeddingTweet,
      modelVersion = ModelVersion.Model20m145k2020,
      internalId = InternalId.TweetId(exampleTweetId)
    )

    val exampleConfig = SimClustersANNConfig(
      maxNumResults = 10,
      minScore = 0.0,
      candidateEmbeddingType = EmbeddingType.LogFavBasedTweet,
      maxTopTweetsPerCluster = 400,
      maxScanClusters = 50,
      maxTweetCandidateAgeHours = 24,
      minTweetCandidateAgeHours = 0,
      annAlgorithm = ScoringAlgorithm.CosineSimilarity
    )

    val sannDefaultQuery = SimClustersANNService.GetTweetCandidates.Args(
      query = Query(
        sourceEmbeddingId = exampleSourceEmbeddingId,
        config = exampleConfig
      ))

    val getTweetCandidatesOptions = MethodOptions(
      defaultRequestValue = Some(sannDefaultQuery),
      responseRenderers = Seq(renderTimeline),
      allowedAccessOverride = Some(Access.ByLdapGroup(LdapGroups))
    )

    Map("getTweetCandidates" -> getTweetCandidatesOptions)
  }

  override protected def defaultMethodAccess: MethodOptions.Access = {
    MethodOptions.Access.ByLdapGroup(FullAccessLdapGroups)
  }

  /**
   * Renders a service response as a list of embedded tweets.
   *
   * @param r the raw response; an Iterable has every element cast to
   *          SimClustersANNTweetCandidate, anything else is rendered as an empty list
   */
  def renderTimeline(r: AnyRef): Future[ServiceResponseView] = {
    val simClustersANNTweetCandidates = r match {
      case response: Iterable[_] =>
        response.map(_.asInstanceOf[SimClustersANNTweetCandidate]).toSeq
      case _ => Seq()
    }
    renderTweets(simClustersANNTweetCandidates)
  }

  /** Builds the HTML view: a header followed by one embedded tweet and score per candidate. */
  private def renderTweets(
    simClustersANNTweetCandidates: Seq[SimClustersANNTweetCandidate]
  ): Future[ServiceResponseView] = {
    val headerHtml = "<h3>Tweet Candidates</h3>"
    val tweetsHtml = simClustersANNTweetCandidates.map { candidate =>
      s"""<blockquote class="twitter-tweet"><a href="https://twitter.com/tweet/statuses/${candidate.tweetId}"></a></blockquote> <b>score:</b> ${candidate.score} <br><br>"""
    }.mkString
    val pageHtml = headerHtml + Nbsp + tweetsHtml

    Future.value(
      ServiceResponseView(
        "SimClusters ANN Tweet Candidates",
        pageHtml,
        Seq("//platform.twitter.com/widgets.js")
      )
    )
  }
}

View File

@ -0,0 +1,110 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.decider.Decider
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.representation_manager.StoreBuilder
import com.twitter.representation_manager.config.{
DefaultClientConfig => RepresentationManagerDefaultClientConfig
}
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Singleton
/**
 * Guice module providing the SimClusters embedding store.
 *
 * One underlying store is built per listed (embedding type, model version) view through the
 * representation manager's StoreBuilder; the stores are then combined with
 * `SimClustersEmbeddingStore.buildWithDecider`.
 */
object EmbeddingStoreModule extends TwitterModule {

  // Views for which a tweet embedding store is built.
  val TweetEmbeddings: Set[SimClustersEmbeddingView] = Set(
    SimClustersEmbeddingView(LogFavLongestL2EmbeddingTweet, Model20m145kUpdated),
    SimClustersEmbeddingView(LogFavLongestL2EmbeddingTweet, Model20m145k2020)
  )

  // Views for which a user embedding store is built.
  val UserEmbeddings: Set[SimClustersEmbeddingView] = Set(
    // KnownFor
    SimClustersEmbeddingView(FavBasedProducer, Model20m145kUpdated),
    SimClustersEmbeddingView(FavBasedProducer, Model20m145k2020),
    SimClustersEmbeddingView(FollowBasedProducer, Model20m145k2020),
    SimClustersEmbeddingView(AggregatableLogFavBasedProducer, Model20m145k2020),
    // InterestedIn
    SimClustersEmbeddingView(UnfilteredUserInterestedIn, Model20m145k2020),
    SimClustersEmbeddingView(
      LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020),
    SimClustersEmbeddingView(
      LogFavBasedUserInterestedAverageAddressBookFromIIAPE,
      Model20m145k2020),
    SimClustersEmbeddingView(
      LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020),
    SimClustersEmbeddingView(
      LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020),
    SimClustersEmbeddingView(
      LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020),
    SimClustersEmbeddingView(
      LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020),
    SimClustersEmbeddingView(UserNextInterestedIn, Model20m145k2020),
    SimClustersEmbeddingView(LogFavBasedUserInterestedInFromAPE, Model20m145k2020)
  )

  /**
   * Builds the embedding store covering every view in `TweetEmbeddings` and `UserEmbeddings`.
   *
   * @param stratoClient    Strato client handed to the StoreBuilder
   * @param memCachedClient memcached client handed to the StoreBuilder
   * @param decider         decider passed to `buildWithDecider`
   * @param stats           stats receiver shared by the builder and the resulting store
   */
  @Singleton
  @Provides
  def providesEmbeddingStore(
    stratoClient: StratoClient,
    memCachedClient: MemcachedClient,
    decider: Decider,
    stats: StatsReceiver
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    type StoreKey = (EmbeddingType, ModelVersion)
    type EmbeddingStore = ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]

    val rmsStoreBuilder = new StoreBuilder(
      clientConfig = RepresentationManagerDefaultClientConfig,
      stratoClient = stratoClient,
      memCachedClient = memCachedClient,
      globalStats = stats,
    )

    // Key under which the store of a given view is registered.
    def storeKey(view: SimClustersEmbeddingView): StoreKey =
      (view.embeddingType, view.modelVersion)

    val tweetEmbeddingStores: Map[StoreKey, EmbeddingStore] =
      TweetEmbeddings.map { view =>
        storeKey(view) ->
          rmsStoreBuilder.buildSimclustersTweetEmbeddingStoreWithEmbeddingIdAsKey(view)
      }.toMap

    val userEmbeddingStores: Map[StoreKey, EmbeddingStore] =
      UserEmbeddings.map { view =>
        storeKey(view) ->
          rmsStoreBuilder.buildSimclustersUserEmbeddingStoreWithEmbeddingIdAsKey(view)
      }.toMap

    val underlyingStores: Map[StoreKey, EmbeddingStore] =
      tweetEmbeddingStores ++ userEmbeddingStores

    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = decider,
      statsReceiver = stats
    )
  }
}

View File

@ -0,0 +1,44 @@
package com.twitter.simclustersann.modules
import com.twitter.inject.TwitterModule
import com.twitter.simclustersann.common.FlagNames
/**
 * Declares the server's flags and their default values. The flag names come from FlagNames and
 * the values are injected elsewhere through `@Flag(...)`.
 */
object FlagsModule extends TwitterModule {
// Request timeout; SimClustersANNController applies the value in milliseconds.
flag[Int](
name = FlagNames.ServiceTimeout,
default = 40,
help = "The threshold of Request Timeout"
)
flag[String](
name = FlagNames.DarkTrafficFilterDeciderKey,
default = "dark_traffic_filter",
help = "Dark traffic filter decider key"
)
flag[String](
name = FlagNames.CacheDest,
default = "/s/cache/content_recommender_unified_v2",
help = "Path to memcache service. Currently using CR uniform scoring cache"
)
// Memcache client timeout; CacheModule applies the value in milliseconds.
flag[Int](
name = FlagNames.CacheTimeout,
default = 15,
help = "The threshold of MemCache Timeout"
)
flag[Boolean](
name = FlagNames.CacheAsyncUpdate,
default = false,
help = "Whether to enable the async update for the MemCache"
)
flag[Int](
name = FlagNames.MaxTopTweetPerCluster,
default = 200,
help = "Maximum number of tweets to take per each simclusters"
)
}

View File

@ -0,0 +1,27 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.inject.TwitterModule
import com.twitter.inject.annotations.Flag
import com.twitter.simclustersann.common.FlagNames.NumberOfThreads
import com.twitter.util.ExecutorServiceFuturePool
import java.util.concurrent.Executors
import javax.inject.Singleton
/**
 * Guice module providing the future pool backed by a fixed-size thread pool. The pool size is
 * controlled by the `NumberOfThreads` flag declared here.
 */
object FuturePoolProvider extends TwitterModule {

  flag[Int](
    name = NumberOfThreads,
    default = 20,
    help = "The number of threads in the future pool."
  )

  /**
   * Creates the future pool.
   *
   * @param numberOfThreads number of threads of the underlying fixed thread pool
   * @return a future pool whose `toString` is "warmup-future-pool-" followed by the executor
   */
  @Singleton
  @Provides
  def providesFuturePool(
    @Flag(NumberOfThreads) numberOfThreads: Int
  ): ExecutorServiceFuturePool = {
    val threadPool = Executors.newFixedThreadPool(numberOfThreads)
    new ExecutorServiceFuturePool(threadPool) {
      // The original interpolation ended with a stray ")" that had no opening counterpart.
      override def toString: String = s"warmup-future-pool-$executor"
    }
  }
}

View File

@ -0,0 +1,23 @@
package com.twitter.simclustersann.modules
import com.google.common.util.concurrent.RateLimiter
import com.google.inject.Provides
import com.twitter.inject.TwitterModule
import com.twitter.inject.annotations.Flag
import com.twitter.simclustersann.common.FlagNames.RateLimiterQPS
import javax.inject.Singleton
/**
 * Guice module providing a Guava RateLimiter whose rate is controlled by the `RateLimiterQPS`
 * flag declared here.
 */
object RateLimiterModule extends TwitterModule {

  flag[Int](
    name = RateLimiterQPS,
    default = 1000,
    help = "The QPS allowed by the rate limiter."
  )

  /**
   * Creates the rate limiter.
   *
   * @param rateLimiterQps permits per second handed to `RateLimiter.create`
   */
  @Singleton
  @Provides
  def providesRateLimiter(
    @Flag(RateLimiterQPS) rateLimiterQps: Int
  ): RateLimiter = {
    val permitsPerSecond = rateLimiterQps.toDouble
    RateLimiter.create(permitsPerSecond)
  }
}

View File

@ -0,0 +1,15 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.simclustersann.multicluster.ServiceNameMapper
import javax.inject.Singleton
/** Guice module exposing the `ServiceNameMapper` object for injection. */
object ServiceNameMapperModule extends TwitterModule {

  /** Provides the `ServiceNameMapper` object as a singleton binding. */
  @Singleton
  @Provides
  def providesServiceNameMapper(): ServiceNameMapper = ServiceNameMapper
}

View File

@ -0,0 +1,47 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.ClusterId
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton
import com.twitter.simclustersann.candidate_source.ApproximateCosineSimilarity
import com.twitter.simclustersann.candidate_source.ExperimentalApproximateCosineSimilarity
import com.twitter.simclustersann.candidate_source.OptimizedApproximateCosineSimilarity
import com.twitter.simclustersann.candidate_source.SimClustersANNCandidateSource
/**
 * Module that wires up the [[SimClustersANNCandidateSource]], choosing the approximate
 * cosine similarity implementation from the `approximate_cosine_similarity` flag.
 */
object SimClustersANNCandidateSourceModule extends TwitterModule {

  // Accepted values handled below: "original" (default), "optimized", "experimental".
  val acsFlag = flag[String](
    name = "approximate_cosine_similarity",
    default = "original",
    help =
      "Select different implementations of the approximate cosine similarity algorithm, for testing optimizations"
  )

  /**
   * Builds the candidate source.
   *
   * @param embeddingStore               store passed as `simClustersEmbeddingStore`.
   * @param cachedClusterTweetIndexStore store passed as `clusterTweetCandidatesStore`.
   * @param statsReceiver                parent stats receiver; scoped to
   *                                     "simClustersANNCandidateSource" before use.
   * @return the candidate source using the similarity implementation selected by `acsFlag`.
   */
  @Provides
  @Singleton
  def provides(
    embeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding],
    cachedClusterTweetIndexStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]],
    statsReceiver: StatsReceiver
  ): SimClustersANNCandidateSource = {
    val similarityImpl = acsFlag() match {
      case "optimized" => OptimizedApproximateCosineSimilarity
      case "experimental" => ExperimentalApproximateCosineSimilarity
      // "original" and every unrecognised value select the original implementation.
      case _ => ApproximateCosineSimilarity
    }
    val scopedStats = statsReceiver.scope("simClustersANNCandidateSource")

    new SimClustersANNCandidateSource(
      approximateCosineSimilarity = similarityImpl,
      clusterTweetCandidatesStore = cachedClusterTweetIndexStore,
      simClustersEmbeddingStore = embeddingStore,
      statsReceiver = scopedStats
    )
  }
}

View File

@ -0,0 +1,20 @@
package com.twitter.simclustersann.modules
import com.google.inject.Provides
import javax.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.strato.client.Client
import com.twitter.strato.client.Strato
/**
 * Module that provides a singleton Strato [[Client]] configured with mutual TLS.
 */
object StratoClientProviderModule extends TwitterModule {

  /**
   * Builds the Strato client. Despite the method name, the provided value is a
   * Strato `Client`, not a cache.
   *
   * @param serviceIdentifier identity passed to `withMutualTls`.
   * @return the client produced by `Strato.client.withMutualTls(...).build()`.
   */
  @Provides
  @Singleton
  def providesCache(
    serviceIdentifier: ServiceIdentifier
  ): Client = {
    val mtlsBuilder = Strato.client.withMutualTls(serviceIdentifier)
    mtlsBuilder.build()
  }
}