Twitter Recommendation Algorithm

Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
2025-06-10 22:58:17 -05:00 · 2023-03-31 17:36:31 -05:00
commit ef4c5eb65e
5364 changed files with 460239 additions and 0 deletions
--- a/simclusters-ann/server/src/main/resources/BUILD
+++ b/simclusters-ann/server/src/main/resources/BUILD
@ -0,0 +1,7 @@
+resources(
+    sources = [
+        "*.xml",
+        "config/*.yml",
+    ],
+    tags = ["bazel-compatible"],
+)
--- a/simclusters-ann/server/src/main/resources/config/decider.yml
+++ b/simclusters-ann/server/src/main/resources/config/decider.yml
@ -0,0 +1,95 @@
+# SimClusters embedding store enable / disable decider values
+
+# ---------- Dark Traffic Proxy ----------
+dark_traffic_filter:
+  comment: Proportion of the requests that are forwarded as dark traffic to the proxy
+  default_availability: 0
+
+# Tweet embeddings
+enable_LogFavBasedTweet_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavLongestL2EmbeddingTweet_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+# Entity embeddings
+enable_FavTfgTopic_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+
+enable_LogFavBasedKgoApeTopic_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+# KnownFor embeddings
+enable_FavBasedProducer_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_FollowBasedProducer_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_RelaxedAggregatableLogFavBasedProducer_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+# InterestedIn embeddings
+enable_LogFavBasedUserInterestedInFromAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_FollowBasedUserInterestedInFromAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_FavBasedUserInterestedIn_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_FollowBasedUserInterestedIn_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedIn_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_FilteredUserInterestedIn_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_UnfilteredUserInterestedIn_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedAverageAddressBookFromIIAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
+
+enable_UserNextInterestedIn_Model20m145k2020:
+  comment: "Enable the read traffic to (embeddingType, modelVersion) from 0% to 100%. 0 means return EMPTY for all requests."
+  default_availability: 10000
--- a/simclusters-ann/server/src/main/resources/logback.xml
+++ b/simclusters-ann/server/src/main/resources/logback.xml
@ -0,0 +1,167 @@
+<configuration>
+    <shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
+
+    <!-- ===================================================== -->
+    <!-- Service Config -->
+    <!-- ===================================================== -->
+    <property name="DEFAULT_SERVICE_PATTERN"
+              value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
+
+    <property name="DEFAULT_ACCESS_PATTERN"
+              value="%msg"/>
+
+    <!-- ===================================================== -->
+    <!-- Common Config -->
+    <!-- ===================================================== -->
+
+    <!-- JUL/JDK14 to Logback bridge -->
+    <contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
+        <resetJUL>true</resetJUL>
+    </contextListener>
+
+    <!-- ====================================================================================== -->
+    <!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration.       -->
+    <!--       You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy.    -->
+    <!--       See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy  -->
+    <!-- ====================================================================================== -->
+
+    <!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
+    <appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>${log.service.output}</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
+            <!-- daily rollover -->
+            <fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
+            <!-- the maximum total size of all the log files -->
+            <totalSizeCap>3GB</totalSizeCap>
+            <!-- keep maximum 21 days' worth of history -->
+            <maxHistory>21</maxHistory>
+            <cleanHistoryOnStart>true</cleanHistoryOnStart>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- Access Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
+    <appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>${log.access.output}</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
+            <!-- daily rollover -->
+            <fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
+            <!-- the maximum total size of all the log files -->
+            <totalSizeCap>100MB</totalSizeCap>
+            <!-- keep maximum 7 days' worth of history -->
+            <maxHistory>7</maxHistory>
+            <cleanHistoryOnStart>true</cleanHistoryOnStart>
+        </rollingPolicy>
+        <encoder>
+            <pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
+        </encoder>
+    </appender>
+
+    <!--LogLens -->
+    <appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
+        <mdcAdditionalContext>true</mdcAdditionalContext>
+        <category>${log.lens.category}</category>
+        <index>${log.lens.index}</index>
+        <tag>${log.lens.tag}/service</tag>
+        <encoder>
+            <pattern>%msg</pattern>
+        </encoder>
+    </appender>
+
+    <!-- LogLens Access -->
+    <appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
+        <mdcAdditionalContext>true</mdcAdditionalContext>
+        <category>${log.lens.category}</category>
+        <index>${log.lens.index}</index>
+        <tag>${log.lens.tag}/access</tag>
+        <encoder>
+            <pattern>%msg</pattern>
+        </encoder>
+    </appender>
+
+    <!-- Pipeline Execution Logs -->
+    <appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>allow_listed_pipeline_executions.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
+            <!-- daily rollover -->
+            <fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
+             <!-- the maximum total size of all the log files -->
+            <totalSizeCap>100MB</totalSizeCap>
+            <!-- keep maximum 7 days' worth of history -->
+            <maxHistory>7</maxHistory>
+            <cleanHistoryOnStart>true</cleanHistoryOnStart>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- ===================================================== -->
+    <!-- Primary Async Appenders -->
+    <!-- ===================================================== -->
+
+    <property name="async_queue_size" value="${queue.size:-50000}"/>
+    <property name="async_max_flush_time" value="${max.flush.time:-0}"/>
+
+    <appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
+        <queueSize>${async_queue_size}</queueSize>
+        <maxFlushTime>${async_max_flush_time}</maxFlushTime>
+        <appender-ref ref="SERVICE"/>
+    </appender>
+
+    <appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
+        <queueSize>${async_queue_size}</queueSize>
+        <maxFlushTime>${async_max_flush_time}</maxFlushTime>
+        <appender-ref ref="ACCESS"/>
+    </appender>
+
+    <appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
+        <queueSize>${async_queue_size}</queueSize>
+        <maxFlushTime>${async_max_flush_time}</maxFlushTime>
+        <appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
+    </appender>
+
+    <appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
+        <queueSize>${async_queue_size}</queueSize>
+        <maxFlushTime>${async_max_flush_time}</maxFlushTime>
+        <appender-ref ref="LOGLENS"/>
+    </appender>
+
+    <appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
+        <queueSize>${async_queue_size}</queueSize>
+        <maxFlushTime>${async_max_flush_time}</maxFlushTime>
+        <appender-ref ref="LOGLENS-ACCESS"/>
+    </appender>
+
+    <!-- ===================================================== -->
+    <!-- Package Config -->
+    <!-- ===================================================== -->
+
+    <!-- Per-Package Config -->
+    <logger name="com.twitter" level="INHERITED"/>
+    <logger name="com.twitter.wilyns" level="INHERITED"/>
+    <logger name="com.twitter.configbus.client.file" level="INHERITED"/>
+    <logger name="com.twitter.finagle.mux" level="INHERITED"/>
+    <logger name="com.twitter.finagle.serverset2" level="INHERITED"/>
+    <logger name="com.twitter.logging.ScribeHandler" level="INHERITED"/>
+    <logger name="com.twitter.zookeeper.client.internal" level="INHERITED"/>
+    <!-- Disable deadline exceeded logs by default. This can be overridden dynamically in the admin panel of individual instances. -->
+    <logger name="com.twitter.relevance_platform.common.exceptions.DeadlineExceededExceptionMapper" level="OFF"/>
+
+    <!-- Root Config -->
+    <!-- For all logs except access logs, disable logging below log_level level by default. This can be overridden in the per-package loggers, and dynamically in the admin panel of individual instances. -->
+    <root level="${log_level:-INFO}">
+        <appender-ref ref="ASYNC-SERVICE"/>
+        <appender-ref ref="ASYNC-LOGLENS"/>
+    </root>
+
+    <!-- Access Logging -->
+   <!-- Access logs are turned off by default -->
+    <logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter" level="OFF" additivity="false">
+        <appender-ref ref="ASYNC-ACCESS"/>
+        <appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
+    </logger>
+
+</configuration>
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/BUILD
@ -0,0 +1,31 @@
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "3rdparty/jvm/com/google/inject:guice",
+        "3rdparty/jvm/javax/inject:javax.inject",
+        "3rdparty/jvm/net/codingwell:scala-guice",
+        "finagle/finagle-core/src/main",
+        "finagle/finagle-http/src/main/scala",
+        "finagle/finagle-thriftmux/src/main/scala",
+        "finatra-internal/decider/src/main/scala",
+        "finatra-internal/mtls-thriftmux/src/main/scala",
+        "finatra/inject/inject-app/src/main/scala",
+        "finatra/inject/inject-core/src/main/scala",
+        "finatra/inject/inject-server/src/main/scala",
+        "finatra/inject/inject-thrift-client/src/main/scala",
+        "finatra/inject/inject-utils/src/main/scala",
+        "finatra/utils/src/main/java/com/twitter/finatra/annotations",
+        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/exceptions",
+        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters",
+        "simclusters-ann/server/src/main/resources",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/controllers",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules",
+        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
+        "src/thrift/com/twitter/search:earlybird-scala",
+        "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view",
+        "twitter-server/server/src/main/scala",
+        "util/util-app/src/main/scala",
+        "util/util-core:scala",
+    ],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/SimclustersAnnServer.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/SimclustersAnnServer.scala
@ -0,0 +1,70 @@
+package com.twitter.simclustersann
+
+import com.google.inject.Module
+import com.twitter.finatra.decider.modules.DeciderModule
+import com.twitter.finatra.mtls.thriftmux.Mtls
+import com.twitter.finatra.thrift.ThriftServer
+import com.twitter.finatra.thrift.filters._
+import com.twitter.finatra.thrift.routing.ThriftRouter
+import com.twitter.inject.thrift.modules.ThriftClientIdModule
+import com.twitter.relevance_platform.common.exceptions._
+import com.twitter.simclustersann.controllers.SimClustersANNController
+import com.twitter.simclustersann.exceptions.InvalidRequestForSimClustersAnnVariantExceptionMapper
+import com.twitter.simclustersann.modules._
+import com.twitter.simclustersann.thriftscala.SimClustersANNService
+import com.twitter.finagle.Filter
+import com.twitter.finatra.annotations.DarkTrafficFilterType
+import com.twitter.inject.annotations.Flags
+import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule
+import com.twitter.relevance_platform.common.filters.ClientStatsFilter
+import com.twitter.simclustersann.common.FlagNames.DisableWarmup
+
+// Singleton instance of the server below.
+// NOTE(review): presumably the process entry point — confirm against the launch configuration.
+object SimClustersAnnServerMain extends SimClustersAnnServer
+
+/**
+ * Thrift server for the SimClusters ANN service (a Finatra `ThriftServer` with `Mtls` mixed in).
+ *
+ * It declares the `DisableWarmup` flag, lists the injection modules the service is built from,
+ * configures the thrift router (filters, exception mappers, controller), and runs
+ * [[SimclustersAnnWarmupHandler]] during warmup unless the flag disables it.
+ */
+class SimClustersAnnServer extends ThriftServer with Mtls {
+  // Boolean flag, false by default; when set to true the warmup handler is skipped (see warmup()).
+  flag(
+    name = DisableWarmup,
+    default = false,
+    help = "If true, no warmup will be run."
+  )
+
+  override val name = "simclusters-ann-server"
+
+  // Modules installed into the injector. Most come from com.twitter.simclustersann.modules
+  // (wildcard import); the last two are instantiated here and parameterized on the thrift service.
+  override val modules: Seq[Module] = Seq(
+    CacheModule,
+    ServiceNameMapperModule,
+    ClusterConfigMapperModule,
+    ClusterConfigModule,
+    ClusterTweetIndexProviderModule,
+    DeciderModule,
+    EmbeddingStoreModule,
+    FlagsModule,
+    FuturePoolProvider,
+    RateLimiterModule,
+    SimClustersANNCandidateSourceModule,
+    StratoClientProviderModule,
+    ThriftClientIdModule,
+    new CustomMtlsThriftWebFormsModule[SimClustersANNService.MethodPerEndpoint](this),
+    new DarkTrafficFilterModule[SimClustersANNService.ReqRepServicePerEndpoint]()
+  )
+
+  /**
+   * Registers the filters, the exception mappers and the single controller on the thrift router.
+   *
+   * NOTE(review): the registration order of filters and mappers is presumably significant to
+   * the router — keep the order unless the routing semantics have been checked.
+   *
+   * @param router the thrift router to configure
+   */
+  def configureThrift(router: ThriftRouter): Unit = {
+    router
+      .filter[LoggingMDCFilter]
+      .filter[TraceIdMDCFilter]
+      .filter[ThriftMDCFilter]
+      .filter[ClientStatsFilter]
+      .filter[ExceptionMappingFilter]
+      // Type-agnostic filter looked up with the DarkTrafficFilterType annotation.
+      .filter[Filter.TypeAgnostic, DarkTrafficFilterType]
+      .exceptionMapper[InvalidRequestForSimClustersAnnVariantExceptionMapper]
+      .exceptionMapper[DeadlineExceededExceptionMapper]
+      .exceptionMapper[UnhandledExceptionMapper]
+      .add[SimClustersANNController]
+  }
+
+  /** Runs [[SimclustersAnnWarmupHandler]] unless the `DisableWarmup` flag is set to true. */
+  override protected def warmup(): Unit = {
+    if (!injector.instance[Boolean](Flags.named(DisableWarmup))) {
+      handle[SimclustersAnnWarmupHandler]()
+    }
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/SimclustersAnnWarmupHandler.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/SimclustersAnnWarmupHandler.scala
@ -0,0 +1,73 @@
+package com.twitter.simclustersann
+
+import com.twitter.inject.Logging
+import com.twitter.inject.utils.Handler
+import javax.inject.Inject
+import scala.util.control.NonFatal
+import com.google.common.util.concurrent.RateLimiter
+import com.twitter.conversions.DurationOps.richDurationFromInt
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.simclusters_v2.common.ClusterId
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Await
+import com.twitter.util.ExecutorServiceFuturePool
+import com.twitter.util.Future
+
+/**
+ * Warmup handler that issues one `get` per cluster id against `clusterTweetCandidatesStore`.
+ *
+ * Each lookup runs on `futurePool`, is throttled by `rateLimiter`, and is given at most 10
+ * seconds. The whole warmup is bounded by 10 minutes. Every failure is swallowed (counted or
+ * logged), so `handle()` never throws NonFatal errors to the caller. The pool's executor is
+ * shut down once the warmup is over.
+ *
+ * NOTE(review): the stats scope is named "fetchFromCache", so the store is presumably
+ * cache-backed and the lookups populate that cache — confirm against the store wiring.
+ *
+ * @param clusterTweetCandidatesStore store queried once per cluster id
+ * @param futurePool pool the blocking lookups run on; its executor is shut down at the end
+ * @param rateLimiter throttles how fast lookups are started
+ * @param statsReceiver receiver under which the warmup counters are scoped
+ */
+class SimclustersAnnWarmupHandler @Inject() (
+  clusterTweetCandidatesStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]],
+  futurePool: ExecutorServiceFuturePool,
+  rateLimiter: RateLimiter,
+  statsReceiver: StatsReceiver)
+    extends Handler
+    with Logging {
+
+  private val stats = statsReceiver.scope(this.getClass.getName)
+
+  private val scopedStats = stats.scope("fetchFromCache")
+  private val clusters = scopedStats.counter("clusters")
+  private val fetchedKeys = scopedStats.counter("keys")
+  private val failures = scopedStats.counter("failures")
+  private val success = scopedStats.counter("success")
+
+  // Exclusive upper bound of the cluster ids that are warmed up (ids 1 until this value).
+  private val SimclustersNumber = 144428
+
+  override def handle(): Unit = {
+    try {
+      val clusterIds = List.range(1, SimclustersNumber)
+      val futures: Seq[Future[Unit]] = clusterIds
+        .map { clusterId =>
+          clusters.incr()
+          futurePool {
+            rateLimiter.acquire()
+
+            try {
+              Await.result(
+                clusterTweetCandidatesStore
+                  .get(clusterId)
+                  .onSuccess { _ =>
+                    success.incr()
+                  }
+                  .handle {
+                    case NonFatal(_) =>
+                      failures.incr()
+                  },
+                timeout = 10.seconds
+              )
+              fetchedKeys.incr()
+            } catch {
+              // A per-key timeout (or any other NonFatal error thrown while waiting) must not
+              // fail this task: Future.collect below fails fast on the first failed future,
+              // which would end the warmup early while most keys are still queued.
+              case NonFatal(_) => failures.incr()
+            }
+          }
+        }
+
+      Await.result(Future.collect(futures), timeout = 10.minutes)
+
+    } catch {
+      case NonFatal(e) => error(e.getMessage, e)
+    } finally {
+      try {
+        futurePool.executor.shutdown()
+      } catch {
+        // Best effort: a failure to shut the executor down must not fail the warmup.
+        case NonFatal(_) =>
+      }
+      info("Warmup done.")
+    }
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/ApproximateCosineSimilarity.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/ApproximateCosineSimilarity.scala
@ -0,0 +1,129 @@
+package com.twitter.simclustersann.candidate_source
+
+import com.twitter.simclusters_v2.common.ClusterId
+import com.twitter.simclusters_v2.common.SimClustersEmbedding
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.simclusters_v2.thriftscala.InternalId
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
+import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import scala.collection.mutable
+
+/**
+ * This store looks for tweets whose similarity is close to a Source SimClustersEmbeddingId.
+ *
+ * Approximate cosine similarity is the core algorithm to drive this store.
+ *
+ * Step 1 - 4 are in "fetchCandidates" method.
+ * 1. Retrieve the SimClusters Embedding by the SimClustersEmbeddingId
+ * 2. Fetch top N clusters' top tweets from the clusterTweetCandidatesStore (TopTweetsPerCluster index).
+ * 3. Calculate all the tweet candidates' dot-product or approximate cosine similarity to source tweets.
+ * 4. Take top M tweet candidates by the step 3's score
+ */
+trait ApproximateCosineSimilarity {
+  // A candidate tweet id paired with its computed score.
+  type ScoredTweet = (Long, Double)
+  /**
+   * Scores the candidate tweets found in the given per-cluster tweet lists against the source
+   * embedding and returns the best ones.
+   *
+   * @param sourceEmbedding embedding the candidates are scored against
+   * @param sourceEmbeddingId id of the source embedding; when it is a tweet id, that tweet is
+   *                          excluded from the candidates by the implementations in this package
+   * @param config query configuration (tweet age bounds, per-cluster tweet limit, scoring
+   *               algorithm, minimum score, maximum number of results)
+   * @param candidateScoresStat callback invoked with the number of distinct candidates
+   *                            collected, before score filtering
+   * @param clusterTweetsMap for each cluster id, the optional list of (tweetId, score) pairs
+   * @param clusterTweetsMapArray array-backed variant of `clusterTweetsMap`.
+   *                              NOTE(review): presumably consumed by the optimized
+   *                              implementation only — confirm against the implementations
+   * @return the scored candidates selected by the implementation
+   */
+  def apply(
+    sourceEmbedding: SimClustersEmbedding,
+    sourceEmbeddingId: SimClustersEmbeddingId,
+    config: SimClustersANNConfig,
+    candidateScoresStat: Int => Unit,
+    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]],
+    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
+  ): Seq[ScoredTweet]
+}
+
+/**
+ * Baseline implementation: accumulates, per candidate tweet, the dot product with the source
+ * embedding and the sum of squared tweet scores in two mutable hash maps, then normalizes,
+ * filters, sorts and truncates the result.
+ */
+object ApproximateCosineSimilarity extends ApproximateCosineSimilarity {
+
+  final val InitialCandidateMapSize = 16384
+  // Hard cap on the number of returned results, whatever config.maxNumResults asks for.
+  val MaxNumResultsUpperBound = 1000
+  // In hours (175200 h = 7300 days). At or above this value the max-age filter is disabled.
+  final val MaxTweetCandidateAgeUpperBound = 175200
+
+  // mutable.HashMap whose initial table size is configurable through the constructor.
+  private class HashMap[A, B](initSize: Int) extends mutable.HashMap[A, B] {
+    override def initialSize: Int = initSize // 16 - by default
+  }
+
+  /** Returns the tweet id carried by the embedding id, or None for any other kind of id. */
+  private def parseTweetId(embeddingId: SimClustersEmbeddingId): Option[TweetId] = {
+    embeddingId.internalId match {
+      case InternalId.TweetId(tweetId) =>
+        Some(tweetId)
+      case _ =>
+        None
+    }
+  }
+
+  /**
+   * Scores every tweet listed under a cluster that the source embedding contains.
+   *
+   * Only the first `config.maxTopTweetsPerCluster` tweets of each cluster are considered, and
+   * only those whose id falls inside the [earliest, latest] snowflake-id window derived from the
+   * configured min/max candidate ages. The source tweet itself is skipped.
+   *
+   * @param sourceEmbedding embedding the candidates are scored against
+   * @param sourceEmbeddingId id of the source embedding (used to exclude the source tweet)
+   * @param config query configuration
+   * @param candidateScoresStat invoked with the number of distinct candidates before filtering
+   * @param clusterTweetsMap per-cluster lists of (tweetId, score)
+   * @param clusterTweetsMapArray not read by this implementation
+   * @return candidates with score >= config.minScore, highest score first, truncated to
+   *         min(config.maxNumResults, MaxNumResultsUpperBound)
+   */
+  override def apply(
+    sourceEmbedding: SimClustersEmbedding,
+    sourceEmbeddingId: SimClustersEmbeddingId,
+    config: SimClustersANNConfig,
+    candidateScoresStat: Int => Unit,
+    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]] = Map.empty,
+    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
+  ): Seq[ScoredTweet] = {
+    val now = Time.now
+    val earliestTweetId =
+      if (config.maxTweetCandidateAgeHours >= MaxTweetCandidateAgeUpperBound)
+        0L // Disable max tweet age filter
+      else
+        SnowflakeId.firstIdFor(now - Duration.fromHours(config.maxTweetCandidateAgeHours))
+    val latestTweetId =
+      SnowflakeId.firstIdFor(now - Duration.fromHours(config.minTweetCandidateAgeHours))
+
+    // Loop-invariant: resolve the source tweet id once instead of once per candidate tweet.
+    val sourceTweetId: Option[TweetId] = parseTweetId(sourceEmbeddingId)
+
+    // Use Mutable map to optimize performance. The maps are local to this call, so the method
+    // stays thread-safe.
+
+    // Set initial map size to around p75 of map size distribution to avoid too many copying
+    // from extending the size of the mutable hashmap
+    val candidateScoresMap =
+      new HashMap[TweetId, Double](InitialCandidateMapSize)
+    val candidateNormalizationMap =
+      new HashMap[TweetId, Double](InitialCandidateMapSize)
+
+    clusterTweetsMap.foreach {
+      case (clusterId, Some(tweetScores)) if sourceEmbedding.contains(clusterId) =>
+        val sourceClusterScore = sourceEmbedding.getOrElse(clusterId)
+
+        // Walk an iterator rather than indexing the Seq: positional access is linear for
+        // List-backed sequences, which would make this loop quadratic in the list length.
+        tweetScores.iterator.take(config.maxTopTweetsPerCluster).foreach {
+          case (tweetId, score) =>
+            if (!sourceTweetId.contains(tweetId) &&
+              tweetId >= earliestTweetId && tweetId <= latestTweetId) {
+              candidateScoresMap.put(
+                tweetId,
+                candidateScoresMap.getOrElse(tweetId, 0.0) + score * sourceClusterScore)
+              candidateNormalizationMap
+                .put(tweetId, candidateNormalizationMap.getOrElse(tweetId, 0.0) + score * score)
+            }
+        }
+      case _ => ()
+    }
+
+    candidateScoresStat(candidateScoresMap.size)
+
+    // Re-Rank the candidate by configuration
+    val processedCandidateScores: Seq[(TweetId, Double)] = candidateScoresMap.map {
+      case (candidateId, score) =>
+        // Enable Partial Normalization
+        val processedScore = {
+          // We applied the "log" version of partial normalization when we rank candidates
+          // by log cosine similarity
+          config.annAlgorithm match {
+            case ScoringAlgorithm.LogCosineSimilarity =>
+              score / sourceEmbedding.logNorm / math.log(1 + candidateNormalizationMap(candidateId))
+            case ScoringAlgorithm.CosineSimilarity =>
+              score / sourceEmbedding.l2norm / math.sqrt(candidateNormalizationMap(candidateId))
+            case ScoringAlgorithm.CosineSimilarityNoSourceEmbeddingNormalization =>
+              score / math.sqrt(candidateNormalizationMap(candidateId))
+            case ScoringAlgorithm.DotProduct => score
+          }
+        }
+        candidateId -> processedScore
+    }.toSeq
+
+    processedCandidateScores
+      .filter(_._2 >= config.minScore)
+      .sortBy(-_._2)
+      .take(Math.min(config.maxNumResults, MaxNumResultsUpperBound))
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/BUILD
@ -0,0 +1,14 @@
+scala_library(
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "3rdparty/jvm/com/google/guava",
+        "3rdparty/jvm/com/twitter/storehaus:core",
+        "frigate/frigate-common:base",
+        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
+        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
+        "src/scala/com/twitter/simclusters_v2/common",
+        "src/scala/com/twitter/simclusters_v2/summingbird/stores",
+        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
+        "util/util-stats/src/main/scala/com/twitter/finagle/stats",
+    ],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/ExperimentalApproximateCosineSimilarity.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/ExperimentalApproximateCosineSimilarity.scala
@ -0,0 +1,131 @@
+package com.twitter.simclustersann.candidate_source
+
+import com.twitter.simclusters_v2.common.SimClustersEmbedding
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.simclusters_v2.thriftscala.InternalId
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
+import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import com.google.common.collect.Comparators
+import com.twitter.simclusters_v2.common.ClusterId
+
+/**
+ * A modified version of OptimizedApproximateCosineSimilarity which uses more java streams to avoid
+ * materializing intermediate collections. Its performance is still under investigation.
+ */
+object ExperimentalApproximateCosineSimilarity extends ApproximateCosineSimilarity {
+
+  final val InitialCandidateMapSize = 16384
+  // Hard cap on the number of returned results, whatever config.maxNumResults asks for.
+  val MaxNumResultsUpperBound = 1000
+  // In hours (175200 h = 7300 days). At or above this value the max-age filter is disabled.
+  final val MaxTweetCandidateAgeUpperBound = 175200
+
+  /** Returns the tweet id carried by the embedding id, or None for any other kind of id. */
+  private def parseTweetId(embeddingId: SimClustersEmbeddingId): Option[TweetId] = {
+    embeddingId.internalId match {
+      case InternalId.TweetId(tweetId) =>
+        Some(tweetId)
+      case _ =>
+        None
+    }
+  }
+  // Orders (tweetId, score) pairs by ascending score; used to select the greatest k.
+  private val CompareByScore: java.util.Comparator[(Long, Double)] =
+    new java.util.Comparator[(Long, Double)] {
+      override def compare(o1: (Long, Double), o2: (Long, Double)): Int = {
+        java.lang.Double.compare(o1._2, o2._2)
+      }
+    }
+  // Mutable accumulator: `score` is the running dot product with the source embedding,
+  // `norm` the running sum of squared tweet scores.
+  class Scores(var score: Double, var norm: Double)
+
+  /**
+   * Scores the tweets of every cluster present in `clusterTweetsMap` and keeps the top results.
+   *
+   * Only the first `config.maxTopTweetsPerCluster` tweets of each cluster are considered, and
+   * only those whose id falls inside the [earliest, latest] snowflake-id window derived from the
+   * configured min/max candidate ages. The source tweet itself is skipped.
+   *
+   * NOTE(review): unlike the ApproximateCosineSimilarity object, clusters are not filtered with
+   * `sourceEmbedding.contains(clusterId)` here — confirm this difference is intended.
+   *
+   * @param sourceEmbedding embedding the candidates are scored against
+   * @param sourceEmbeddingId id of the source embedding (used to exclude the source tweet)
+   * @param config query configuration
+   * @param candidateScoresStat invoked with the number of distinct candidates before filtering
+   * @param clusterTweetsMap per-cluster lists of (tweetId, score)
+   * @param clusterTweetsMapArray not read by this implementation
+   * @return up to min(config.maxNumResults, MaxNumResultsUpperBound) candidates with
+   *         score >= config.minScore, highest score first; empty when maxNumResults <= 0
+   */
+  override def apply(
+    sourceEmbedding: SimClustersEmbedding,
+    sourceEmbeddingId: SimClustersEmbeddingId,
+    config: SimClustersANNConfig,
+    candidateScoresStat: Int => Unit,
+    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]] = Map.empty,
+    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
+  ): Seq[ScoredTweet] = {
+    val now = Time.now
+    val earliestTweetId =
+      if (config.maxTweetCandidateAgeHours >= MaxTweetCandidateAgeUpperBound)
+        0L // Disable max tweet age filter
+      else
+        SnowflakeId.firstIdFor(now - Duration.fromHours(config.maxTweetCandidateAgeHours))
+    val latestTweetId =
+      SnowflakeId.firstIdFor(now - Duration.fromHours(config.minTweetCandidateAgeHours))
+
+    val candidateScoresMap = new java.util.HashMap[Long, Scores](InitialCandidateMapSize)
+    // 0L stands in for "no source tweet" when the embedding id is not a tweet id.
+    val sourceTweetId = parseTweetId(sourceEmbeddingId).getOrElse(0L)
+
+    clusterTweetsMap.foreach {
+      case (clusterId, Some(tweetScores)) =>
+        val sourceClusterScore = sourceEmbedding.getOrElse(clusterId)
+
+        // Walk an iterator rather than indexing the Seq: positional access is linear for
+        // List-backed sequences, which would make this loop quadratic in the list length.
+        tweetScores.iterator.take(config.maxTopTweetsPerCluster).foreach {
+          case (tweetId, score) =>
+            if (tweetId >= earliestTweetId &&
+              tweetId <= latestTweetId &&
+              tweetId != sourceTweetId) {
+
+              // java.util.HashMap.get returns null for a missing key.
+              val scores = candidateScoresMap.get(tweetId)
+              if (scores == null) {
+                val scorePair = new Scores(
+                  score = score * sourceClusterScore,
+                  norm = score * score
+                )
+                candidateScoresMap.put(tweetId, scorePair)
+              } else {
+                scores.score = scores.score + (score * sourceClusterScore)
+                scores.norm = scores.norm + (score * score)
+              }
+            }
+        }
+      case _ => ()
+    }
+
+    candidateScoresStat(candidateScoresMap.size)
+
+    // Pick the normalization once per request instead of matching once per candidate.
+    val normFn: (Long, Scores) => (Long, Double) = config.annAlgorithm match {
+      case ScoringAlgorithm.LogCosineSimilarity =>
+        (candidateId: Long, score: Scores) =>
+          (
+            candidateId,
+            score.score / sourceEmbedding.logNorm / math.log(1 + score.norm)
+          )
+      case ScoringAlgorithm.CosineSimilarity =>
+        (candidateId: Long, score: Scores) =>
+          (
+            candidateId,
+            score.score / sourceEmbedding.l2norm / math.sqrt(score.norm)
+          )
+      case ScoringAlgorithm.CosineSimilarityNoSourceEmbeddingNormalization =>
+        (candidateId: Long, score: Scores) =>
+          (
+            candidateId,
+            score.score / math.sqrt(score.norm)
+          )
+      case ScoringAlgorithm.DotProduct =>
+        (candidateId: Long, score: Scores) =>
+          (
+            candidateId,
+            score.score
+          )
+    }
+
+    import scala.collection.JavaConverters._
+
+    // Comparators.greatest rejects a negative k with IllegalArgumentException; clamp to 0 so a
+    // non-positive maxNumResults yields an empty result, as `take` does in the baseline
+    // implementation.
+    val maxResults = Math.max(0, Math.min(config.maxNumResults, MaxNumResultsUpperBound))
+
+    val topKCollector = Comparators.greatest(
+      maxResults,
+      CompareByScore
+    )
+
+    candidateScoresMap
+      .entrySet().stream()
+      .map[(Long, Double)]((e: java.util.Map.Entry[Long, Scores]) => normFn(e.getKey, e.getValue))
+      .filter((s: (Long, Double)) => s._2 >= config.minScore)
+      .collect(topKCollector)
+      .asScala
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/OptimizedApproximateCosineSimilarity.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/OptimizedApproximateCosineSimilarity.scala
@ -0,0 +1,112 @@
+package com.twitter.simclustersann.candidate_source
+
+import com.twitter.simclusters_v2.common.ClusterId
+import com.twitter.simclusters_v2.common.SimClustersEmbedding
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.simclusters_v2.thriftscala.InternalId
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
+import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.util.Duration
+import com.twitter.util.Time
+
+/**
+ * Compared with ApproximateCosineSimilarity, this implementation:
+ * - moves some computation around to reduce allocations
+ * - uses a single hashmap to store both scores and normalization coefficients
+ * - uses some java collections in place of scala ones
+ * Testing is still in progress, but this implementation shows significant (> 2x) improvements in
+ * CPU utilization and allocations with 800 tweets per cluster.
+ */
+object OptimizedApproximateCosineSimilarity extends ApproximateCosineSimilarity {
+
+  /** Initial capacity of the hash map that accumulates per-candidate scores. */
+  final val InitialCandidateMapSize = 16384
+
+  /** Cap applied on top of `config.maxNumResults` when truncating the result list. */
+  val MaxNumResultsUpperBound = 1000
+
+  /** `config.maxTweetCandidateAgeHours` at or above this value disables the max-age filter. */
+  final val MaxTweetCandidateAgeUpperBound = 175200
+
+  /** Returns the tweet id of `embeddingId`, or None when its internal id is not a TweetId. */
+  private def parseTweetId(embeddingId: SimClustersEmbeddingId): Option[TweetId] = {
+    embeddingId.internalId match {
+      case InternalId.TweetId(tweetId) =>
+        Some(tweetId)
+      case _ =>
+        None
+    }
+  }
+
+  /**
+   * Scores the tweets listed in `clusterTweetsMap` against `sourceEmbedding`.
+   *
+   * For every cluster that has tweets and is present in the source embedding, at most
+   * `config.maxTopTweetsPerCluster` tweets are visited. A tweet is kept when its id lies
+   * between the ids derived from the configured max and min candidate age and it is not
+   * the source tweet itself. For each kept tweet two sums are accumulated across clusters:
+   * `score * sourceClusterScore` and `score * score`.
+   *
+   * @param sourceEmbedding embedding the candidates are compared with
+   * @param sourceEmbeddingId id of the source; when it is a tweet id, that tweet is excluded
+   * @param config scoring algorithm, age window, per-cluster and overall limits, min score
+   * @param candidateScoresStat invoked once with the number of distinct candidates collected
+   * @param clusterTweetsMap per-cluster (tweetId, score) lists; clusters mapped to None are
+   *                         skipped
+   * @param clusterTweetsMapArray not read by this implementation
+   * @return candidates whose normalized score is >= `config.minScore`, highest score first,
+   *         limited to min(`config.maxNumResults`, `MaxNumResultsUpperBound`) entries
+   */
+  override def apply(
+    sourceEmbedding: SimClustersEmbedding,
+    sourceEmbeddingId: SimClustersEmbeddingId,
+    config: SimClustersANNConfig,
+    candidateScoresStat: Int => Unit,
+    clusterTweetsMap: Map[ClusterId, Option[Seq[(TweetId, Double)]]] = Map.empty,
+    clusterTweetsMapArray: Map[ClusterId, Option[Array[(TweetId, Double)]]] = Map.empty
+  ): Seq[ScoredTweet] = {
+    val now = Time.now
+    val earliestTweetId =
+      if (config.maxTweetCandidateAgeHours >= MaxTweetCandidateAgeUpperBound)
+        0L // Disable max tweet age filter
+      else
+        SnowflakeId.firstIdFor(now - Duration.fromHours(config.maxTweetCandidateAgeHours))
+    val latestTweetId =
+      SnowflakeId.firstIdFor(now - Duration.fromHours(config.minTweetCandidateAgeHours))
+
+    // tweetId -> (accumulated score, accumulated squared candidate score)
+    val candidateScoresMap = new java.util.HashMap[Long, (Double, Double)](InitialCandidateMapSize)
+
+    // 0L is used when the source is not a tweet, so no real candidate is excluded by it.
+    val sourceTweetId = parseTweetId(sourceEmbeddingId).getOrElse(0L)
+
+    clusterTweetsMap.foreach {
+      case (clusterId, Some(tweetScores)) if sourceEmbedding.contains(clusterId) =>
+        val sourceClusterScore = sourceEmbedding.getOrElse(clusterId)
+
+        // Walk the sequence with a bounded iterator instead of tweetScores(i): positional
+        // access and size are linear-time on non-indexed Seq implementations such as List.
+        val tweetScoreIterator = tweetScores.iterator.take(config.maxTopTweetsPerCluster)
+        while (tweetScoreIterator.hasNext) {
+          val (tweetId, score) = tweetScoreIterator.next()
+
+          if (tweetId >= earliestTweetId &&
+            tweetId <= latestTweetId &&
+            tweetId != sourceTweetId) {
+
+            val scores = candidateScoresMap.getOrDefault(tweetId, (0.0, 0.0))
+            val newScores = (
+              scores._1 + score * sourceClusterScore,
+              scores._2 + score * score
+            )
+            candidateScoresMap.put(tweetId, newScores)
+          }
+        }
+      case _ => ()
+    }
+
+    candidateScoresStat(candidateScoresMap.size)
+
+    // Turns the accumulated (score, squared norm) pair into the final score for the
+    // algorithm requested in the config.
+    val normFn: (Long, (Double, Double)) => (Long, Double) = config.annAlgorithm match {
+      case ScoringAlgorithm.LogCosineSimilarity =>
+        (candidateId: Long, score: (Double, Double)) =>
+          candidateId -> score._1 / sourceEmbedding.logNorm / math.log(1 + score._2)
+      case ScoringAlgorithm.CosineSimilarity =>
+        (candidateId: Long, score: (Double, Double)) =>
+          candidateId -> score._1 / sourceEmbedding.l2norm / math.sqrt(score._2)
+      case ScoringAlgorithm.CosineSimilarityNoSourceEmbeddingNormalization =>
+        (candidateId: Long, score: (Double, Double)) =>
+          candidateId -> score._1 / math.sqrt(score._2)
+      case ScoringAlgorithm.DotProduct =>
+        (candidateId: Long, score: (Double, Double)) => (candidateId, score._1)
+    }
+
+    val scoredTweets: java.util.ArrayList[(Long, Double)] =
+      new java.util.ArrayList(candidateScoresMap.size)
+
+    val it = candidateScoresMap.entrySet().iterator()
+    while (it.hasNext) {
+      val mapEntry = it.next()
+      val normedScore = normFn(mapEntry.getKey, mapEntry.getValue)
+      // A NaN score (e.g. from a zero norm) fails this comparison and is dropped as well.
+      if (normedScore._2 >= config.minScore)
+        scoredTweets.add(normedScore)
+    }
+    import scala.collection.JavaConverters._
+
+    scoredTweets.asScala
+      .sortBy(-_._2)
+      .take(Math.min(config.maxNumResults, MaxNumResultsUpperBound))
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/SimClustersANNCandidateSource.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source/SimClustersANNCandidateSource.scala
@ -0,0 +1,102 @@
+package com.twitter.simclustersann.candidate_source
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.Stats
+import com.twitter.simclusters_v2.common.ClusterId
+import com.twitter.simclusters_v2.common.SimClustersEmbedding
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
+import com.twitter.simclustersann.thriftscala.SimClustersANNTweetCandidate
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * This store looks for tweets whose similarity is close to a Source SimClustersEmbeddingId.
+ *
+ * Approximate cosine similarity is the core algorithm to drive this store.
+ *
+ * Step 1 is in the "get" method; steps 2 - 4 are driven by the "fetchCandidates" method.
+ * 1. Retrieve the SimClusters Embedding by the SimClustersEmbeddingId
+ * 2. Fetch top N clusters' top tweets from the clusterTweetCandidatesStore (TopTweetsPerCluster index).
+ * 3. Calculate all the tweet candidates' dot-product or approximate cosine similarity to source tweets.
+ * 4. Take top M tweet candidates by the step 3's score
+ */
+case class SimClustersANNCandidateSource(
+  approximateCosineSimilarity: ApproximateCosineSimilarity,
+  clusterTweetCandidatesStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]],
+  simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding],
+  statsReceiver: StatsReceiver) {
+  private val stats = statsReceiver.scope(this.getClass.getName)
+  private val fetchSourceEmbeddingStat = stats.scope("fetchSourceEmbedding")
+  private val fetchCandidatesStat = stats.scope("fetchCandidates")
+  private val candidateScoresStat = stats.stat("candidateScoresMap")
+
+  /**
+   * Looks up the source embedding for the query and, when it exists, returns the scored
+   * tweet candidates for it. Yields None when the source embedding is missing.
+   */
+  def get(
+    query: SimClustersANNCandidateSource.Query
+  ): Future[Option[Seq[SimClustersANNTweetCandidate]]] = {
+    val sourceEmbeddingId = query.sourceEmbeddingId
+    val config = query.config
+
+    // Records the result size under a stat named after (embeddingType, modelVersion).
+    def recordCandidateCount(count: Int): Unit =
+      fetchCandidatesStat
+        .stat(sourceEmbeddingId.embeddingType.name, sourceEmbeddingId.modelVersion.name)
+        .add(count)
+
+    Stats
+      .track(fetchSourceEmbeddingStat) {
+        simClustersEmbeddingStore.get(query.sourceEmbeddingId)
+      }.flatMap {
+        case Some(sourceEmbedding) =>
+          Stats
+            .trackSeq(fetchCandidatesStat) {
+              fetchCandidates(sourceEmbeddingId, sourceEmbedding, config)
+            }.map { candidates =>
+              recordCandidateCount(candidates.size)
+              Some(candidates)
+            }
+        case None =>
+          recordCandidateCount(0)
+          Future.None
+      }
+  }
+
+  /**
+   * Fetches the top tweets of the source embedding's top `config.maxScanClusters` clusters
+   * and scores them with the configured similarity implementation.
+   */
+  private def fetchCandidates(
+    sourceEmbeddingId: SimClustersEmbeddingId,
+    sourceEmbedding: SimClustersEmbedding,
+    config: SimClustersANNConfig
+  ): Future[Seq[SimClustersANNTweetCandidate]] = {
+
+    val clusterIds =
+      sourceEmbedding.truncate(config.maxScanClusters).getClusterIds().toSet
+
+    val clusterTweetsFuture = Future.collect {
+      clusterTweetCandidatesStore.multiGet(clusterIds)
+    }
+
+    clusterTweetsFuture.map { clusterTweetsMap =>
+      val scoredTweets = approximateCosineSimilarity(
+        sourceEmbedding = sourceEmbedding,
+        sourceEmbeddingId = sourceEmbeddingId,
+        config = config,
+        candidateScoresStat = (i: Int) => candidateScoresStat.add(i),
+        clusterTweetsMap = clusterTweetsMap
+      )
+
+      scoredTweets.map {
+        case (tweetId, score) =>
+          SimClustersANNTweetCandidate(tweetId = tweetId, score = score)
+      }
+    }
+  }
+}
+
+/** Companion holding the query type accepted by `SimClustersANNCandidateSource.get`. */
+object SimClustersANNCandidateSource {
+
+  /**
+   * @param sourceEmbeddingId id of the embedding looked up as the similarity source
+   * @param config            ANN settings handed to candidate fetching and scoring
+   */
+  case class Query(
+    sourceEmbeddingId: SimClustersEmbeddingId,
+    config: SimClustersANNConfig)
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common/BUILD
@ -0,0 +1,5 @@
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    tags = ["bazel-compatible"],
+    dependencies = [],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common/FlagNames.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common/FlagNames.scala
@ -0,0 +1,31 @@
+package com.twitter.simclustersann.common
+
+/** Names of the flags that other parts of this service inject with `@Flag`. */
+object FlagNames {
+
+  /**
+   * Global Settings
+   */
+  final val ServiceTimeout = "service.timeout"
+  final val DarkTrafficFilterDeciderKey = "thrift.dark.traffic.filter.decider_key"
+
+  /**
+   * Cache Settings
+   */
+  final val CacheDest = "cache_module.dest"
+  final val CacheTimeout = "cache_module.timeout"
+  // Only turn on the async update when the SANN Cluster has the production traffic.
+  final val CacheAsyncUpdate = "cache_module.async_update"
+
+  /**
+   * Warmup Settings
+   */
+  final val DisableWarmup = "warmup.disable"
+  final val NumberOfThreads = "warmup.thread_number"
+  final val RateLimiterQPS = "warmup.rate_limiter_qps"
+
+  /**
+   * Algorithm Parameters
+   */
+  final val MaxTopTweetPerCluster = "sim_clusters.ann.max_top_tweets_per_cluster"
+
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/controllers/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/controllers/BUILD
@ -0,0 +1,29 @@
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "3rdparty/jvm/com/google/inject:guice",
+        "3rdparty/jvm/javax/inject:javax.inject",
+        "3rdparty/jvm/net/codingwell:scala-guice",
+        "decider/src/main/scala",
+        "finagle/finagle-core/src/main",
+        "finatra/inject/inject-core/src/main/scala",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift/exceptions",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift/filters",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift/modules",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift/response",
+        "finatra/thrift/src/main/scala/com/twitter/finatra/thrift/routing",
+        "representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
+        "scrooge/scrooge-core/src/main/scala",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters",
+        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
+        "src/scala/com/twitter/simclusters_v2/candidate_source",
+        "twitter-server/server/src/main/scala",
+        "util/util-core:scala",
+        "util/util-slf4j-api/src/main/scala",
+    ],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/controllers/SimClustersANNController.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/controllers/SimClustersANNController.scala
@ -0,0 +1,80 @@
+package com.twitter.simclustersann.controllers
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finatra.thrift.Controller
+import com.twitter.simclustersann.thriftscala.SimClustersANNService.GetTweetCandidates
+import com.twitter.simclustersann.thriftscala.SimClustersANNService
+import com.twitter.simclustersann.thriftscala.Query
+import com.twitter.simclustersann.thriftscala.SimClustersANNTweetCandidate
+import com.twitter.scrooge.Request
+import com.twitter.scrooge.Response
+import javax.inject.Inject
+import com.twitter.finagle.Service
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.inject.annotations.Flag
+import com.twitter.simclustersann.candidate_source.{
+  SimClustersANNCandidateSource => SANNSimClustersANNCandidateSource
+}
+import com.twitter.simclustersann.common.FlagNames
+import com.twitter.simclustersann.filters.GetTweetCandidatesResponseStatsFilter
+import com.twitter.simclustersann.filters.SimClustersAnnVariantFilter
+import com.twitter.util.Future
+import com.twitter.util.JavaTimer
+import com.twitter.util.Timer
+
+/**
+ * Thrift controller for SimClustersANNService.GetTweetCandidates.
+ *
+ * Requests pass through `variantFilter` and `getTweetCandidatesResponseStatsFilter` before
+ * reaching `handler`. A request that fails or exceeds `serviceTimeout` milliseconds is counted
+ * under the "failures" scope and answered with an empty candidate list.
+ */
+class SimClustersANNController @Inject() (
+  @Flag(FlagNames.ServiceTimeout) serviceTimeout: Int,
+  variantFilter: SimClustersAnnVariantFilter,
+  getTweetCandidatesResponseStatsFilter: GetTweetCandidatesResponseStatsFilter,
+  sannCandidateSource: SANNSimClustersANNCandidateSource,
+  globalStats: StatsReceiver)
+    extends Controller(SimClustersANNService) {
+
+  import SimClustersANNController._
+
+  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
+  private val failureStats: StatsReceiver = stats.scope("failures")
+  private val timer: Timer = new JavaTimer(true)
+
+  val filteredService: Service[Request[GetTweetCandidates.Args], Response[
+    Seq[SimClustersANNTweetCandidate]
+  ]] = {
+    variantFilter
+      .andThen(getTweetCandidatesResponseStatsFilter)
+      .andThen(Service.mk(handler))
+  }
+
+  handle(GetTweetCandidates).withService(filteredService)
+
+  /**
+   * Runs the candidate source for the request's query and wraps the result in a Response.
+   * A missing source embedding, a failure, or a timeout all produce `DefaultResponse`.
+   */
+  private def handler(
+    request: Request[GetTweetCandidates.Args]
+  ): Future[Response[Seq[SimClustersANNTweetCandidate]]] = {
+    val query: Query = request.args.query
+    val simClustersANNCandidateSourceQuery = SANNSimClustersANNCandidateSource.Query(
+      sourceEmbeddingId = query.sourceEmbeddingId,
+      config = query.config
+    )
+
+    val result = sannCandidateSource
+      .get(simClustersANNCandidateSourceQuery).map {
+        case Some(tweetCandidatesSeq) =>
+          Response(tweetCandidatesSeq.map { tweetCandidate =>
+            SimClustersANNTweetCandidate(
+              tweetId = tweetCandidate.tweetId,
+              score = tweetCandidate.score
+            )
+          })
+        case None =>
+          DefaultResponse
+      }
+
+    result.raiseWithin(serviceTimeout.milliseconds)(timer).rescue {
+      case e: Throwable =>
+        // getCanonicalName is null for anonymous and local classes; fall back to the binary
+        // name so the counter never receives a null name.
+        val exceptionClass = e.getClass
+        val exceptionName =
+          Option(exceptionClass.getCanonicalName).getOrElse(exceptionClass.getName)
+        failureStats.counter(exceptionName).incr()
+        Future.value(DefaultResponse)
+    }
+  }
+}
+
+object SimClustersANNController {
+  // Empty candidate list returned when there is no source embedding or the request fails.
+  val DefaultResponse: Response[Seq[SimClustersANNTweetCandidate]] = Response(Seq.empty)
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/BUILD
@ -0,0 +1,12 @@
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-core/src/main",
+        "finatra-internal/mtls-thriftmux/src/main/scala",
+        "finatra-internal/thrift/src/main/thrift:thrift-scala",
+        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
+    ],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/InvalidRequestForSimClustersAnnVariantException.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/InvalidRequestForSimClustersAnnVariantException.scala
@ -0,0 +1,16 @@
+package com.twitter.simclustersann.exceptions
+
+import com.twitter.finagle.RequestException
+import com.twitter.simclusters_v2.thriftscala.EmbeddingType
+import com.twitter.simclusters_v2.thriftscala.ModelVersion
+
+/**
+ * Raised when a request's (model version, embedding type) pair is not served by this
+ * service variant.
+ *
+ * @param modelVersion        model version taken from the request
+ * @param embeddingType       embedding type taken from the request
+ * @param actualServiceName   name of the service that received the request
+ * @param expectedServiceName service expected to handle the pair, if one is known; it is
+ *                            interpolated as-is, so the message shows Some(...) or None
+ */
+case class InvalidRequestForSimClustersAnnVariantException(
+  modelVersion: ModelVersion,
+  embeddingType: EmbeddingType,
+  actualServiceName: String,
+  expectedServiceName: Option[String])
+    extends RequestException(
+      s"Request with model version ($modelVersion) and embedding type ($embeddingType) cannot be " +
+        s"processed by service variant ($actualServiceName)." +
+        s" Expected service variant: $expectedServiceName.",
+      null)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/InvalidRequestForSimClustersAnnVariantExceptionMapper.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/InvalidRequestForSimClustersAnnVariantExceptionMapper.scala
@ -0,0 +1,27 @@
+package com.twitter.simclustersann.exceptions
+
+import com.twitter.finatra.thrift.exceptions.ExceptionMapper
+import com.twitter.finatra.thrift.thriftscala.ClientError
+import com.twitter.finatra.thrift.thriftscala.ClientErrorCause
+import com.twitter.util.Future
+import com.twitter.util.logging.Logging
+import javax.inject.Singleton
+
+/**
+ * An exception mapper designed to handle
+ * [[com.twitter.simclustersann.exceptions.InvalidRequestForSimClustersAnnVariantException]]
+ * by returning a Thrift IDL defined Client Error.
+ */
+@Singleton
+class InvalidRequestForSimClustersAnnVariantExceptionMapper
+    extends ExceptionMapper[InvalidRequestForSimClustersAnnVariantException, Nothing]
+    with Logging {
+
+  // Logs the rejected request, then fails the response with a BadRequest ClientError that
+  // carries the exception's message.
+  override def handleException(
+    throwable: InvalidRequestForSimClustersAnnVariantException
+  ): Future[Nothing] = {
+    error("Invalid Request For SimClusters Ann Variant Exception", throwable)
+
+    val badRequest = ClientError(ClientErrorCause.BadRequest, throwable.getMessage())
+    Future.exception(badRequest)
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/MissingClusterConfigForSimClustersAnnVariantException.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions/MissingClusterConfigForSimClustersAnnVariantException.scala
@ -0,0 +1,6 @@
+package com.twitter.simclustersann.exceptions
+
+/**
+ * Raised when no cluster configuration is found for a service name.
+ *
+ * @param sannServiceName service name that was looked up
+ */
+case class MissingClusterConfigForSimClustersAnnVariantException(sannServiceName: String)
+    extends IllegalStateException(
+      s"No cluster configuration found for service ($sannServiceName)",
+      null)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters/BUILD
@ -0,0 +1,13 @@
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-core/src/main",
+        "finatra/inject/inject-app/src/main/java/com/twitter/inject/annotations",
+        "finatra/inject/inject-core/src/main/scala",
+        "relevance-platform/src/main/scala/com/twitter/relevance_platform/simclustersann/multicluster",
+        "scrooge/scrooge-core/src/main/scala",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions",
+        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
+    ],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters/GetTweetCandidatesResponseStatsFilter.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters/GetTweetCandidatesResponseStatsFilter.scala
@ -0,0 +1,43 @@
+package com.twitter.simclustersann.filters
+
+import com.twitter.finagle.Service
+import com.twitter.finagle.SimpleFilter
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.scrooge.Request
+import com.twitter.scrooge.Response
+import com.twitter.simclustersann.thriftscala.SimClustersANNService
+import com.twitter.util.Future
+import javax.inject.Inject
+import javax.inject.Singleton
+
+/**
+ * Filter that records stats about successful getTweetCandidates responses: whether the
+ * candidate list was empty, and each candidate's score multiplied by 1000.
+ */
+@Singleton
+class GetTweetCandidatesResponseStatsFilter @Inject() (
+  statsReceiver: StatsReceiver)
+    extends SimpleFilter[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
+      SimClustersANNService.GetTweetCandidates.SuccessType
+    ]] {
+
+  private[this] val stats = statsReceiver.scope("method_response_stats").scope("getTweetCandidates")
+  private[this] val candidateScoreStats = stats.stat("candidate_score_x1000")
+  private[this] val emptyResponseCounter = stats.counter("empty")
+  private[this] val nonEmptyResponseCounter = stats.counter("non_empty")
+
+  override def apply(
+    request: Request[SimClustersANNService.GetTweetCandidates.Args],
+    service: Service[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
+      SimClustersANNService.GetTweetCandidates.SuccessType
+    ]]
+  ): Future[Response[SimClustersANNService.GetTweetCandidates.SuccessType]] = {
+    val response = service(request)
+
+    response.onSuccess { successResponse =>
+      val candidates = successResponse.value
+      val responseCounter =
+        if (candidates.isEmpty) emptyResponseCounter else nonEmptyResponseCounter
+      responseCounter.incr()
+
+      candidates.foreach { candidate =>
+        candidateScoreStats.add(candidate.score.toFloat * 1000)
+      }
+    }
+
+    // Hand back the service's own future; the callback above only observes it.
+    response
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters/SimClustersAnnVariantFilter.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/filters/SimClustersAnnVariantFilter.scala
@ -0,0 +1,53 @@
+package com.twitter.simclustersann.filters
+
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.finagle.Service
+import com.twitter.finagle.SimpleFilter
+import com.twitter.relevance_platform.simclustersann.multicluster.ServiceNameMapper
+import com.twitter.scrooge.Request
+import com.twitter.scrooge.Response
+import com.twitter.simclustersann.exceptions.InvalidRequestForSimClustersAnnVariantException
+import com.twitter.simclustersann.thriftscala.SimClustersANNService
+import com.twitter.util.Future
+import javax.inject.Inject
+import javax.inject.Singleton
+
+/**
+ * Filter that rejects requests whose (model version, candidate embedding type) pair maps to
+ * a different service than the one identified by `serviceIdentifier`.
+ */
+@Singleton
+class SimClustersAnnVariantFilter @Inject() (
+  serviceNameMapper: ServiceNameMapper,
+  serviceIdentifier: ServiceIdentifier,
+) extends SimpleFilter[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
+      SimClustersANNService.GetTweetCandidates.SuccessType
+    ]] {
+  override def apply(
+    request: Request[SimClustersANNService.GetTweetCandidates.Args],
+    service: Service[Request[SimClustersANNService.GetTweetCandidates.Args], Response[
+      SimClustersANNService.GetTweetCandidates.SuccessType
+    ]]
+  ): Future[Response[SimClustersANNService.GetTweetCandidates.SuccessType]] = {
+    // Validation throws before the downstream service is invoked.
+    validateRequest(request)
+    service(request)
+  }
+
+  // Throws InvalidRequestForSimClustersAnnVariantException unless the mapper names this
+  // service as the handler for the request's model version and candidate embedding type.
+  private def validateRequest(
+    request: Request[SimClustersANNService.GetTweetCandidates.Args]
+  ): Unit = {
+    val query = request.args.query
+    val modelVersion = query.sourceEmbeddingId.modelVersion
+    val embeddingType = query.config.candidateEmbeddingType
+    val actualServiceName = serviceIdentifier.service
+    val expectedServiceName = serviceNameMapper.getServiceName(modelVersion, embeddingType)
+
+    if (!expectedServiceName.contains(actualServiceName)) {
+      throw InvalidRequestForSimClustersAnnVariantException(
+        modelVersion,
+        embeddingType,
+        actualServiceName,
+        expectedServiceName)
+    }
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/BUILD
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/BUILD
@ -0,0 +1,24 @@
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
+        "finagle/finagle-stats",
+        "finatra/inject/inject-core/src/main/scala",
+        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
+        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
+        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
+        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
+        "relevance-platform/src/main/scala/com/twitter/relevance_platform/simclustersann/multicluster",
+        "representation-manager/client/src/main/scala/com/twitter/representation_manager",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/candidate_source",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/common",
+        "simclusters-ann/server/src/main/scala/com/twitter/simclustersann/exceptions",
+        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
+        "src/scala/com/twitter/simclusters_v2/common",
+        "src/scala/com/twitter/simclusters_v2/summingbird",
+        "src/scala/com/twitter/storehaus_internal/memcache",
+        "src/scala/com/twitter/storehaus_internal/util",
+        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
+    ],
+)
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/CacheModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/CacheModule.scala
@ -0,0 +1,34 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.finagle.memcached.Client
+import javax.inject.Singleton
+import com.twitter.conversions.DurationOps._
+import com.twitter.inject.TwitterModule
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.inject.annotations.Flag
+import com.twitter.simclustersann.common.FlagNames
+import com.twitter.storehaus_internal.memcache.MemcacheStore
+import com.twitter.storehaus_internal.util.ClientName
+import com.twitter.storehaus_internal.util.ZkEndPoint
+
+object CacheModule extends TwitterModule {
+
+  /**
+   * Builds the memcached client named "memcache_simclusters_ann".
+   *
+   * @param cacheDest    destination passed to ZkEndPoint (CacheDest flag)
+   * @param cacheTimeout client timeout in milliseconds (CacheTimeout flag)
+   */
+  @Singleton
+  @Provides
+  def providesCache(
+    @Flag(FlagNames.CacheDest) cacheDest: String,
+    @Flag(FlagNames.CacheTimeout) cacheTimeout: Int,
+    serviceIdentifier: ServiceIdentifier,
+    stats: StatsReceiver
+  ): Client = {
+    val clientName = ClientName("memcache_simclusters_ann")
+    val endpoint = ZkEndPoint(cacheDest)
+    val clientTimeout = cacheTimeout.milliseconds
+    val clientStats = stats.scope("cache_client")
+
+    MemcacheStore.memcachedClient(
+      name = clientName,
+      dest = endpoint,
+      timeout = clientTimeout,
+      retries = 0,
+      statsReceiver = clientStats,
+      serviceIdentifier = serviceIdentifier
+    )
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ClusterConfigMapperModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ClusterConfigMapperModule.scala
@ -0,0 +1,15 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.inject.TwitterModule
+import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfigMapper
+import javax.inject.Singleton
+
+/** Module that makes `ClusterConfigMapper` available for injection. */
+object ClusterConfigMapperModule extends TwitterModule {
+  // Returns the ClusterConfigMapper value itself; no per-injection state is created here.
+  @Singleton
+  @Provides
+  def providesClusterConfigMapper(
+  ): ClusterConfigMapper = {
+    ClusterConfigMapper
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ClusterConfigModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ClusterConfigModule.scala
@ -0,0 +1,25 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.inject.TwitterModule
+import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfig
+import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfigMapper
+import com.twitter.simclustersann.exceptions.MissingClusterConfigForSimClustersAnnVariantException
+import javax.inject.Singleton
+
+object ClusterConfigModule extends TwitterModule {
+
+  /**
+   * Resolves the ClusterConfig registered for this service's name.
+   * Throws MissingClusterConfigForSimClustersAnnVariantException when the mapper has none.
+   */
+  @Singleton
+  @Provides
+  def providesClusterConfig(
+    serviceIdentifier: ServiceIdentifier,
+    clusterConfigMapper: ClusterConfigMapper
+  ): ClusterConfig = {
+    val serviceName = serviceIdentifier.service
+
+    clusterConfigMapper
+      .getClusterConfig(serviceName)
+      .getOrElse(throw MissingClusterConfigForSimClustersAnnVariantException(serviceName))
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ClusterTweetIndexProviderModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ClusterTweetIndexProviderModule.scala
@ -0,0 +1,95 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.conversions.DurationOps._
+import com.twitter.decider.Decider
+import com.twitter.finagle.memcached.Client
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.hermit.store.common.ObservedCachedReadableStore
+import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
+import com.twitter.inject.TwitterModule
+import com.twitter.inject.annotations.Flag
+import com.twitter.relevance_platform.common.injection.LZ4Injection
+import com.twitter.relevance_platform.common.injection.SeqObjectInjection
+import com.twitter.relevance_platform.simclustersann.multicluster.ClusterConfig
+import com.twitter.relevance_platform.simclustersann.multicluster.ClusterTweetIndexStoreConfig
+import com.twitter.simclusters_v2.common.ClusterId
+import com.twitter.simclusters_v2.common.ModelVersions
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.simclusters_v2.summingbird.stores.ClusterKey
+import com.twitter.simclusters_v2.summingbird.stores.TopKTweetsForClusterKeyReadableStore
+import com.twitter.simclusters_v2.thriftscala.EmbeddingType
+import com.twitter.simclustersann.common.FlagNames
+import com.twitter.storehaus.ReadableStore
+
+import javax.inject.Singleton
+
+/**
+ * Module providing the cluster -> top tweets store consumed by the candidate source.
+ * The store is the configured backing index wrapped in two caching layers.
+ */
+object ClusterTweetIndexProviderModule extends TwitterModule {
+
+  @Singleton
+  @Provides
+  // Provides ClusterTweetIndex Store based on different maxResults settings on the same store
+  // Create a different provider if index is in a different store
+  def providesClusterTweetIndex(
+    @Flag(FlagNames.MaxTopTweetPerCluster) maxTopTweetPerCluster: Int,
+    @Flag(FlagNames.CacheAsyncUpdate) asyncUpdate: Boolean,
+    clusterConfig: ClusterConfig,
+    serviceIdentifier: ServiceIdentifier,
+    stats: StatsReceiver,
+    decider: Decider,
+    simClustersANNCacheClient: Client
+  ): ReadableStore[ClusterId, Seq[(TweetId, Double)]] = {
+    // Build the underlying cluster-to-tweet store
+    val topTweetsForClusterStore = clusterConfig.clusterTweetIndexStoreConfig match {
+      // If the config returns Manhattan tweet index config, we read from a RO MH store
+      case manhattanConfig: ClusterTweetIndexStoreConfig.Manhattan =>
+        TopKTweetsForClusterKeyReadableStore.getClusterToTopKTweetsStoreFromManhattanRO(
+          maxTopTweetPerCluster,
+          manhattanConfig,
+          serviceIdentifier)
+      case memCacheConfig: ClusterTweetIndexStoreConfig.Memcached =>
+        TopKTweetsForClusterKeyReadableStore.getClusterToTopKTweetsStoreFromMemCache(
+          maxTopTweetPerCluster,
+          memCacheConfig,
+          serviceIdentifier)
+      case _ =>
+        // Bad instance
+        // NOTE(review): an unrecognized config silently yields ReadableStore.empty instead of
+        // failing; presumably every lookup then returns nothing - confirm this is intended.
+        ReadableStore.empty
+    }
+
+    val embeddingType: EmbeddingType = clusterConfig.candidateTweetEmbeddingType
+    val modelVersion: String = ModelVersions.toKnownForModelVersion(clusterConfig.modelVersion)
+
+    // Callers look up by ClusterId only; the model version and embedding type of this
+    // cluster config are added here to form the backing store's ClusterKey.
+    val store: ReadableStore[ClusterId, Seq[(TweetId, Double)]] =
+      topTweetsForClusterStore.composeKeyMapping { id: ClusterId =>
+        ClusterKey(id, modelVersion, embeddingType)
+      }
+
+    // First caching layer: the shared memcached client, entries kept for 15 minutes and
+    // serialized through SeqObjectInjection composed with LZ4Injection.
+    val memcachedTopTweetsForClusterStore =
+      ObservedMemcachedReadableStore.fromCacheClient(
+        backingStore = store,
+        cacheClient = simClustersANNCacheClient,
+        ttl = 15.minutes,
+        asyncUpdate = asyncUpdate
+      )(
+        valueInjection = LZ4Injection.compose(SeqObjectInjection[(Long, Double)]()),
+        statsReceiver = stats.scope("cluster_tweet_index_mem_cache"),
+        keyToString = { k =>
+          // prod cache key : SimClusters_LZ4/cluster_to_tweet/clusterId_embeddingType_modelVersion
+          // NOTE(review): the key built below is
+          // "scz:c2t:<clusterId>_<embeddingType>_<modelVersion>_<maxTopTweetPerCluster>";
+          // the line above presumably spells out the abbreviations - confirm.
+          s"scz:c2t:${k}_${embeddingType}_${modelVersion}_$maxTopTweetPerCluster"
+        }
+      )
+
+    // Second caching layer in front of the memcached-backed store: 10 minute ttl, at most
+    // 150000 keys. Presumably an in-process cache - confirm against ObservedCachedReadableStore.
+    val cachedStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]] = {
+      ObservedCachedReadableStore.from[ClusterId, Seq[(TweetId, Double)]](
+        memcachedTopTweetsForClusterStore,
+        ttl = 10.minute,
+        maxKeys = 150000,
+        cacheName = "cluster_tweet_index_cache",
+        windowSize = 10000L
+      )(stats.scope("cluster_tweet_index_store"))
+    }
+    cachedStore
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/CustomMtlsThriftWebFormsModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/CustomMtlsThriftWebFormsModule.scala
@ -0,0 +1,99 @@
+package com.twitter.simclustersann.modules
+
+import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule
+import com.twitter.finatra.thrift.ThriftServer
+import com.twitter.simclusters_v2.thriftscala.EmbeddingType
+import com.twitter.simclusters_v2.thriftscala.InternalId
+import com.twitter.simclusters_v2.thriftscala.ModelVersion
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.thriftwebforms.MethodOptions
+import com.twitter.thriftwebforms.view.ServiceResponseView
+import com.twitter.util.Future
+import com.twitter.simclustersann.thriftscala.SimClustersANNTweetCandidate
+import com.twitter.simclustersann.thriftscala.Query
+import com.twitter.simclustersann.thriftscala.SimClustersANNConfig
+import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
+import com.twitter.thriftwebforms.MethodOptions.Access
+import scala.reflect.ClassTag
+import com.twitter.simclustersann.thriftscala.SimClustersANNService
+import scala.collection.mutable
+
+/**
+ * Thrift web forms module for the SimClusters ANN service.
+ *
+ * Pre-fills the `getTweetCandidates` form with a sample query, restricts form access to
+ * LDAP groups, and renders returned candidates as `twitter-tweet` blockquotes.
+ *
+ * @param server the Thrift server handed to [[MtlsThriftWebFormsModule]]
+ * @tparam T type parameter forwarded to [[MtlsThriftWebFormsModule]]
+ */
+class CustomMtlsThriftWebFormsModule[T: ClassTag](server: ThriftServer)
+    extends MtlsThriftWebFormsModule[T](server: ThriftServer) {
+
+  // Separator emitted between the header and the rendered candidates.
+  private val Nbsp = "&nbsp;"
+
+  // LDAP groups used as the access override for `getTweetCandidates`.
+  private val LdapGroups = Seq("recosplat-sensitive-data-medium", "simclusters-ann-admins")
+
+  /** LDAP groups used as the default access for methods without an explicit override. */
+  val FullAccessLdapGroups: Seq[String] =
+    Seq(
+      "recosplat-sensitive-data-medium",
+      "simclusters-ann-admins",
+      "recos-platform-admins"
+    )
+
+  /**
+   * Per-method web form options. Only `getTweetCandidates` is customised: it gets a
+   * default request value, the [[renderTimeline]] renderer and an LDAP access override.
+   */
+  override protected def methodOptions: Map[String, MethodOptions] = {
+    // Tweet id used as the source embedding of the pre-filled sample query.
+    val tweetId = 1568796529690902529L
+    val sannDefaultQuery = SimClustersANNService.GetTweetCandidates.Args(
+      query = Query(
+        sourceEmbeddingId = SimClustersEmbeddingId(
+          embeddingType = EmbeddingType.LogFavLongestL2EmbeddingTweet,
+          modelVersion = ModelVersion.Model20m145k2020,
+          internalId = InternalId.TweetId(tweetId)
+        ),
+        config = SimClustersANNConfig(
+          maxNumResults = 10,
+          minScore = 0.0,
+          candidateEmbeddingType = EmbeddingType.LogFavBasedTweet,
+          maxTopTweetsPerCluster = 400,
+          maxScanClusters = 50,
+          maxTweetCandidateAgeHours = 24,
+          minTweetCandidateAgeHours = 0,
+          annAlgorithm = ScoringAlgorithm.CosineSimilarity
+        )
+      ))
+
+    Map(
+      "getTweetCandidates" -> MethodOptions(
+        defaultRequestValue = Some(sannDefaultQuery),
+        responseRenderers = Seq(renderTimeline),
+        allowedAccessOverride = Some(Access.ByLdapGroup(LdapGroups))
+      )
+    )
+  }
+
+  /** Default access applied to methods: members of [[FullAccessLdapGroups]]. */
+  override protected def defaultMethodAccess: MethodOptions.Access = {
+    MethodOptions.Access.ByLdapGroup(FullAccessLdapGroups)
+  }
+
+  /**
+   * Response renderer for `getTweetCandidates`.
+   *
+   * @param r the raw service response; only an `Iterable` contributes candidates
+   * @return the rendered view; it contains no candidates when `r` is not an `Iterable`
+   */
+  def renderTimeline(r: AnyRef): Future[ServiceResponseView] = {
+    val simClustersANNTweetCandidates: Seq[SimClustersANNTweetCandidate] = r match {
+      case response: Iterable[_] =>
+        // Select by type instead of casting: an element of an unexpected type is skipped
+        // rather than aborting the whole rendering with a ClassCastException.
+        response.collect { case candidate: SimClustersANNTweetCandidate => candidate }.toSeq
+      case _ => Seq.empty
+    }
+    renderTweets(simClustersANNTweetCandidates)
+  }
+
+  /**
+   * Builds the HTML view: a header, a non-breaking space, then one blockquote plus score
+   * per candidate, in input order.
+   *
+   * @param simClustersANNTweetCandidates candidates to render
+   * @return an already-completed future holding the view
+   */
+  private def renderTweets(
+    simClustersANNTweetCandidates: Seq[SimClustersANNTweetCandidate]
+  ): Future[ServiceResponseView] = {
+    val headerHtml = "<h3>Tweet Candidates</h3>"
+    val tweetsHtml = simClustersANNTweetCandidates.map { simClustersANNTweetCandidate =>
+      val tweetId = simClustersANNTweetCandidate.tweetId
+      val score = simClustersANNTweetCandidate.score
+      s"""<blockquote class="twitter-tweet"><a href="https://twitter.com/tweet/statuses/$tweetId"></a></blockquote> <b>score:</b> $score <br><br>"""
+    }.mkString
+
+    Future.value(
+      ServiceResponseView(
+        "SimClusters ANN Tweet Candidates",
+        s"$headerHtml$Nbsp$tweetsHtml",
+        // Presumably the scripts loaded with the view (the Twitter widgets script) - confirm.
+        Seq("//platform.twitter.com/widgets.js")
+      )
+    )
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/EmbeddingStoreModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/EmbeddingStoreModule.scala
@ -0,0 +1,110 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.decider.Decider
+import com.twitter.finagle.memcached.{Client => MemcachedClient}
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.inject.TwitterModule
+import com.twitter.representation_manager.StoreBuilder
+import com.twitter.representation_manager.config.{
+  DefaultClientConfig => RepresentationManagerDefaultClientConfig
+}
+import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
+import com.twitter.simclusters_v2.common.SimClustersEmbedding
+import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
+import com.twitter.simclusters_v2.thriftscala.EmbeddingType
+import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
+import com.twitter.simclusters_v2.thriftscala.ModelVersion
+import com.twitter.simclusters_v2.thriftscala.ModelVersion._
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.client.{Client => StratoClient}
+import javax.inject.Singleton
+
+/**
+ * Guice module that provides the SimClusters embedding store, assembled from one
+ * Representation Manager store per supported (embedding type, model version) view.
+ */
+object EmbeddingStoreModule extends TwitterModule {
+
+  /** Views for which a tweet embedding store is built. */
+  val TweetEmbeddings: Set[SimClustersEmbeddingView] = Set(
+    SimClustersEmbeddingView(LogFavLongestL2EmbeddingTweet, Model20m145kUpdated),
+    SimClustersEmbeddingView(LogFavLongestL2EmbeddingTweet, Model20m145k2020)
+  )
+
+  /** Views for which a user embedding store is built. */
+  val UserEmbeddings: Set[SimClustersEmbeddingView] = Set(
+    // KnownFor
+    SimClustersEmbeddingView(FavBasedProducer, Model20m145kUpdated),
+    SimClustersEmbeddingView(FavBasedProducer, Model20m145k2020),
+    SimClustersEmbeddingView(FollowBasedProducer, Model20m145k2020),
+    SimClustersEmbeddingView(AggregatableLogFavBasedProducer, Model20m145k2020),
+    // InterestedIn
+    SimClustersEmbeddingView(UnfilteredUserInterestedIn, Model20m145k2020),
+    SimClustersEmbeddingView(
+      LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE,
+      Model20m145k2020),
+    SimClustersEmbeddingView(
+      LogFavBasedUserInterestedAverageAddressBookFromIIAPE,
+      Model20m145k2020),
+    SimClustersEmbeddingView(
+      LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE,
+      Model20m145k2020),
+    SimClustersEmbeddingView(
+      LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE,
+      Model20m145k2020),
+    SimClustersEmbeddingView(
+      LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE,
+      Model20m145k2020),
+    SimClustersEmbeddingView(
+      LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE,
+      Model20m145k2020),
+    SimClustersEmbeddingView(UserNextInterestedIn, Model20m145k2020),
+    SimClustersEmbeddingView(LogFavBasedUserInterestedInFromAPE, Model20m145k2020)
+  )
+
+  /**
+   * Builds one store per view in [[TweetEmbeddings]] and [[UserEmbeddings]], keys each by
+   * its (embedding type, model version) pair, and wraps the combined map with
+   * `SimClustersEmbeddingStore.buildWithDecider`.
+   *
+   * @param stratoClient    Strato client passed to the store builder
+   * @param memCachedClient memcached client passed to the store builder
+   * @param decider         decider passed to `buildWithDecider`
+   * @param stats           stats receiver shared by the builder and the resulting store
+   * @return the singleton embedding store
+   */
+  @Singleton
+  @Provides
+  def providesEmbeddingStore(
+    stratoClient: StratoClient,
+    memCachedClient: MemcachedClient,
+    decider: Decider,
+    stats: StatsReceiver
+  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
+    type EmbeddingStore = ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
+    type StoreKey = (EmbeddingType, ModelVersion)
+
+    val representationManager = new StoreBuilder(
+      clientConfig = RepresentationManagerDefaultClientConfig,
+      stratoClient = stratoClient,
+      memCachedClient = memCachedClient,
+      globalStats = stats
+    )
+
+    // Pairs every view's (embeddingType, modelVersion) key with the store built for it.
+    def storesFor(
+      views: Set[SimClustersEmbeddingView]
+    )(
+      build: SimClustersEmbeddingView => EmbeddingStore
+    ): Map[StoreKey, EmbeddingStore] =
+      views.map { view =>
+        ((view.embeddingType, view.modelVersion), build(view))
+      }.toMap
+
+    val tweetStores = storesFor(TweetEmbeddings) { view =>
+      representationManager.buildSimclustersTweetEmbeddingStoreWithEmbeddingIdAsKey(view)
+    }
+    val userStores = storesFor(UserEmbeddings) { view =>
+      representationManager.buildSimclustersUserEmbeddingStoreWithEmbeddingIdAsKey(view)
+    }
+
+    SimClustersEmbeddingStore.buildWithDecider(
+      underlyingStores = tweetStores ++ userStores,
+      decider = decider,
+      statsReceiver = stats
+    )
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/FlagsModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/FlagsModule.scala
@ -0,0 +1,44 @@
+package com.twitter.simclustersann.modules
+
+import com.twitter.inject.TwitterModule
+import com.twitter.simclustersann.common.FlagNames
+
+/** Declares the command-line flags of the SimClusters ANN server and their defaults. */
+object FlagsModule extends TwitterModule {
+
+  // Request timeout threshold. NOTE(review): the unit is not stated here - confirm.
+  flag[Int](
+    FlagNames.ServiceTimeout,
+    default = 40,
+    help = "The threshold of Request Timeout")
+
+  // Decider key consulted for dark traffic.
+  flag[String](
+    FlagNames.DarkTrafficFilterDeciderKey,
+    default = "dark_traffic_filter",
+    help = "Dark traffic filter decider key")
+
+  // Destination of the memcache service.
+  flag[String](
+    FlagNames.CacheDest,
+    default = "/s/cache/content_recommender_unified_v2",
+    help = "Path to memcache service. Currently using CR uniform scoring cache")
+
+  // Memcache timeout threshold. NOTE(review): the unit is not stated here - confirm.
+  flag[Int](
+    FlagNames.CacheTimeout,
+    default = 15,
+    help = "The threshold of MemCache Timeout")
+
+  // Toggles asynchronous updates of the memcache; off by default.
+  flag[Boolean](
+    FlagNames.CacheAsyncUpdate,
+    default = false,
+    help = "Whether to enable the async update for the MemCache")
+
+  // Upper bound on the tweets taken from each cluster.
+  flag[Int](
+    FlagNames.MaxTopTweetPerCluster,
+    default = 200,
+    help = "Maximum number of tweets to take per each simclusters")
+
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/FuturePoolProvider.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/FuturePoolProvider.scala
@ -0,0 +1,27 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.inject.TwitterModule
+import com.twitter.inject.annotations.Flag
+import com.twitter.simclustersann.common.FlagNames.NumberOfThreads
+import com.twitter.util.ExecutorServiceFuturePool
+import java.util.concurrent.Executors
+import javax.inject.Singleton
+/** Guice module that provides a future pool backed by a fixed-size thread pool. */
+object FuturePoolProvider extends TwitterModule {
+  flag[Int](
+    name = NumberOfThreads,
+    default = 20,
+    help = "The number of threads in the future pool."
+  )
+
+  /**
+   * Builds the singleton future pool on top of `Executors.newFixedThreadPool`.
+   *
+   * @param numberOfThreads size of the thread pool, read from the `NumberOfThreads` flag
+   * @return a future pool whose `toString` is prefixed with "warmup-future-pool-"
+   */
+  @Singleton
+  @Provides
+  def providesFuturePool(
+    @Flag(NumberOfThreads) numberOfThreads: Int
+  ): ExecutorServiceFuturePool = {
+    val threadPool = Executors.newFixedThreadPool(numberOfThreads)
+    new ExecutorServiceFuturePool(threadPool) {
+      // The label used to end with a stray ')' that had no matching '('.
+      override def toString: String = s"warmup-future-pool-$executor"
+    }
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/RateLimiterModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/RateLimiterModule.scala
@ -0,0 +1,23 @@
+package com.twitter.simclustersann.modules
+
+import com.google.common.util.concurrent.RateLimiter
+import com.google.inject.Provides
+import com.twitter.inject.TwitterModule
+import com.twitter.inject.annotations.Flag
+import com.twitter.simclustersann.common.FlagNames.RateLimiterQPS
+import javax.inject.Singleton
+
+/** Guice module that provides a Guava [[RateLimiter]] sized by the `RateLimiterQPS` flag. */
+object RateLimiterModule extends TwitterModule {
+
+  flag[Int](
+    RateLimiterQPS,
+    default = 1000,
+    help = "The QPS allowed by the rate limiter.")
+
+  /**
+   * Creates the singleton rate limiter.
+   *
+   * @param rateLimiterQps value of the `RateLimiterQPS` flag, passed to `RateLimiter.create`
+   * @return the rate limiter created for that rate
+   */
+  @Singleton
+  @Provides
+  def providesRateLimiter(
+    @Flag(RateLimiterQPS) rateLimiterQps: Int
+  ): RateLimiter = {
+    val permitsPerSecond: Double = rateLimiterQps.toDouble
+    RateLimiter.create(permitsPerSecond)
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ServiceNameMapperModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/ServiceNameMapperModule.scala
@ -0,0 +1,15 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.inject.TwitterModule
+import com.twitter.relevance_platform.simclustersann.multicluster.ServiceNameMapper
+import javax.inject.Singleton
+
+/** Guice module that exposes the `ServiceNameMapper` object for injection. */
+object ServiceNameMapperModule extends TwitterModule {
+
+  /** Returns the `ServiceNameMapper` object itself as the injectable instance. */
+  @Singleton
+  @Provides
+  def providesServiceNameMapper(): ServiceNameMapper = ServiceNameMapper
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/SimClustersANNCandidateSourceModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/SimClustersANNCandidateSourceModule.scala
@ -0,0 +1,47 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.inject.TwitterModule
+import com.twitter.simclusters_v2.common.ClusterId
+import com.twitter.simclusters_v2.common.SimClustersEmbedding
+import com.twitter.simclusters_v2.common.TweetId
+import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
+import com.twitter.storehaus.ReadableStore
+import javax.inject.Singleton
+import com.twitter.simclustersann.candidate_source.ApproximateCosineSimilarity
+import com.twitter.simclustersann.candidate_source.ExperimentalApproximateCosineSimilarity
+import com.twitter.simclustersann.candidate_source.OptimizedApproximateCosineSimilarity
+import com.twitter.simclustersann.candidate_source.SimClustersANNCandidateSource
+
+/**
+ * Guice module that wires the [[SimClustersANNCandidateSource]], choosing the approximate
+ * cosine similarity implementation from the `approximate_cosine_similarity` flag.
+ */
+object SimClustersANNCandidateSourceModule extends TwitterModule {
+
+  val acsFlag = flag[String](
+    name = "approximate_cosine_similarity",
+    default = "original",
+    help =
+      "Select different implementations of the approximate cosine similarity algorithm, for testing optimizations",
+  )
+
+  /**
+   * Builds the singleton candidate source.
+   *
+   * @param embeddingStore               store resolving embedding ids to embeddings
+   * @param cachedClusterTweetIndexStore store mapping a cluster id to (tweet id, score) pairs
+   * @param statsReceiver                parent stats receiver; scoped under
+   *                                     "simClustersANNCandidateSource"
+   * @return the candidate source using the implementation selected by the flag
+   */
+  @Singleton
+  @Provides
+  def provides(
+    embeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding],
+    cachedClusterTweetIndexStore: ReadableStore[ClusterId, Seq[(TweetId, Double)]],
+    statsReceiver: StatsReceiver
+  ): SimClustersANNCandidateSource = {
+    val selectedSimilarity = acsFlag() match {
+      case "optimized" => OptimizedApproximateCosineSimilarity
+      case "experimental" => ExperimentalApproximateCosineSimilarity
+      // "original" and every unrecognised value select the baseline implementation.
+      case _ => ApproximateCosineSimilarity
+    }
+    val scopedStats = statsReceiver.scope("simClustersANNCandidateSource")
+
+    new SimClustersANNCandidateSource(
+      approximateCosineSimilarity = selectedSimilarity,
+      clusterTweetCandidatesStore = cachedClusterTweetIndexStore,
+      simClustersEmbeddingStore = embeddingStore,
+      statsReceiver = scopedStats
+    )
+  }
+}
--- a/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/StratoClientProviderModule.scala
+++ b/simclusters-ann/server/src/main/scala/com/twitter/simclustersann/modules/StratoClientProviderModule.scala
@ -0,0 +1,20 @@
+package com.twitter.simclustersann.modules
+
+import com.google.inject.Provides
+import javax.inject.Singleton
+import com.twitter.inject.TwitterModule
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.strato.client.Client
+import com.twitter.strato.client.Strato
+
+/** Guice module that provides the Strato client used by the service. */
+object StratoClientProviderModule extends TwitterModule {
+
+  /**
+   * Builds the singleton Strato client with mutual TLS enabled.
+   *
+   * NOTE(review): despite the method name, this provides a Strato [[Client]], not a cache.
+   *
+   * @param serviceIdentifier identifier passed to `withMutualTls`
+   * @return the built Strato client
+   */
+  @Singleton
+  @Provides
+  def providesCache(
+    serviceIdentifier: ServiceIdentifier
+  ): Client = {
+    val mtlsClientBuilder = Strato.client.withMutualTls(serviceIdentifier)
+    mtlsClientBuilder.build()
+  }
+
+}