Twitter Recommendation Algorithm

Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
This commit is contained in:
twitter-team
2023-03-31 17:36:31 -05:00
commit ef4c5eb65e
5364 changed files with 460239 additions and 0 deletions

View File

@ -0,0 +1,8 @@
[package]
name = "bpr_thrift"
description = "Thrift parser for Batch Prediction Request"
version = "0.1.0"
edition = "2021"
[dependencies]
thrift = "0.17.0"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,78 @@
// The payload carried by a single feature.
//
// A value is either absent (`Empty`), a vector of raw bytes (`U8Vector`) or a
// vector of 32-bit floats (`FloatVector`).
//
// `Debug`, `Clone` and `PartialEq` are derived so values can be printed,
// duplicated and compared (e.g. in tests). `Eq` is intentionally not derived
// because `f32` does not implement it.
#[derive(Debug, Clone, PartialEq)]
enum FeatureVal {
    Empty,
    U8Vector(Vec<u8>),
    FloatVector(Vec<f32>),
}
// A Feature has a name and a value.
// For now the name is `id`, of type String.
// Eventually this needs to be more flexible - for example, to accommodate a
// feature-id.
struct Feature {
    id: String,
    val: FeatureVal,
}

impl Feature {
    // Builds a placeholder feature: an empty `id` and no value
    // (`FeatureVal::Empty`).
    fn new() -> Self {
        let id = String::new();
        let val = FeatureVal::Empty;
        Self { id, val }
    }
}
// One inference record: an ordered collection of features.
struct Record {
    fields: Vec<Feature>,
}

impl Record {
    // Creates a record that holds no features yet.
    fn new() -> Self {
        let fields = Vec::new();
        Self { fields }
    }
}
// Main API used by external components: given a serialized input, decode it
// into Records.
//
// NOTE: this is currently a stub that only pins down the interface. `input`
// is not read; the function always returns two placeholder records produced
// by `get_random_record`.
fn decode(input: Vec<u8>) -> Vec<Record> {
    let first = get_random_record();
    let second = get_random_record();
    vec![first, second]
}
// Used for testing the API, will be eventually removed
fn get_random_record() -> Record {
let mut record: Record = Record::new();
let f1: Feature = Feature {
id: String::from("continuous_features"),
val: FeatureVal::FloatVector(vec![1.0f32; 2134]),
};
record.fields.push(f1);
let f2: Feature = Feature {
id: String::from("user_embedding"),
val: FeatureVal::FloatVector(vec![2.0f32; 200]),
};
record.fields.push(f2);
let f3: Feature = Feature {
id: String::from("author_embedding"),
val: FeatureVal::FloatVector(vec![3.0f32; 200]),
};
record.fields.push(f3);
let f4: Feature = Feature {
id: String::from("binary_features"),
val: FeatureVal::U8Vector(vec![4u8; 43]),
};
record.fields.push(f4);
record
}

View File

@ -0,0 +1,4 @@
pub mod prediction_service;
pub mod data;
pub mod tensor;

View File

@ -0,0 +1,81 @@
use std::collections::BTreeSet;
use std::collections::BTreeMap;
use bpr_thrift::data::DataRecord;
use bpr_thrift::prediction_service::BatchPredictionRequest;
use thrift::OrderedFloat;
use thrift::protocol::TBinaryInputProtocol;
use thrift::protocol::TSerializable;
use thrift::transport::TBufferChannel;
use thrift::Result;
// Entry point: reads a serialized BatchPredictionRequest from disk, decodes it
// with the Thrift binary protocol and dumps the decoded request to stdout.
//
// The input path is taken from the first command-line argument. When no
// argument is given, the original hard-coded path is used, so existing
// invocations keep working.
//
// If the file cannot be read, the path and the I/O error are reported on
// stderr and the process exits with status 1 (instead of panicking).
// A Thrift decoding error is printed to stdout, as before.
fn main() {
    const DEFAULT_DATA_PATH: &str = "/tmp/current/timelines/output-1";

    // `args_os` is used so that a non-UTF-8 path argument does not panic.
    let data_path = std::env::args_os()
        .nth(1)
        .map(std::path::PathBuf::from)
        .unwrap_or_else(|| std::path::PathBuf::from(DEFAULT_DATA_PATH));

    let bin_data: Vec<u8> = match std::fs::read(&data_path) {
        Ok(bytes) => bytes,
        Err(err) => {
            eprintln!("Could not read file {}: {}", data_path.display(), err);
            std::process::exit(1);
        }
    };
    println!("Length : {}", bin_data.len());

    // Stage the whole payload in an in-memory channel so the protocol can
    // read from it. The second capacity argument is 0; nothing is written
    // to this channel here.
    let mut bc = TBufferChannel::with_capacity(bin_data.len(), 0);
    bc.set_readable_bytes(&bin_data);
    let mut protocol = TBinaryInputProtocol::new(bc, true);

    let result: Result<BatchPredictionRequest> =
        BatchPredictionRequest::read_from_in_protocol(&mut protocol);
    match result {
        Ok(bpr) => logBP(bpr),
        Err(err) => println!("Error {}", err),
    }
}
// Dumps a decoded BatchPredictionRequest to stdout using its `Debug`
// representation, framed by separator lines.
fn logBP(bpr: BatchPredictionRequest) {
    let dump = format!("data {:?}", bpr);
    println!("-------[OUTPUT]---------------");
    println!("{}", dump);
    println!("------------------------------");
    // Disabled alternative output: logs the common features and then each
    // individual record via `logDR`. Kept commented out, as in the original.
    /*
    let common = bpr.common_features;
    let recs = bpr.individual_features_list;
    println!("--------[Len : {}]------------------", recs.len());
    println!("-------[COMMON]---------------");
    match common {
    Some(dr) => logDR(dr),
    None => println!("None"),
    }
    println!("------------------------------");
    for rec in recs {
    logDR(rec);
    }
    println!("------------------------------");
    */
}
// Prints one DataRecord to stdout between separator lines.
//
// The binary features are printed first (via `logBin`), then the continuous
// features (via `logCF`). A feature group that is `None` is skipped silently.
fn logDR(dr: DataRecord) {
    println!("--------[DR]------------------");
    if let Some(binary) = dr.binary_features {
        logBin(binary);
    }
    if let Some(continuous) = dr.continuous_features {
        logCF(continuous);
    }
    println!("------------------------------");
}
// Prints a set of i64 values on a single line, prefixed with "B: ", using
// the set's `Debug` formatting.
fn logBin(bin: BTreeSet<i64>) {
    let values = bin;
    println!("B: {:?}", values);
}
// Prints every entry of the map on its own line as "C: <key> -> [<value>]".
// Entries are visited in the BTreeMap's iteration order (ascending key).
fn logCF(cf: BTreeMap<i64, OrderedFloat<f64>>) {
    cf.into_iter()
        .for_each(|(key, value)| println!("C: {} -> [{}]", key, value));
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff