plaintext_store_db::checkpoint_from_data bench
therealyingtong committed Nov 1, 2024
1 parent 54da4dc commit b59d697
Showing 27 changed files with 1,022 additions and 159 deletions.
Empty file added .gitattributes
407 changes: 304 additions & 103 deletions Cargo.lock

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions iris-mpc-common/src/iris_db/iris.rs
@@ -4,12 +4,14 @@ use rand::{
distributions::{Bernoulli, Distribution},
Rng,
};
use serde::{Deserialize, Serialize};
use serde_big_array::BigArray;

pub const MATCH_THRESHOLD_RATIO: f64 = 0.375;

#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IrisCodeArray(pub [u64; Self::IRIS_CODE_SIZE_U64]);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct IrisCodeArray(#[serde(with = "BigArray")] pub [u64; Self::IRIS_CODE_SIZE_U64]);
impl Default for IrisCodeArray {
fn default() -> Self {
Self::ZERO
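
Context for the new derives: serde only ships built-in Serialize/Deserialize impls for arrays up to length 32, and IRIS_CODE_SIZE_U64 exceeds that, so the array field is routed through serde_big_array. A minimal sketch of the same pattern (the length 100 and the serde_json round-trip are illustrative, not from this commit):

use serde::{Deserialize, Serialize};
use serde_big_array::BigArray;

#[derive(Serialize, Deserialize)]
struct Wide(#[serde(with = "BigArray")] [u64; 100]); // >32 elements, so BigArray is needed

fn main() {
    let w = Wide([7; 100]);
    let json = serde_json::to_string(&w).unwrap();
    let back: Wide = serde_json::from_str(&json).unwrap();
    assert_eq!(back.0[0], 7);
}
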
11 changes: 10 additions & 1 deletion iris-mpc-cpu/Cargo.toml
@@ -15,20 +15,29 @@ bytemuck.workspace = true
dashmap = "6.1.0"
eyre.workspace = true
futures.workspace = true
hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "d34a1b3" }
hawk-pack = { git = "https://github.com/therealyingtong/hawk-pack.git", branch = "new-with-params" }
iris-mpc-common = { path = "../iris-mpc-common" }
itertools.workspace = true
num-traits.workspace = true
rand.workspace = true
serde.workspace = true
sqlx.workspace = true
static_assertions.workspace = true
tokio.workspace = true
tracing.workspace = true
tracing-test = "0.2.5"

[dev-dependencies]
criterion = { version = "0.5.1", features = ["async_tokio"] }
zip = "2.2.0"

[features]
db_dependent = []

[[bench]]
name = "hnsw"
harness = false

[[bench]]
name = "hnsw_db"
harness = false
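
`harness = false` disables the default libtest harness so each bench target supplies its own main through Criterion, as the new hnsw_db.rs does below; it can be run with `cargo bench --bench hnsw_db`. A minimal, self-contained sketch of such a target (the bench body here is a placeholder, not repo code):

use criterion::{criterion_group, criterion_main, Criterion};

fn bench_noop(c: &mut Criterion) {
    // Real benches replace this trivial closure.
    c.bench_function("noop", |b| b.iter(|| std::hint::black_box(1 + 1)));
}

criterion_group!(benches, bench_noop);
criterion_main!(benches);
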
17 changes: 17 additions & 0 deletions iris-mpc-cpu/benches/assets/.gitattributes
@@ -0,0 +1,17 @@
hnsw_db_1000000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text
hnsw_db_1000000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text
hnsw_db_100000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text
hnsw_db_100000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text
hnsw_db_200000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text
hnsw_db_200000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text
100K_rust_format_synthetic_data.dat.zip filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_8 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_9 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_3 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_4 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_2 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_5 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_6 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_7 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_0 filter=lfs diff=lfs merge=lfs -text
processed_masked_irises_chunk_1 filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_0
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_1
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_2
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_3
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_4
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_5
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_6
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_7
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_8
Git LFS file not shown
3 changes: 3 additions & 0 deletions iris-mpc-cpu/benches/assets/processed_masked_irises_chunk_9
Git LFS file not shown
8 changes: 5 additions & 3 deletions iris-mpc-cpu/benches/hnsw.rs
@@ -15,7 +15,7 @@ fn bench_plaintext_hnsw(c: &mut Criterion) {
group.sample_size(10);
group.sampling_mode(SamplingMode::Flat);

for database_size in [100_usize, 1000, 10000] {
for database_size in [10000] {
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
@@ -29,7 +29,9 @@ fn bench_plaintext_hnsw(c: &mut Criterion) {

for _ in 0..database_size {
let raw_query = IrisCode::random_rng(&mut rng);
let query = plain_searcher.vector_store.prepare_query(raw_query.clone());
let query = plain_searcher
.vector_store
.prepare_query(raw_query.clone().into());
let neighbors = plain_searcher.search_to_insert(&query).await;
let inserted = plain_searcher.vector_store.insert(&query).await;
plain_searcher
Expand All @@ -45,7 +47,7 @@ fn bench_plaintext_hnsw(c: &mut Criterion) {
|mut my_db| async move {
let mut rng = AesRng::seed_from_u64(0_u64);
let on_the_fly_query = IrisDB::new_random_rng(1, &mut rng).db[0].clone();
let query = my_db.vector_store.prepare_query(on_the_fly_query);
let query = my_db.vector_store.prepare_query(on_the_fly_query.into());
let neighbors = my_db.search_to_insert(&query).await;
my_db.insert_from_search_results(query, neighbors).await;
},
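
The `.into()` added at each call site reflects `prepare_query` now taking the store's associated `Data` type (see the `type Data` addition in ng_aby3_store.rs below) rather than an `IrisCode` directly. A hedged sketch of the conversion pattern — `PlaintextIris` is an assumed wrapper name, not copied from the repo:

struct IrisCode; // stand-in for iris_mpc_common's IrisCode

struct PlaintextIris(IrisCode); // hypothetical Data type for the plaintext store

impl From<IrisCode> for PlaintextIris {
    fn from(code: IrisCode) -> Self {
        PlaintextIris(code)
    }
}

fn prepare_query(data: PlaintextIris) -> usize {
    let _ = data; // a real store would assign and remember an ID here
    0
}

fn main() {
    let raw = IrisCode;
    let _query = prepare_query(raw.into()); // same shape as the bench code above
}
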
132 changes: 132 additions & 0 deletions iris-mpc-cpu/benches/hnsw_db.rs
@@ -0,0 +1,132 @@
use aes_prng::AesRng;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode};
use hawk_pack::{
graph_store::{GraphMem, GraphPg},
hnsw_db::HawkSearcher,
DbStore, VectorStore,
};
use iris_mpc_common::iris_db::db::IrisDB;
use iris_mpc_cpu::hawkers::{
plaintext_store::PlaintextStore, plaintext_store_db::PlaintextStoreDb,
};
use rand::{RngCore, SeedableRng};
use std::vec;

const HAWK_DATABASE_URL: &str = "postgres://postgres:postgres@localhost/postgres";

/// Table names
const HAWK_GRAPH_ENTRY: &str = "hawk_graph_entry";
const HAWK_GRAPH_LINKS: &str = "hawk_graph_links";
const HAWK_VECTORS: &str = "hawk_vectors";

fn csv_filename(db_size: usize, table_name: String) -> String {
format!("hnsw_db_{}_{}.csv", db_size, table_name)
}

fn zip_filename(db_size: usize, table_name: String) -> String {
format!("{}.zip", csv_filename(db_size, table_name))
}

fn to_path(file: &str) -> String {
format!("./benches/assets/{}", file.to_string())
}

fn unzip(zip_file: &str) {
let zip_path = to_path(zip_file);

let path = std::path::Path::new(&zip_path);
let zip = std::fs::File::open(path).unwrap();
let mut archive: zip::ZipArchive<std::fs::File> = zip::ZipArchive::new(zip).unwrap();
archive
.extract("./benches/assets")
.expect(&format!("Could not extract {}", zip_file));
}

async fn hawk_searcher_from_csv(
mut rng: impl RngCore,
db_size: usize,
graph_store: GraphPg<PlaintextStoreDb>,
vector_store: PlaintextStoreDb,
) -> HawkSearcher<PlaintextStore, GraphMem<PlaintextStore>> {
// Unzip hawk_graph_links and hawk_vectors files
unzip(&zip_filename(db_size, HAWK_VECTORS.to_string()));
unzip(&zip_filename(db_size, HAWK_GRAPH_LINKS.to_string()));

let hawk_graph_entry_path = to_path(&csv_filename(db_size, HAWK_GRAPH_ENTRY.to_string()));
let hawk_graph_links_path = to_path(&csv_filename(db_size, HAWK_GRAPH_LINKS.to_string()));
let hawk_vectors_path = to_path(&csv_filename(db_size, HAWK_VECTORS.to_string()));

graph_store
.copy_in(vec![
(HAWK_GRAPH_ENTRY.to_string(), hawk_graph_entry_path),
(HAWK_GRAPH_LINKS.to_string(), hawk_graph_links_path.clone()),
])
.await
.unwrap();
std::fs::remove_file(hawk_graph_links_path).unwrap();
let graph_mem = graph_store.to_graph_mem().await;
graph_store.cleanup().await.unwrap();

vector_store
.copy_in(vec![(HAWK_VECTORS.to_string(), hawk_vectors_path.clone())])
.await
.unwrap();
std::fs::remove_file(hawk_vectors_path).unwrap();
let vector_mem = vector_store.to_plaintext_store().await;
vector_store.cleanup().await.unwrap();

HawkSearcher::new(vector_mem, graph_mem, &mut rng)
}

fn bench_hnsw_db(c: &mut Criterion) {
let mut group = c.benchmark_group("hnsw_db");
group.sample_size(10);
group.sampling_mode(SamplingMode::Flat);

for database_size in [100000, 200000, 1000000] {
let schema_name = format!("hnsw_db_{}", database_size.to_string());
let temporary_name = || format!("{}_{}", schema_name, rand::random::<u32>());

let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.unwrap();

let plain_searcher = rt.block_on(async move {
let rng = AesRng::seed_from_u64(0_u64);
let vector_store = PlaintextStoreDb::new(HAWK_DATABASE_URL, &temporary_name())
.await
.unwrap();
let graph_store = GraphPg::new(HAWK_DATABASE_URL, &temporary_name())
.await
.unwrap();
let plain_searcher =
hawk_searcher_from_csv(rng, database_size, graph_store, vector_store).await;

plain_searcher
});

group.bench_function(BenchmarkId::new("insert", database_size), |b| {
b.to_async(&rt).iter_batched(
|| plain_searcher.clone(),
|mut my_db| async move {
let mut rng = AesRng::seed_from_u64(0_u64);
let on_the_fly_query = IrisDB::new_random_rng(1, &mut rng).db[0].clone();
let query = my_db.vector_store.prepare_query(on_the_fly_query.into());
let neighbors = my_db.search_to_insert(&query).await;
my_db.insert_from_search_results(query, neighbors).await;
},
criterion::BatchSize::SmallInput,
)
});
}

group.finish();
}

criterion_group! {
hnsw,
bench_hnsw_db,
}

criterion_main!(hnsw);
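
Running this target assumes a Postgres instance reachable at HAWK_DATABASE_URL and the LFS-tracked assets pulled (git lfs pull). Note that iter_batched clones the pre-built searcher outside the timed section, so only the search-and-insert path is measured. A minimal connectivity check in the same stack (a sketch, assuming sqlx with the postgres feature; not part of the commit):

use sqlx::postgres::PgPoolOptions;

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // Same URL as HAWK_DATABASE_URL in the bench above.
    let pool = PgPoolOptions::new()
        .max_connections(1)
        .connect("postgres://postgres:postgres@localhost/postgres")
        .await?;
    let row: (i32,) = sqlx::query_as("SELECT 1").fetch_one(&pool).await?;
    assert_eq!(row.0, 1);
    Ok(())
}
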
1 change: 1 addition & 0 deletions iris-mpc-cpu/migrations/20240909105323_init.down.sql
@@ -0,0 +1 @@
DROP TABLE hawk_vectors;
5 changes: 5 additions & 0 deletions iris-mpc-cpu/migrations/20240909105323_init.up.sql
@@ -0,0 +1,5 @@
CREATE TABLE IF NOT EXISTS hawk_vectors (
id integer NOT NULL,
point jsonb NOT NULL,
CONSTRAINT hawk_vectors_pkey PRIMARY KEY (id)
);
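
The `point jsonb` column implies each vector is stored as a serde-serialized JSON value keyed by an integer id. A hedged sketch of writing and reading rows with sqlx (assumes the json feature; function names are illustrative, not from this commit):

use sqlx::PgPool;

async fn put_point(pool: &PgPool, id: i32, point: &serde_json::Value) -> sqlx::Result<()> {
    // Upsert keeps the sketch idempotent across runs.
    sqlx::query(
        "INSERT INTO hawk_vectors (id, point) VALUES ($1, $2)
         ON CONFLICT (id) DO UPDATE SET point = EXCLUDED.point",
    )
    .bind(id)
    .bind(point)
    .execute(pool)
    .await?;
    Ok(())
}

async fn get_point(pool: &PgPool, id: i32) -> sqlx::Result<serde_json::Value> {
    let (point,): (serde_json::Value,) =
        sqlx::query_as("SELECT point FROM hawk_vectors WHERE id = $1")
            .bind(id)
            .fetch_one(pool)
            .await?;
    Ok(point)
}
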
5 changes: 3 additions & 2 deletions iris-mpc-cpu/src/database_generators.rs
@@ -1,6 +1,7 @@
use crate::shares::{ring_impl::RingElement, share::Share, vecshare::VecShare};
use iris_mpc_common::iris_db::iris::{IrisCode, IrisCodeArray};
use rand::{Rng, RngCore};
use serde::{Deserialize, Serialize};
use std::sync::Arc;

type ShareRing = u16;
@@ -9,13 +10,13 @@ type VecShareType = VecShare<u16>;
type ShareRingPlain = RingElement<ShareRing>;
// type ShareType = Share<u16>;

#[derive(PartialEq, Eq, Debug, Default, Clone)]
#[derive(PartialEq, Eq, Debug, Default, Clone, Serialize, Deserialize, Hash)]
pub struct SharedIris {
pub shares: VecShareType,
pub mask: IrisCodeArray,
}

#[derive(PartialEq, Eq, Debug, Default, Clone)]
#[derive(PartialEq, Eq, Debug, Default, Clone, Serialize, Deserialize, Hash)]
pub struct NgSharedIris {
pub code: VecShareType,
pub mask: VecShareType,
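
These derives only compile if every field type implements the same traits, so VecShareType (= VecShare<u16>) and IrisCodeArray must be Serialize/Deserialize/Hash as well — the IrisCodeArray derives are added in iris.rs above, and VecShare presumably gains matching derives elsewhere in this commit. A minimal sketch of that constraint:

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Hash)]
struct Inner(Vec<u16>); // the field type must derive first...

#[derive(Serialize, Deserialize, Hash)]
struct Outer {
    inner: Inner, // ...or the outer derive fails to compile
}
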
2 changes: 2 additions & 0 deletions iris-mpc-cpu/src/hawkers/mod.rs
@@ -1,2 +1,4 @@
pub mod ng_aby3_store;
pub mod plaintext_store;
// #[cfg(feature = "db_dependent")]
pub mod plaintext_store_db;
26 changes: 12 additions & 14 deletions iris-mpc-cpu/src/hawkers/ng_aby3_store.rs
@@ -105,8 +105,13 @@ pub fn setup_local_store_aby3_players() -> eyre::Result<LocalNetAby3NgStoreProto
Ok(LocalNetAby3NgStoreProtocol { runtime, players })
}

impl LocalNetAby3NgStoreProtocol {
pub fn prepare_query(&mut self, code: Vec<NgSharedIris>) -> PointId {
impl VectorStore for LocalNetAby3NgStoreProtocol {
type QueryRef = PointId; // Vector ID, pending insertion.
type VectorRef = PointId; // Vector ID, inserted.
type DistanceRef = (PointId, PointId); // Lazy distance representation.
type Data = Vec<NgSharedIris>;

fn prepare_query(&mut self, code: Vec<NgSharedIris>) -> PointId {
assert_eq!(code.len(), 3);
assert_eq!(self.players.len(), 3);
let pid0 = self
@@ -128,12 +133,6 @@ impl LocalNetAby3NgStoreProtocol {
assert_eq!(pid1, pid2);
pid0
}
}

impl VectorStore for LocalNetAby3NgStoreProtocol {
type QueryRef = PointId; // Vector ID, pending insertion.
type VectorRef = PointId; // Vector ID, inserted.
type DistanceRef = (PointId, PointId); // Lazy distance representation.

async fn insert(&mut self, query: &Self::QueryRef) -> Self::VectorRef {
// The query is now accepted in the store. It keeps the same ID.
@@ -243,7 +242,7 @@ pub async fn ng_create_ready_made_hawk_searcher<R: RngCore + Clone>(
for raw_query in cleartext_database.iter() {
let query = cleartext_searcher
.vector_store
.prepare_query(raw_query.clone());
.prepare_query(raw_query.clone().into());
let neighbors = cleartext_searcher.search_to_insert(&query).await;
let inserted = cleartext_searcher.vector_store.insert(&query).await;
cleartext_searcher
@@ -315,10 +314,9 @@ mod tests {

let queries = (0..database_size)
.map(|id| {
db.vector_store.prepare_query(ng_generate_iris_shares(
&mut rng,
cleartext_database[id].clone(),
))
db.vector_store.prepare_query(
ng_generate_iris_shares(&mut rng, cleartext_database[id].clone()).into(),
)
})
.collect::<Vec<_>>();

@@ -437,7 +435,7 @@
// Now do the work for the plaintext store
let mut plaintext_store = PlaintextStore::default();
let plaintext_preps: Vec<_> = (0..db_dim)
.map(|id| plaintext_store.prepare_query(cleartext_database[id].clone()))
.map(|id| plaintext_store.prepare_query(cleartext_database[id].clone().into()))
.collect();
let mut plaintext_inserts = Vec::new();
for p in plaintext_preps.iter() {
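
Folding `prepare_query` into the `VectorStore` impl goes together with the hawk-pack branch swap in Cargo.toml above: the trait now carries a `Data` associated type naming the query input. A sketch of the assumed trait shape, inferred from the impls in this diff rather than copied from hawk-pack (the real trait also has async search/insert methods):

trait VectorStore {
    type QueryRef;    // pending-insertion handle
    type VectorRef;   // inserted-vector handle
    type DistanceRef; // lazy distance representation
    type Data;        // input accepted by prepare_query

    fn prepare_query(&mut self, data: Self::Data) -> Self::QueryRef;
}
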
(Diffs for the remaining changed files are not shown.)
