From 2c5a0932784a328ed44f24d9073fc2f4f38557a6 Mon Sep 17 00:00:00 2001 From: therealyingtong Date: Fri, 11 Oct 2024 08:32:02 +0800 Subject: [PATCH] checkpoint_from_dat bench --- .gitattributes | 4 - Cargo.lock | 204 +++++++-------- iris-mpc-cpu/Cargo.toml | 2 +- iris-mpc-cpu/benches/.gitattributes | 2 - iris-mpc-cpu/benches/assets/.gitattributes | 5 + .../100K_rust_format_synthetic_data.dat.zip | 3 + iris-mpc-cpu/src/hawkers/plaintext_store.rs | 33 +-- .../src/hawkers/plaintext_store_db.rs | 238 +++++++++++++++--- 8 files changed, 334 insertions(+), 157 deletions(-) delete mode 100644 iris-mpc-cpu/benches/.gitattributes create mode 100644 iris-mpc-cpu/benches/assets/100K_rust_format_synthetic_data.dat.zip diff --git a/.gitattributes b/.gitattributes index beb3fcf60..e69de29bb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +0,0 @@ -iris-mpc-cpu/benches/assets/hnsw_db_100000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text -iris-mpc-cpu/benches/assets/hnsw_db_100000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text -iris-mpc-cpu/benches/assets/hnsw_db_200000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text -iris-mpc-cpu/benches/assets/hnsw_db_200000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text diff --git a/Cargo.lock b/Cargo.lock index 63d142034..3f29c96e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] @@ -212,9 +212,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.7" +version = "1.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8191fb3091fa0561d1379ef80333c3c7191c6f0435d986e85821bcf7acbd1126" +checksum = "7198e6f03240fdceba36656d8be440297b6b82270325908c7381f37d826a74f6" dependencies = [ "aws-credential-types", "aws-runtime", @@ -254,9 +254,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f95446d919226d587817a7d21379e6eb099b97b45110a7f272a444ca5c54070" +checksum = "cdd82dba44d209fddb11c190e0a94b78651f95299598e472215667417a03ff1d" dependencies = [ "aws-lc-sys", "mirai-annotations", @@ -266,9 +266,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.21.2" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ddc4a5b231dd6958b140ff3151b6412b3f4321fab354f399eec8f14b06df62" +checksum = "df7a4168111d7eb622a31b214057b8509c0a7e1794f44c546d742330dc793972" dependencies = [ "bindgen", "cc", @@ -307,9 +307,9 @@ dependencies = [ [[package]] name = "aws-sdk-kms" -version = "1.45.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0caf20b8855dbeb458552e6c8f8f9eb92b95e4a131725b93540ec73d60c38eb3" +checksum = "564a597a3c71a957d60a2e4c62c93d78ee5a0d636531e15b760acad983a5c18e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -329,9 +329,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.53.0" +version = "1.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43fad71130014e11f42fadbdcce5df12ee61866f8ab9bad773b138d4b3c11087" +checksum = "ef7861e9bdaff3e7f69cd636ceaa1d8d083076c69f69edc7f953a33661b86991" dependencies = [ "ahash", "aws-credential-types", @@ -364,9 +364,9 @@ dependencies = [ [[package]] name = "aws-sdk-secretsmanager" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34f0ed64c48d8d703d93791e5315f9c12789f7280e8ee7c2c8fe8c1bc5d91907" +checksum = "05a5d3faceba815f3a81039e0c6952e7afad1467c0e2378d28f642c2a5fb299b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -387,9 +387,9 @@ dependencies = [ [[package]] name = "aws-sdk-sns" -version = "1.45.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30768bb93c6f1e5b466e9d477d1172c02a55bc5697075f702e4935980695e935" +checksum = "93d16dcbb2991fb5e00997cb4ceabb80b53a9b8bf7c3755da112c8c5f011b4ef" dependencies = [ "aws-credential-types", "aws-runtime", @@ -410,9 +410,9 @@ dependencies = [ [[package]] name = "aws-sdk-sqs" -version = "1.44.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e88af26c1a077a59e1146c4bbb55d64cb84cb1a0dd14c7d40cc273e9292b43" +checksum = "657982a9e70b8aa1b903c84f8e76c36202358c48f119330d5f4b74d7e6cf27b7" dependencies = [ "aws-credential-types", "aws-runtime", @@ -432,9 +432,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.44.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b90cfe6504115e13c41d3ea90286ede5aa14da294f3fe077027a6e83850843c" +checksum = "0dc2faec3205d496c7e57eff685dd944203df7ce16a4116d0281c44021788a7b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -454,9 +454,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.45.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "167c0fad1f212952084137308359e8e4c4724d1c643038ce163f06de9662c1d0" +checksum = "c93c241f52bc5e0476e259c953234dab7e2a35ee207ee202e86c0095ec4951dc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -476,9 +476,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.44.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cb5f98188ec1435b68097daa2a37d74b9d17c9caa799466338a8d1544e71b9d" +checksum = "b259429be94a3459fa1b00c5684faee118d74f9577cc50aebadc36e507c63b5f" dependencies = [ "aws-credential-types", "aws-runtime", @@ -611,9 +611,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1ce695746394772e7000b39fe073095db6d45a862d0767dd5ad0ac0d7f8eb87" +checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -823,9 +823,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.4" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags 2.6.0", "cexpr", @@ -918,9 +918,9 @@ dependencies = [ [[package]] name = "bytemuck_derive" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc8b54b395f2fcfbb3d90c47b01c7f444d94d05bdeb775811dec868ac3bbc26" +checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec" dependencies = [ "proc-macro2", "quote", @@ -987,9 +987,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.24" +version = "1.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938" +checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" dependencies = [ "jobserver", "libc", @@ -1075,9 +1075,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.19" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7be5744db7978a28d9df86a214130d106a89ce49644cbc4e3f0c22c3fba30615" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" dependencies = [ "clap_builder", "clap_derive", @@ -1085,9 +1085,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.19" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5fbc17d3ef8278f55b282b2a2e75ae6f6c7d4bb70ed3d0382375104bfafdb4b" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" dependencies = [ "anstream", "anstyle", @@ -1817,6 +1817,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1855,9 +1861,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -1881,9 +1887,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -1906,15 +1912,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -1934,9 +1940,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-lite" @@ -1955,9 +1961,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -1966,21 +1972,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -2017,9 +2023,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.31.0" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" @@ -2107,6 +2113,11 @@ name = "hashbrown" version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "hashlink" @@ -2120,7 +2131,7 @@ dependencies = [ [[package]] name = "hawk-pack" version = "0.1.0" -source = "git+https://github.com/therealyingtong/hawk-pack.git?branch=export-vector-store#7d488e9498fb17e98e0b0674a7e5f4d3b3b538db" +source = "git+https://github.com/therealyingtong/hawk-pack.git?branch=new-with-params#4602cf8a4622aed3e7a65875e50903fdacab9a7d" dependencies = [ "aes-prng 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion", @@ -2365,7 +2376,7 @@ dependencies = [ "http 1.1.0", "hyper 1.4.1", "hyper-util", - "rustls 0.23.13", + "rustls 0.23.14", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -2537,9 +2548,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "iris-mpc" @@ -2715,7 +2726,7 @@ dependencies = [ "rand", "rand_chacha", "rcgen", - "rustls 0.23.13", + "rustls 0.23.14", "rustls-pemfile 2.2.0", "serde", "serde-big-array", @@ -2798,9 +2809,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -2922,11 +2933,11 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lru" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.14.5", + "hashbrown 0.15.0", ] [[package]] @@ -3338,21 +3349,18 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.36.4" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.20.1" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82881c4be219ab5faaf2ad5e5e5ecdff8c66bd7402ca3160975c93b24961afd1" -dependencies = [ - "portable-atomic", -] +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oorandom" @@ -3665,18 +3673,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" dependencies = [ "proc-macro2", "quote", @@ -3802,9 +3810,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" dependencies = [ "unicode-ident", ] @@ -4233,9 +4241,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.13" +version = "0.23.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8" +checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" dependencies = [ "aws-lc-rs", "log", @@ -4327,9 +4335,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b" +checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" dependencies = [ "windows-sys 0.59.0", ] @@ -5214,7 +5222,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.13", + "rustls 0.23.14", "rustls-pki-types", "tokio", ] @@ -5653,9 +5661,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", "once_cell", @@ -5664,9 +5672,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -5679,9 +5687,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.43" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -5691,9 +5699,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5701,9 +5709,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -5714,15 +5722,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/iris-mpc-cpu/Cargo.toml b/iris-mpc-cpu/Cargo.toml index 7cbc36051..915d12907 100644 --- a/iris-mpc-cpu/Cargo.toml +++ b/iris-mpc-cpu/Cargo.toml @@ -15,7 +15,7 @@ bytemuck.workspace = true dashmap = "6.1.0" eyre.workspace = true futures.workspace = true -hawk-pack = { git = "https://github.com/therealyingtong/hawk-pack.git", branch = "export-vector-store" } +hawk-pack = { git = "https://github.com/therealyingtong/hawk-pack.git", branch = "new-with-params" } iris-mpc-common = { path = "../iris-mpc-common" } itertools.workspace = true num-traits.workspace = true diff --git a/iris-mpc-cpu/benches/.gitattributes b/iris-mpc-cpu/benches/.gitattributes deleted file mode 100644 index 917708ce0..000000000 --- a/iris-mpc-cpu/benches/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -hnsw_db_1000000_3668603835_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text -hnsw_db_1000000_69454808_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text diff --git a/iris-mpc-cpu/benches/assets/.gitattributes b/iris-mpc-cpu/benches/assets/.gitattributes index 47a3e0490..11d9461c2 100644 --- a/iris-mpc-cpu/benches/assets/.gitattributes +++ b/iris-mpc-cpu/benches/assets/.gitattributes @@ -1,2 +1,7 @@ hnsw_db_1000000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text hnsw_db_1000000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text +hnsw_db_100000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text +hnsw_db_100000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text +hnsw_db_200000_hawk_graph_links.csv.zip filter=lfs diff=lfs merge=lfs -text +hnsw_db_200000_hawk_vectors.csv.zip filter=lfs diff=lfs merge=lfs -text +100K_rust_format_synthetic_data.dat.zip filter=lfs diff=lfs merge=lfs -text diff --git a/iris-mpc-cpu/benches/assets/100K_rust_format_synthetic_data.dat.zip b/iris-mpc-cpu/benches/assets/100K_rust_format_synthetic_data.dat.zip new file mode 100644 index 000000000..7cad55bee --- /dev/null +++ b/iris-mpc-cpu/benches/assets/100K_rust_format_synthetic_data.dat.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a1550e6c1b39c0c3bdb26233aa00a2899a44b7198caf5c98d9f700e68ecf2e +size 253284275 diff --git a/iris-mpc-cpu/src/hawkers/plaintext_store.rs b/iris-mpc-cpu/src/hawkers/plaintext_store.rs index 22ff20028..d9265f9e3 100644 --- a/iris-mpc-cpu/src/hawkers/plaintext_store.rs +++ b/iris-mpc-cpu/src/hawkers/plaintext_store.rs @@ -1,16 +1,17 @@ use hawk_pack::VectorStore; use iris_mpc_common::iris_db::iris::{IrisCode, IrisCodeArray, MATCH_THRESHOLD_RATIO}; use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; #[derive(Default, Debug, Clone)] pub struct PlaintextStore { - pub points: Vec, + pub points: BTreeMap, } #[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] pub struct FormattedIris { - data: Vec, - mask: IrisCodeArray, + pub data: Vec, + pub mask: IrisCodeArray, } impl From for FormattedIris { @@ -42,10 +43,10 @@ impl FormattedIris { #[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] pub struct PlaintextPoint { /// Whatever encoding of a vector. - data: FormattedIris, + pub data: FormattedIris, /// Distinguish between queries that are pending, and those that were /// ultimately accepted into the vector store. - is_persistent: bool, + pub is_persistent: bool, } impl From for PlaintextPoint { @@ -81,7 +82,7 @@ impl PlaintextPoint { } } -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)] pub struct PointId(pub usize); impl PointId { @@ -97,12 +98,12 @@ impl PlaintextStore { distance2: &(PointId, PointId), ) -> (i32, i32) { let (x1, y1) = ( - &self.points[distance1.0.val()], - &self.points[distance1.1.val()], + &self.points.get(&distance1.0).unwrap(), + &self.points.get(&distance1.1).unwrap(), ); let (x2, y2) = ( - &self.points[distance2.0.val()], - &self.points[distance2.1.val()], + &self.points.get(&distance2.0).unwrap(), + &self.points.get(&distance2.1).unwrap(), ); let (d1, t1) = x1.compute_distance(y1); let (d2, t2) = x2.compute_distance(y2); @@ -120,15 +121,15 @@ impl VectorStore for PlaintextStore { type Data = PlaintextPoint; fn prepare_query(&mut self, raw_query: PlaintextPoint) -> PointId { - self.points.push(raw_query); + let point_id = PointId(self.points.len()); + self.points.insert(point_id, raw_query); - let point_id = self.points.len() - 1; - PointId(point_id) + point_id } async fn insert(&mut self, query: &Self::QueryRef) -> Self::VectorRef { // The query is now accepted in the store. It keeps the same ID. - self.points[query.0].is_persistent = true; + self.points.get_mut(query).unwrap().is_persistent = true; *query } @@ -142,8 +143,8 @@ impl VectorStore for PlaintextStore { } async fn is_match(&self, distance: &Self::DistanceRef) -> bool { - let x = &self.points[distance.0 .0]; - let y = &self.points[distance.1 .0]; + let x = &self.points.get(&distance.0).unwrap(); + let y = &self.points.get(&distance.1).unwrap(); x.is_close(y) } diff --git a/iris-mpc-cpu/src/hawkers/plaintext_store_db.rs b/iris-mpc-cpu/src/hawkers/plaintext_store_db.rs index fc23ed1ac..cdfe193bb 100644 --- a/iris-mpc-cpu/src/hawkers/plaintext_store_db.rs +++ b/iris-mpc-cpu/src/hawkers/plaintext_store_db.rs @@ -7,7 +7,7 @@ use sqlx::{ postgres::{PgPoolOptions, PgRow}, Executor, PgPool, Row, }; -use std::path; +use std::{collections::BTreeMap, path}; use tokio::io::AsyncWriteExt; const MAX_CONNECTIONS: u32 = 5; @@ -40,7 +40,7 @@ impl VectorStore for PlaintextStoreDb { VALUES ($1, $2) ", ) - .bind(query.0 as i32) + .bind(query.val() as i32) .bind(sqlx::types::Json(point)) .execute(&self.pool) .await @@ -97,7 +97,9 @@ impl DbStore for PlaintextStoreDb { MIGRATOR.run(&pool).await?; Ok(PlaintextStoreDb { - cache: PlaintextStore { points: vec![] }, + cache: PlaintextStore { + points: BTreeMap::new(), + }, schema_name: schema_name.to_owned(), pool, }) @@ -112,30 +114,8 @@ impl DbStore for PlaintextStoreDb { } async fn copy_out(&self) -> Result> { - let table_name = "hawk_vectors"; let file_name = format!("{}_vectors.csv", self.schema_name.clone()); - - let path = path::absolute(file_name.clone())? - .as_os_str() - .to_str() - .unwrap() - .to_owned(); - - let mut file = tokio::fs::File::create(path.clone()).await?; - let mut conn = self.pool.acquire().await?; - - let mut copy_stream = conn - .copy_out_raw(&format!( - "COPY {} TO STDOUT (FORMAT CSV, HEADER)", - table_name - )) - .await?; - - while let Some(chunk) = copy_stream.try_next().await? { - file.write_all(&chunk).await?; - } - - Ok(vec![(table_name.to_string(), path)]) + self.copy_out_with_filename(file_name).await } } @@ -151,8 +131,9 @@ impl PlaintextStoreDb { .unwrap() .iter() .map(|row| { + let id: i32 = row.get("id"); let point: sqlx::types::Json = row.get("point"); - point.as_ref().clone() + (PointId(id as usize), point.as_ref().clone()) }) .collect(); @@ -160,10 +141,9 @@ impl PlaintextStoreDb { } pub async fn get_point(&self, point: PointId) -> Option { - if self.cache.points.len() > point.0 { - Some(self.cache.points[point.0].clone()) - } else { - sqlx::query( + let mut res = self.cache.points.get(&point).map(|p| p.clone()); + if res.is_none() { + res = sqlx::query( " SELECT point FROM hawk_vectors WHERE id = $1 ", @@ -175,8 +155,9 @@ impl PlaintextStoreDb { .map(|row: PgRow| { let x: sqlx::types::Json = row.get("point"); x.as_ref().clone() - }) + }); } + res } pub async fn distance_computation( @@ -199,6 +180,32 @@ impl PlaintextStoreDb { let cross_2 = d1 as i32 * t2 as i32; (cross_1, cross_2) } + + async fn copy_out_with_filename(&self, file_name: String) -> Result> { + let table_name = "hawk_vectors"; + + let path = path::absolute(file_name.clone())? + .as_os_str() + .to_str() + .unwrap() + .to_owned(); + + let mut file = tokio::fs::File::create(path.clone()).await?; + let mut conn = self.pool.acquire().await?; + + let mut copy_stream = conn + .copy_out_raw(&format!( + "COPY {} TO STDOUT (FORMAT CSV, HEADER)", + table_name + )) + .await?; + + while let Some(chunk) = copy_stream.try_next().await? { + file.write_all(&chunk).await?; + } + + Ok(vec![(table_name.to_string(), path)]) + } } fn sql_switch_schema(schema_name: &str) -> Result { @@ -223,11 +230,70 @@ fn sanitize_identifier(input: &str) -> Result<()> { #[cfg(test)] mod tests { use super::*; + use crate::hawkers::plaintext_store::FormattedIris; use aes_prng::AesRng; - use hawk_pack::{graph_store::GraphPg, hnsw_db::HawkSearcher}; - use iris_mpc_common::iris_db::iris::IrisCode; + use hawk_pack::{ + graph_store::GraphPg, + hnsw_db::{HawkSearcher, Params}, + }; + use iris_mpc_common::iris_db::iris::{IrisCode, IrisCodeArray}; use rand::SeedableRng; + struct PlaintextPointReader { + inner: R, + } + + impl PlaintextPointReader { + pub fn new(inner: R) -> Self { + Self { inner } + } + } + + impl Iterator for PlaintextPointReader { + type Item = Vec; + + fn next(&mut self) -> Option { + let mut buf: [u8; 102400] = [0; 102400]; + self.inner.read_exact(&mut buf).ok()?; + Some( + buf.chunks_exact(8) + .map(|bytes| { + let val = u64::from_le_bytes(bytes.try_into().unwrap()); + if val == u64::MAX { + -1 + } else { + val as i8 + } + }) + .collect(), + ) + } + } + + fn plaintext_point_from_data(data: Vec) -> PlaintextPoint { + let mut mask = IrisCodeArray::ZERO; + + for (bit, &val) in data.iter().enumerate() { + if val != 0 { + mask.set_bit(bit, true); + } + } + let iris_code = FormattedIris { data, mask }; + PlaintextPoint { + data: iris_code, + is_persistent: false, + } + } + + fn unzip(zip_path: &str) { + let path = std::path::Path::new(&zip_path); + let zip = std::fs::File::open(path).unwrap(); + let mut archive: zip::ZipArchive = zip::ZipArchive::new(zip).unwrap(); + archive + .extract("./benches/assets") + .expect(&format!("Could not extract {}", zip_path)); + } + #[tokio::test] async fn hawk_searcher_from_db() { let database_size = 100; @@ -242,8 +308,7 @@ mod tests { let vector_store = PlaintextStoreDb::new(hawk_database_url, &temporary_name()) .await .unwrap(); - let mut plain_searcher = - HawkSearcher::new(vector_store.clone(), graph_store.clone(), &mut rng); + let mut plain_searcher = HawkSearcher::new(vector_store, graph_store.clone(), &mut rng); let queries = (0..database_size) .map(|_| { @@ -291,4 +356,105 @@ mod tests { graph_store.cleanup().await.unwrap(); } } + + #[tokio::test] + async fn checkpoint_from_dat() { + use std::io::BufReader; + + let step_size = 20000; + let database_size = 100000; + let m_values = [64, 96, 128, 160, 192, 224, 256]; + let params = m_values.iter().map(|m| Params::new_with_m(*m as usize)); + + let mut rng = AesRng::seed_from_u64(0_u64); + let hawk_database_url: &str = "postgres://postgres:postgres@localhost/postgres"; + let zip_filename = "benches/assets/100K_rust_format_synthetic_data.dat.zip"; + let zip_path = path::absolute(zip_filename).unwrap(); + unzip(zip_path.to_str().unwrap()); + + let dat_filename = "benches/assets/100K_rust_format_synthetic_data.dat"; + let dat_path = path::absolute(dat_filename).unwrap(); + + // Create checkpointed vector files + let mut vectors_checkpoint_names: Vec> = vec![]; + let mut vector_store = PlaintextStoreDb::new(hawk_database_url, "vectors_100K") + .await + .unwrap(); + + let mut queries: Vec<_> = { + let input = + BufReader::new(std::fs::File::open(dat_path.clone()).expect("Failed to open file")); + let values: Vec> = PlaintextPointReader::new(input).collect(); + values + .into_iter() + .map(|data| plaintext_point_from_data(data)) + .collect() + }; + + for checkpoint in 1..=(database_size / step_size) { + let checkpoint_name = format!("{}_checkpoint", (checkpoint * step_size).to_string(),); + let checkpoint_queries = queries[0..step_size].to_vec(); + queries.drain(0..step_size); + for query in checkpoint_queries.iter() { + let query = vector_store.prepare_query(query.clone()); + vector_store.insert(&query).await; + } + vectors_checkpoint_names.push( + vector_store + .copy_out_with_filename(checkpoint_name) + .await + .unwrap(), + ); + vector_store.cleanup().await.unwrap(); + } + + for params in params { + let mut prev_checkpoint_name = None; + + for checkpoint in 1..=(database_size / step_size) { + let checkpoint_name = format!( + "{}_M{}_checkpoint", + (checkpoint * step_size).to_string(), + params.M().to_string() + ); + let vectors_checkpoint_name = vectors_checkpoint_names[checkpoint - 1].clone(); + + let vector_store = PlaintextStoreDb::new( + hawk_database_url, + &format!("vectors_{}", checkpoint_name), + ) + .await + .unwrap(); + vector_store.copy_in(vectors_checkpoint_name).await.unwrap(); + let vector_mem = vector_store.to_plaintext_store().await; + vector_store.cleanup().await.unwrap(); + + let graph_store = + GraphPg::::new(hawk_database_url, &checkpoint_name) + .await + .unwrap(); + if let Some(prev_checkpoint_name) = prev_checkpoint_name { + graph_store.copy_in(prev_checkpoint_name).await.unwrap(); + } + let graph_mem = graph_store.to_graph_mem().await; + graph_store.cleanup().await.unwrap(); + + let mut hawk_searcher = HawkSearcher::new(vector_mem.clone(), graph_mem, &mut rng); + for (query, _) in vector_mem.points.iter() { + let neighbors = hawk_searcher.search_to_insert(&query).await; + hawk_searcher + .insert_from_search_results(*query, neighbors) + .await; + } + + prev_checkpoint_name = Some( + hawk_searcher + .graph_store + .write_to_db(hawk_database_url, &checkpoint_name) + .await + .unwrap(), + ); + } + } + } }