diff --git a/iris-mpc-gpu/tests/bitinject.rs b/iris-mpc-gpu/tests/bitinject.rs index 6da658388..0ea26eb07 100644 --- a/iris-mpc-gpu/tests/bitinject.rs +++ b/iris-mpc-gpu/tests/bitinject.rs @@ -1,6 +1,9 @@ #[cfg(feature = "gpu_dependent")] mod bitinject_test { - use cudarc::driver::{CudaDevice, CudaStream}; + use cudarc::{ + driver::{CudaDevice, CudaStream}, + nccl::Id, + }; use iris_mpc_gpu::{ helpers::{device_manager::DeviceManager, dtoh_on_stream_sync, htod_on_stream_sync}, threshold_ring::protocol::{ChunkShare, ChunkShareView, Circuits}, @@ -10,7 +13,9 @@ mod bitinject_test { use static_assertions::const_assert; use std::{env, sync::Arc}; use tokio::time::Instant; + use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + const DB_RNG_SEED: u64 = 0xdeadbeef; const INPUTS_PER_GPU_SIZE: usize = 2048 * 2; fn to_view(inp: &[ChunkShare]) -> Vec> { @@ -25,28 +30,25 @@ mod bitinject_test { (0..size / 64).map(|_| rng.gen()).collect::>() } - fn rep_share(value: u64, id: usize, rng: &mut R) -> (u64, u64) { + fn rep_share(value: u64, rng: &mut R) -> (u64, u64, u64) { let a = rng.next_u64(); let b = rng.next_u64(); let c = value ^ a ^ b; - match id { - 0 => (a, c), - 1 => (b, a), - 2 => (c, b), - _ => unreachable!(), - } + (a, b, c) } - fn rep_share_vec(value: &[u64], id: usize, rng: &mut R) -> (Vec, Vec) { + fn rep_share_vec(value: &[u64], rng: &mut R) -> (Vec, Vec, Vec) { let mut a = Vec::with_capacity(value.len()); let mut b = Vec::with_capacity(value.len()); + let mut c = Vec::with_capacity(value.len()); for v in value.iter() { - let (a_, b_) = rep_share(*v, id, rng); + let (a_, b_, c_) = rep_share(*v, rng); a.push(a_); b.push(b_); + c.push(c_); } - (a, b) + (a, b, c) } fn to_gpu( @@ -136,42 +138,24 @@ mod bitinject_test { result } - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - #[ignore] - async fn test_bitinject() -> eyre::Result<()> { - const_assert!( - INPUTS_PER_GPU_SIZE % (2048) == 0, - // Mod 16 for randomness, mod 64 for chunk size - ); - - // TODO - let mut rng = StdRng::seed_from_u64(42); + fn install_tracing() { + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "info".into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + } - let party_id: usize = env::var("SMPC__PARTY_ID") - .expect("SMPC__PARTY_ID environment variable not set") - .parse() - .expect("SMPC__PARTY_ID must be a valid usize"); - let n_devices = CudaDevice::count()? as usize; + fn testcase( + mut party: Circuits, + input_bits_a: Vec, + input_bits_b: Vec, + real_result: Vec, + ) { + let id = party.peer_id(); - // Get inputs - let input_bits = sample_bits(INPUTS_PER_GPU_SIZE * n_devices, &mut rng); - - let (input_bits_a, input_bits_b) = rep_share_vec(&input_bits, party_id, &mut rng); - let real_result = real_result(input_bits); - println!("Random shared inputs generated!"); - - // Get Circuit Party - let device_manager = Arc::new(DeviceManager::init()); - let ids = device_manager.get_ids_from_magic(0); - let comms = device_manager.instantiate_network_from_ids(party_id, &ids)?; - let mut party = Circuits::new( - party_id, - INPUTS_PER_GPU_SIZE / 2, - INPUTS_PER_GPU_SIZE / 128, - ([party_id as u32; 8], [((party_id + 2) % 3) as u32; 8]), - device_manager.clone(), - comms, - ); let devices = party.get_devices(); let streams = devices .iter() @@ -182,8 +166,8 @@ mod bitinject_test { let code_gpu = to_gpu(&input_bits_a, &input_bits_b, &devices, &streams); let res_ = alloc_res(INPUTS_PER_GPU_SIZE, &devices); let mut res = to_view(&res_); - println!("Data is on GPUs!"); - println!("Starting tests..."); + tracing::info!("id = {}, Data is on GPUs!", id); + tracing::info!("id = {}, Starting tests...", id); for _ in 0..10 { let code_gpu_ = code_gpu.clone(); @@ -191,25 +175,130 @@ mod bitinject_test { let now = Instant::now(); party.bit_inject_ot(&code_gpu, &mut res, &streams); - println!("compute time: {:?}", now.elapsed()); + tracing::info!("id = {}, compute time: {:?}", id, now.elapsed()); let now = Instant::now(); let result = open(&mut party, &mut res, &streams); - println!("Open and transfer to CPU time: {:?}", now.elapsed()); + tracing::info!( + "id = {}, Open and transfer to CPU time: {:?}", + id, + now.elapsed() + ); party.synchronize_streams(&streams); let mut correct = true; for (i, (r, r_)) in izip!(&result, &real_result).enumerate() { if r != r_ { correct = false; - println!("Test failed on index: {}: {} != {}", i, r, r_); + tracing::warn!("id = {}, Test failed on index: {}: {} != {}", id, i, r, r_); break; } } if correct { - println!("Test passed!"); + tracing::info!("id = {}, Test passed!", id); } } + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn test_bitinject() -> eyre::Result<()> { + install_tracing(); + env::set_var("NCCL_P2P_LEVEL", "LOC"); + env::set_var("NCCL_NET", "Socket"); + env::set_var("NCCL_P2P_DIRECT_DISABLE", "1"); + env::set_var("NCCL_SHM_DISABLE", "1"); + + let chacha_seeds0 = ([0u32; 8], [2u32; 8]); + let chacha_seeds1 = ([1u32; 8], [0u32; 8]); + let chacha_seeds2 = ([2u32; 8], [1u32; 8]); + + const_assert!( + INPUTS_PER_GPU_SIZE % (2048) == 0, + // Mod 16 for randomness, mod 64 for chunk size + ); + + let mut rng = StdRng::seed_from_u64(DB_RNG_SEED); + + let device_manager = DeviceManager::init(); + let mut device_managers = device_manager + .split_into_n_chunks(3) + .expect("have at least 3 devices"); + let device_manager2 = Arc::new(device_managers.pop().unwrap()); + let device_manager1 = Arc::new(device_managers.pop().unwrap()); + let device_manager0 = Arc::new(device_managers.pop().unwrap()); + let n_devices = device_manager0.devices().len(); + let ids0 = (0..n_devices) + .map(|_| Id::new().unwrap()) + .collect::>(); + let ids1 = ids0.clone(); + let ids2 = ids0.clone(); + + // Get inputs + let input_bits = sample_bits(INPUTS_PER_GPU_SIZE * n_devices, &mut rng); + + let (input_bits_a, input_bits_b, input_bits_c) = rep_share_vec(&input_bits, &mut rng); + let real_result = real_result(input_bits); + tracing::info!("Random shared inputs generated!"); + + let input_bits_a_ = input_bits_a.to_owned(); + let input_bits_b_ = input_bits_b.to_owned(); + let input_bits_c_ = input_bits_c.to_owned(); + let real_result_ = real_result.to_owned(); + let real_result__ = real_result.to_owned(); + + let task0 = tokio::task::spawn_blocking(move || { + let comms0 = device_manager0 + .instantiate_network_from_ids(0, &ids0) + .unwrap(); + + let party = Circuits::new( + 0, + INPUTS_PER_GPU_SIZE, + INPUTS_PER_GPU_SIZE / 64, + chacha_seeds0, + device_manager0, + comms0, + ); + + testcase(party, input_bits_a, input_bits_c, real_result); + }); + + let task1 = tokio::task::spawn_blocking(move || { + let comms1 = device_manager1 + .instantiate_network_from_ids(1, &ids1) + .unwrap(); + + let party = Circuits::new( + 1, + INPUTS_PER_GPU_SIZE, + INPUTS_PER_GPU_SIZE / 64, + chacha_seeds1, + device_manager1, + comms1, + ); + + testcase(party, input_bits_b, input_bits_a_, real_result_); + }); + + let task2 = tokio::task::spawn_blocking(move || { + let comms2 = device_manager2 + .instantiate_network_from_ids(2, &ids2) + .unwrap(); + + let party = Circuits::new( + 2, + INPUTS_PER_GPU_SIZE, + INPUTS_PER_GPU_SIZE / 64, + chacha_seeds2, + device_manager2, + comms2, + ); + + testcase(party, input_bits_c_, input_bits_b_, real_result__); + }); + task0.await.unwrap(); + task1.await.unwrap(); + task2.await.unwrap(); Ok(()) } } diff --git a/iris-mpc-gpu/tests/threshold.rs b/iris-mpc-gpu/tests/threshold.rs index c1fbd5cf1..856d7dc95 100644 --- a/iris-mpc-gpu/tests/threshold.rs +++ b/iris-mpc-gpu/tests/threshold.rs @@ -1,4 +1,4 @@ -// #[cfg(feature = "gpu_dependent")] +#[cfg(feature = "gpu_dependent")] mod threshold_test { use cudarc::{ driver::{CudaDevice, CudaStream},