Skip to content

Commit

Permalink
remove chunked page-lock and move page lock to after import
Browse files Browse the repository at this point in the history
  • Loading branch information
eaypek-tfh committed Jan 28, 2025
1 parent d1d2f00 commit c4ce09b
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 84 deletions.
2 changes: 1 addition & 1 deletion deploy/stage/common-values-iris-mpc.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
image: "ghcr.io/worldcoin/iris-mpc:a76986f433af1327232a946766a9e1467048cf4a"
image: "ghcr.io/worldcoin/iris-mpc:51dbcaff4d2f539f03a7cdd83bdb89767ac8d87b"

environment: stage
replicaCount: 1
Expand Down
3 changes: 0 additions & 3 deletions deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ env:
- name: SMPC__LOAD_CHUNKS_BUFFER_SIZE
value: "1024"

- name: SMPC__PAGE_LOCK_CHUNK_PERCENTAGE
value: "100"

- name: SMPC__CLEAR_DB_BEFORE_INIT
value: "true"

Expand Down
3 changes: 0 additions & 3 deletions deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ env:
- name: SMPC__LOAD_CHUNKS_BUFFER_SIZE
value: "1024"

- name: SMPC__PAGE_LOCK_CHUNK_PERCENTAGE
value: "100"

- name: SMPC__CLEAR_DB_BEFORE_INIT
value: "true"

Expand Down
3 changes: 0 additions & 3 deletions deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ env:
- name: SMPC__LOAD_CHUNKS_BUFFER_SIZE
value: "1024"

- name: SMPC__PAGE_LOCK_CHUNK_PERCENTAGE
value: "100"

- name: SMPC__CLEAR_DB_BEFORE_INIT
value: "true"

Expand Down
7 changes: 0 additions & 7 deletions iris-mpc-common/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,6 @@ pub struct Config {

#[serde(default)]
pub load_chunks_buffer_size: usize,

/// Percentage of the maximum database size to page-lock in each iteration.
/// The first chunk is page-locked synchronously, before any loading starts.
/// The remaining chunks are page-locked in parallel with the S3 import.
/// Must be an integer in the range [1, 100].
#[serde(default)]
pub page_lock_chunk_percentage: usize,
}

fn default_load_chunks_parallelism() -> usize {
Expand Down
23 changes: 6 additions & 17 deletions iris-mpc-gpu/src/helpers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,35 +173,24 @@ pub fn htod_on_stream_sync<T: DeviceRepr>(
/// Page-locks the host memory behind `db` by calling `cuMemHostRegister_v2`
/// on the `limb_0` and `limb_1` pointers of every device in `device_manager`.
///
/// NOTE(review): this listing is a commit diff rendered without +/- markers
/// (6 additions, 17 deletions), so lines of the removed chunked version and of
/// the new whole-buffer version are interleaved. The inline comments below
/// mark which lines belong to which version.
///
/// # Arguments
/// * `device_manager` - supplies the device list and the device count used to
///   split the requested length across devices.
/// * `db` - exposes one pointer value per device in `limb_0` and `limb_1`.
/// * `max_db_length` - total entry count to register; divided by the device
///   count to get the per-device length.
/// * `code_length` - factor multiplied with the per-device length to form the
///   size passed to the driver (presumably bytes per entry; confirm).
///
/// # Panics
/// The result of `bind_to_thread` is unwrapped, so a failure to bind a device
/// to the current thread panics.
pub fn register_host_memory(
device_manager: Arc<DeviceManager>,
db: &CudaVec2DSlicerRawPointer,
// Old (removed) parameters: length and start offset of the chunk to lock.
chunk_length: usize,
chunk_offset: usize,
// New (added) parameter: the whole database length is registered in one call.
max_db_length: usize,
code_length: usize,
) {
// Old (removed): start-of-chunk log line.
tracing::info!(
"Page-locking chunk: [{}-{}]",
chunk_offset,
chunk_offset + chunk_length
);
// Old (removed): per-device share of a single chunk.
let size = chunk_length / device_manager.device_count();
// New (added): per-device share of the full database length (integer division).
let max_size = max_db_length / device_manager.device_count();
for (device_index, device) in device_manager.devices().iter().enumerate() {
// Bind this device to the calling thread before issuing driver calls for it.
device.bind_to_thread().unwrap();
unsafe {
// NOTE(review): the driver's return value is discarded via `let _`, so a
// failed registration is not reported to the caller.
let _ = cudarc::driver::sys::lib().cuMemHostRegister_v2(
// Old (removed): base pointer advanced by the chunk offset, sized per chunk.
(db.limb_0[device_index] + (chunk_offset * code_length) as u64) as *mut _,
size * code_length,
// New (added): register from the base pointer, sized for the full share.
db.limb_0[device_index] as *mut _,
max_size * code_length,
CU_MEMHOSTALLOC_PORTABLE,
);

// Same registration for the second limb of this device.
let _ = cudarc::driver::sys::lib().cuMemHostRegister_v2(
// Old (removed): offset pointer and per-chunk size.
(db.limb_1[device_index] + (chunk_offset * code_length) as u64) as *mut _,
size * code_length,
// New (added): base pointer and full per-device size.
db.limb_1[device_index] as *mut _,
max_size * code_length,
CU_MEMHOSTALLOC_PORTABLE,
);
}
}
// Old (removed): end-of-chunk log line.
tracing::info!(
"Page-lock completed for chunk: [{}-{}]",
chunk_offset,
chunk_offset + chunk_length
);
}
79 changes: 29 additions & 50 deletions iris-mpc/src/bin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1086,54 +1086,8 @@ async fn server_main(config: Config) -> eyre::Result<()> {

tokio::runtime::Handle::current().block_on(async {
let total_load_time = Instant::now();
let dbs = [
(actor.left_code_db_slices.code_gr.clone(), IRIS_CODE_LENGTH),
(actor.right_code_db_slices.code_gr.clone(), IRIS_CODE_LENGTH),
(actor.left_mask_db_slices.code_gr.clone(), MASK_CODE_LENGTH),
(actor.right_mask_db_slices.code_gr.clone(), MASK_CODE_LENGTH),
];
let n_page_lock_iters = 100 / config.page_lock_chunk_percentage;
let page_lock_chunk_size = config.max_db_size / n_page_lock_iters;
tracing::info!(
"Will page lock chunks in {} iters, each {} items",
n_page_lock_iters,
page_lock_chunk_size
);
let dbs_clone = dbs.clone();
let mut now = Instant::now();
for (db, code_length) in dbs_clone {
let device_manager_clone = actor.device_manager.clone();
register_host_memory(
device_manager_clone,
&db,
page_lock_chunk_size,
0,
code_length,
);
}
tracing::info!("First chunk page-locking took {:?}", now.elapsed());

let device_manager_clone = actor.device_manager.clone();

// prepare the handle for the rest of the page locks
let page_lock_handle = spawn_blocking(move || {
for i in 1..n_page_lock_iters {
let dbs_clone = dbs.clone();
let device_manager_clone = device_manager_clone.clone();
for (db, code_length) in dbs_clone {
let device_manager_clone = device_manager_clone.clone();
register_host_memory(
device_manager_clone,
&db,
page_lock_chunk_size,
i * page_lock_chunk_size,
code_length,
);
}
}
});
let now = Instant::now();

now = Instant::now();
let mut record_counter = 0;
let mut all_serial_ids: HashSet<i64> =
HashSet::from_iter(1..=(store_len as i64));
Expand Down Expand Up @@ -1267,12 +1221,37 @@ async fn server_main(config: Config) -> eyre::Result<()> {
));
}

tracing::info!("Starting page lock");

let left_codes = actor.left_code_db_slices.code_gr.clone();
let right_codes = actor.right_code_db_slices.code_gr.clone();
let left_masks = actor.left_mask_db_slices.code_gr.clone();
let right_masks = actor.right_mask_db_slices.code_gr.clone();
let page_lock_ts = Instant::now();
for db in [&left_codes, &right_codes] {
register_host_memory(
actor.device_manager.clone(),
db,
config.max_db_size,
IRIS_CODE_LENGTH,
);
tracing::info!("Page locking completed for code slice");
}

for db in [&left_masks, &right_masks] {
register_host_memory(
actor.device_manager.clone(),
db,
config.max_db_size,
MASK_CODE_LENGTH,
);
tracing::info!("Page locking completed for mask slice");
}
tracing::info!("Page locking completed in {:?}", page_lock_ts.elapsed());

tracing::info!("Preprocessing db");
actor.preprocess_db();

tracing::info!("Waiting for all page-locks to finish");
page_lock_handle.await.expect("Error while page-locking");

tracing::info!(
"Loaded {} records from db into memory in {:?} [DB sizes: {:?}]",
record_counter,
Expand Down

0 comments on commit c4ce09b

Please sign in to comment.