Skip to content

Commit

Permalink
revert chunked page lock
Browse files Browse the repository at this point in the history
  • Loading branch information
eaypek-tfh committed Jan 28, 2025
1 parent 2d0ed57 commit 2090c2f
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 75 deletions.
2 changes: 1 addition & 1 deletion deploy/stage/common-values-iris-mpc.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
image: "ghcr.io/worldcoin/iris-mpc:1654b9cbda7d59684ce026c04efeaa89f12f329d"
image: "ghcr.io/worldcoin/iris-mpc:3802806ffc7180d878a33dda356806dd0d55619b"

environment: stage
replicaCount: 1
Expand Down
3 changes: 0 additions & 3 deletions deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ env:
- name: SMPC__LOAD_CHUNKS_BUFFER_SIZE
value: "1024"

- name: SMPC__PAGE_LOCK_CHUNK_PERCENTAGE
value: "50"

- name: SMPC__CLEAR_DB_BEFORE_INIT
value: "true"

Expand Down
3 changes: 0 additions & 3 deletions deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ env:
- name: SMPC__LOAD_CHUNKS_BUFFER_SIZE
value: "1024"

- name: SMPC__PAGE_LOCK_CHUNK_PERCENTAGE
value: "50"

- name: SMPC__CLEAR_DB_BEFORE_INIT
value: "true"

Expand Down
3 changes: 0 additions & 3 deletions deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,6 @@ env:

- name: SMPC__LOAD_CHUNKS_BUFFER_SIZE
value: "1024"

- name: SMPC__PAGE_LOCK_CHUNK_PERCENTAGE
value: "50"

- name: SMPC__CLEAR_DB_BEFORE_INIT
value: "true"
Expand Down
7 changes: 0 additions & 7 deletions iris-mpc-common/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,6 @@ pub struct Config {

#[serde(default)]
pub load_chunks_buffer_size: usize,

/// Percentage of `max_db_size` to page lock per iteration.
/// The first chunk is page locked synchronously, before any loading starts.
/// The remaining chunks are page locked in parallel with the S3 import.
/// Must be an integer in the range [1, 100]; the serde default of 0 is not
/// valid, because the server computes `100 / page_lock_chunk_percentage`.
#[serde(default)]
pub page_lock_chunk_percentage: usize,
}

fn default_load_chunks_parallelism() -> usize {
Expand Down
24 changes: 6 additions & 18 deletions iris-mpc-gpu/src/helpers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,36 +173,24 @@ pub fn htod_on_stream_sync<T: DeviceRepr>(
/// Page-locks the host buffers referenced by `db` by calling
/// `cuMemHostRegister_v2` (flag `CU_MEMHOSTALLOC_PORTABLE`) on the `limb_0`
/// and `limb_1` pointer of every device returned by `device_manager`.
///
/// NOTE(review): this span is a rendered diff hunk (6 additions, 18 deletions)
/// in which removed and added lines are interleaved without +/- markers, so it
/// is not compilable as shown. Presumably the removed version takes
/// `chunk_length` / `chunk_offset` and registers a sub-range starting at
/// `offset * code_length`, while the added version takes `max_db_length` and
/// registers `max_size * code_length` from the base pointer — confirm against
/// the repository before relying on this.
///
/// # Panics
/// Panics if `device.bind_to_thread()` returns an error (it is unwrapped).
pub fn register_host_memory(
device_manager: Arc<DeviceManager>,
db: &CudaVec2DSlicerRawPointer,
chunk_length: usize,
chunk_offset: usize,
max_db_length: usize,
code_length: usize,
) {
tracing::info!(
"Page-locking chunk: [{}-{}]",
chunk_offset,
chunk_offset + chunk_length
);
// Lengths and offsets are split across devices with integer division, so
// any remainder that is not a multiple of the device count is dropped.
let size = chunk_length / device_manager.device_count();
let offset = chunk_offset / device_manager.device_count();
let max_size = max_db_length / device_manager.device_count();
for (device_index, device) in device_manager.devices().iter().enumerate() {
device.bind_to_thread().unwrap();
// NOTE(review): soundness depends on `db.limb_0` / `db.limb_1` holding valid
// host addresses that cover the registered length — not verifiable from
// this view; confirm against the allocation site.
unsafe {
// The driver status is discarded (`let _`), so a failed registration is
// not reported to the caller.
let _ = cudarc::driver::sys::lib().cuMemHostRegister_v2(
(db.limb_0[device_index] + (offset * code_length) as u64) as *mut _,
size * code_length,
db.limb_0[device_index] as *mut _,
max_size * code_length,
CU_MEMHOSTALLOC_PORTABLE,
);

// Same registration for the second limb; status is likewise discarded.
let _ = cudarc::driver::sys::lib().cuMemHostRegister_v2(
(db.limb_1[device_index] + (offset * code_length) as u64) as *mut _,
size * code_length,
db.limb_1[device_index] as *mut _,
max_size * code_length,
CU_MEMHOSTALLOC_PORTABLE,
);
}
}
tracing::info!(
"Page-lock completed for chunk: [{}-{}]",
chunk_offset,
chunk_offset + chunk_length
);
}
71 changes: 31 additions & 40 deletions iris-mpc/src/bin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1086,54 +1086,45 @@ async fn server_main(config: Config) -> eyre::Result<()> {

tokio::runtime::Handle::current().block_on(async {
let total_load_time = Instant::now();
let dbs = [
(actor.left_code_db_slices.code_gr.clone(), IRIS_CODE_LENGTH),
(actor.right_code_db_slices.code_gr.clone(), IRIS_CODE_LENGTH),
(actor.left_mask_db_slices.code_gr.clone(), MASK_CODE_LENGTH),
(actor.right_mask_db_slices.code_gr.clone(), MASK_CODE_LENGTH),
];
let n_page_lock_iters = 100 / config.page_lock_chunk_percentage;
let page_lock_chunk_size = config.max_db_size / n_page_lock_iters;
tracing::info!(
"Will page lock chunks in {} iters, each {} items",
n_page_lock_iters,
page_lock_chunk_size
);
let dbs_clone = dbs.clone();
let mut now = Instant::now();
for (db, code_length) in dbs_clone {
let device_manager_clone = actor.device_manager.clone();
register_host_memory(
device_manager_clone,
&db,
page_lock_chunk_size,
0,
code_length,
);
}
tracing::info!("First chunk page-locking took {:?}", now.elapsed());
let left_codes = actor.left_code_db_slices.code_gr.clone();
let right_codes = actor.right_code_db_slices.code_gr.clone();
let left_masks = actor.left_mask_db_slices.code_gr.clone();
let right_masks = actor.right_mask_db_slices.code_gr.clone();

let device_manager_clone = actor.device_manager.clone();

// prepare the handle for the rest of the page locks
let page_lock_handle = spawn_blocking(move || {
for i in 1..n_page_lock_iters {
let dbs_clone = dbs.clone();
let device_manager_clone = device_manager_clone.clone();
for (db, code_length) in dbs_clone {
let device_manager_clone = device_manager_clone.clone();
register_host_memory(
device_manager_clone,
&db,
page_lock_chunk_size,
i * page_lock_chunk_size,
code_length,
);
}
tracing::info!("Page locking host memory for code slices");
let now = Instant::now();
for db in [&left_codes, &right_codes] {
register_host_memory(
device_manager_clone.clone(),
db,
config.max_db_size,
IRIS_CODE_LENGTH,
);
tracing::info!("Page locking completed for code slice");
}

tracing::info!("Page locking host memory for mask slices");
for db in [&left_masks, &right_masks] {
register_host_memory(
device_manager_clone.clone(),
db,
config.max_db_size,
MASK_CODE_LENGTH,
);
tracing::info!("Page locking completed for mask slice");
}

tracing::info!(
"Page locking completed for all slices in {:?}",
now.elapsed()
);
});

now = Instant::now();
let now = Instant::now();
let mut record_counter = 0;
let mut all_serial_ids: HashSet<i64> =
HashSet::from_iter(1..=(store_len as i64));
Expand Down

0 comments on commit 2090c2f

Please sign in to comment.