diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml
index f6f8af93f..9a3877453 100644
--- a/deploy/stage/common-values-iris-mpc.yaml
+++ b/deploy/stage/common-values-iris-mpc.yaml
@@ -1,4 +1,4 @@
-image: "ghcr.io/worldcoin/iris-mpc:v0.8.6"
+image: "ghcr.io/worldcoin/iris-mpc:v0.8.7"
 environment: stage
 replicaCount: 1
 
diff --git a/deploy/stage/mpc1-stage/values-iris-mpc.yaml b/deploy/stage/mpc1-stage/values-iris-mpc.yaml
index 748ab96d8..a1725f5e2 100644
--- a/deploy/stage/mpc1-stage/values-iris-mpc.yaml
+++ b/deploy/stage/mpc1-stage/values-iris-mpc.yaml
@@ -72,7 +72,7 @@ env:
     value: "true"
 
   - name: SMPC__INIT_DB_SIZE
-    value: "16"
+    value: "100"
 
   - name: SMPC__MAX_BATCH_SIZE
     value: "64"
diff --git a/deploy/stage/mpc2-stage/values-iris-mpc.yaml b/deploy/stage/mpc2-stage/values-iris-mpc.yaml
index 71aba85ff..e8944835b 100644
--- a/deploy/stage/mpc2-stage/values-iris-mpc.yaml
+++ b/deploy/stage/mpc2-stage/values-iris-mpc.yaml
@@ -72,7 +72,7 @@ env:
     value: "true"
 
   - name: SMPC__INIT_DB_SIZE
-    value: "16"
+    value: "100"
 
   - name: SMPC__MAX_BATCH_SIZE
     value: "64"
diff --git a/deploy/stage/mpc3-stage/values-iris-mpc.yaml b/deploy/stage/mpc3-stage/values-iris-mpc.yaml
index 40bde7c08..0940130fc 100644
--- a/deploy/stage/mpc3-stage/values-iris-mpc.yaml
+++ b/deploy/stage/mpc3-stage/values-iris-mpc.yaml
@@ -72,7 +72,7 @@ env:
     value: "true"
 
   - name: SMPC__INIT_DB_SIZE
-    value: "16"
+    value: "100"
 
  - name: SMPC__MAX_BATCH_SIZE
     value: "64"
diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs
index 810d377d2..3c069b8da 100644
--- a/iris-mpc-gpu/src/server/actor.rs
+++ b/iris-mpc-gpu/src/server/actor.rs
@@ -175,6 +175,7 @@ impl ServerActor {
         max_db_size: usize,
         max_batch_size: usize,
     ) -> eyre::Result<Self> {
+        assert!(max_batch_size != 0);
         let mut kdf_nonce = 0;
         let kdf_salt: Salt = Salt::new(HKDF_SHA256, b"IRIS_MPC");
         let n_queries = max_batch_size * ROTATIONS;
@@ -1026,12 +1027,16 @@ impl ServerActor {
                 .map(|s| (s - DB_CHUNK_SIZE * db_chunk_idx).clamp(1, DB_CHUNK_SIZE))
                 .collect::<Vec<_>>();
 
-            // We need to pad the chunk size to be a multiple of 4, because the underlying
-            // `gemm_ex` expects this. We filter out potential "phantom matches"
-            // for the padded data in the `open` later.
+            // We need to pad the chunk size for two reasons:
+            // 1. Chunk size needs to be a multiple of 4, because the underlying
+            //    `gemm_ex` expects this.
+            // 2. We are running into NCCL issues if the bytes sent/received are not a
+            //    multiple of 64.
+            // We filter out potential "phantom matches" for the padded data in the `open`
+            // later.
             let dot_chunk_size = chunk_size
                 .iter()
-                .map(|s| s.div_ceil(4) * 4)
+                .map(|s| s.div_ceil(64) * 64)
                 .collect::<Vec<_>>();
 
             // First stream doesn't need to wait
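
Note on the padding change in `actor.rs`: rounding each chunk size up to the next multiple of 64 satisfies both constraints at once, since 64 is itself a multiple of 4, and the existing "phantom match" filtering in `open` still discards results for the padded entries. A minimal sketch of the rounding behaviour, with hypothetical chunk sizes chosen only for illustration:

```rust
fn main() {
    // Hypothetical per-stream chunk sizes; only meant to illustrate the rounding.
    let chunk_size = [1usize, 63, 64, 65, 100];

    // Mirrors the `s.div_ceil(64) * 64` expression from the diff: round each
    // size up to the next multiple of 64.
    let dot_chunk_size: Vec<usize> = chunk_size
        .iter()
        .map(|s| s.div_ceil(64) * 64)
        .collect();

    // Prints [64, 64, 64, 128, 128]: every padded chunk is a multiple of 64,
    // and therefore also a multiple of 4 as required by `gemm_ex`.
    println!("{:?}", dot_chunk_size);
}
```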