Skip to content

Commit

Permalink
Compute offsets for high-dimensional arrays within loop body.
Browse files Browse the repository at this point in the history
This eliminates the need to reason about incrementing offsets across loop
iterations, and it also makes it easier for the compiler to optimize via loop
unrolling if this is deemed to be beneficial.
  • Loading branch information
LTLA committed Nov 17, 2024
1 parent 6bf8ce1 commit 7c359b2
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 54 deletions.
28 changes: 11 additions & 17 deletions include/scran_markers/PrecomputedPairwiseWeights.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,27 @@ class PrecomputedPairwiseWeights {
my_ngroups(ngroups),
my_nblocks(nblocks)
{
size_t in_offset = 0;
for (size_t b = 0; b < nblocks; ++b, in_offset += ngroups) {
size_t out_offset1_raw = ngroups, out_offset2_raw = 1;

for (size_t g1 = 1; g1 < ngroups; ++g1, out_offset1_raw += ngroups, ++out_offset2_raw) {
auto w1 = combo_weights[in_offset + g1];
size_t out_offset1 = out_offset1_raw, out_offset2 = out_offset2_raw;

for (size_t g2 = 0; g2 < g1; ++g2, ++out_offset1, out_offset2 += ngroups) {
Weight_ combined = w1 * combo_weights[in_offset + g2];
for (size_t b = 0; b < nblocks; ++b) {
for (size_t g1 = 1; g1 < ngroups; ++g1) {
auto w1 = combo_weights[b * ngroups + g1 /* already size_t's */];
for (size_t g2 = 0; g2 < g1; ++g2) {
Weight_ combined = w1 * combo_weights[b * ngroups + g2 /* already size_t's */];

// Storing it as a 3D array where the blocks are the fastest changing,
// and then the two groups are the next fastest changing.
my_by_block[out_offset1 /* (g1 * ngroups + g2) */ * nblocks + b] = combined;
my_by_block[out_offset2 /* (g2 * ngroups + g1) */ * nblocks + b] = combined;
size_t out_offset1 = g1 * ngroups + g2;
my_by_block[out_offset1 * nblocks + b] = combined;
my_by_block[(g2 * ngroups + g1) * nblocks + b] = combined;

my_total[out_offset1] += combined;
}
}
}

// Filling the other side, for completeness.
size_t out_offset1_raw = ngroups, out_offset2_raw = 1;
for (size_t g1 = 1; g1 < ngroups; ++g1, out_offset1_raw += ngroups, ++out_offset2_raw) {
size_t out_offset1 = out_offset1_raw, out_offset2 = out_offset2_raw;
for (size_t g2 = 0; g2 < g1; ++g2, ++out_offset1, out_offset2 += ngroups) {
my_total[out_offset2] = my_total[out_offset1];
for (size_t g1 = 1; g1 < ngroups; ++g1) {
for (size_t g2 = 0; g2 < g1; ++g2) {
my_total[g2 * ngroups + g1] = my_total[g1 * ngroups + g2];
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions include/scran_markers/average_group_stats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ void average_group_stats(
gmean = 0;
gdet = 0;

size_t offset = g;
for (size_t b = 0; b < nblocks; ++b, offset += ngroups) { // remember, blocks are the slower changing dimension, so we need to jump by 'ngroups'.
for (size_t b = 0; b < nblocks; ++b) {
// Remember, blocks are the slower changing dimension, so we need to jump by 'ngroups'.
size_t offset = b * ngroups + g; // already size_t's.
const auto& curweight = combo_weights[offset];
if (curweight) {
gmean += curweight * tmp_means[offset];
Expand Down
19 changes: 9 additions & 10 deletions include/scran_markers/cohens_d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,13 @@ void compute_pairwise_cohens_d_internal(
if (total_weight != 0) {
total_weight = 0; // need to calculate it more dynamically if there are NaN variances.

size_t offset1 = g1, offset2 = g2; // no need to cast, everything's already a size_t.
for (size_t b = 0; b < nblocks; ++b, offset1 += ngroups, offset2 += ngroups) {
for (size_t b = 0; b < nblocks; ++b) {
auto weight = winfo.first[b];
if (weight) {
auto left_var = vars[offset1 /* == b * ngroups + g1 */];
auto right_var = vars[offset2 /* == b * ngroups + g2 */];
size_t offset1 = b * ngroups + g1; // no need to cast, everything's already a size_t.
size_t offset2 = b * ngroups + g2; // no need to cast, everything's already a size_t.
auto left_var = vars[offset1];
auto right_var = vars[offset2];
Stat_ denom = cohen_denominator(left_var, right_var);

if (!std::isnan(denom)) {
Expand Down Expand Up @@ -157,13 +158,11 @@ void compute_pairwise_cohens_d(
Stat_ threshold,
Stat_* output)
{
size_t offset1_raw = ngroups, offset2_raw = 1;
for (size_t g1 = 1; g1 < ngroups; ++g1, offset1_raw += ngroups, ++offset2_raw) {
auto offset1 = offset1_raw, offset2 = offset2_raw;
for (size_t g2 = 0; g2 < g1; ++g2, ++offset1, offset2 += ngroups) {
for (size_t g1 = 1; g1 < ngroups; ++g1) {
for (size_t g2 = 0; g2 < g1; ++g2) {
auto tmp = compute_pairwise_cohens_d_two_sided(g1, g2, means, vars, ngroups, nblocks, preweights, threshold);
output[offset1 /* == g1 * ngroups + g2 */] = tmp.first;
output[offset2 /* == g2 * ngroups + g1 */] = tmp.second;
output[g1 * ngroups + g2] = tmp.first;
output[g2 * ngroups + g1] = tmp.second;
}
}
}
Expand Down
19 changes: 9 additions & 10 deletions include/scran_markers/scan_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,30 +57,29 @@ void initialize_auc_workspace(
work.block_scale.reserve(nblocks);
work.full_weight.resize(ngroups2);
for (size_t b = 0; b < nblocks; ++b) {
size_t in_offset = b * ngroups;
work.block_scale.emplace_back(ngroups * ngroups);
work.block_scale.emplace_back(ngroups * ngroups /* already size_t's */);
auto& cur_scale = work.block_scale[b];
auto& cur_totals = work.block_totals[b];

size_t pair_offset1_raw = ngroups, pair_offset2_raw = 1;
for (size_t g1 = 1; g1 < ngroups; ++g1, pair_offset1_raw += ngroups, ++pair_offset2_raw) {
auto w1 = combo_weight[in_offset + g1];
for (size_t g1 = 1; g1 < ngroups; ++g1) {
auto w1 = combo_weight[b * ngroups + g1 /* already size_t's */];
Stat_ denom1 = cur_totals[g1];
auto pair_offset1 = pair_offset1_raw, pair_offset2 = pair_offset2_raw;

for (size_t g2 = 0; g2 < g1; ++g2, ++pair_offset1, pair_offset2 += ngroups) {
for (size_t g2 = 0; g2 < g1; ++g2) {
Stat_ block_denom = denom1 * static_cast<Stat_>(cur_totals[g2]);
if (block_denom == 0) {
continue;
}

Stat_ block_weight = w1 * combo_weight[in_offset + g2];
Stat_ block_weight = w1 * combo_weight[b * ngroups + g2 /* already size_t's */];
Stat_ block_scaling = block_denom / block_weight;

cur_scale[pair_offset1 /* = g1 * ngroups + g2 */] = block_scaling;
size_t pair_offset1 = g1 * ngroups + g2; // already size_t's.
cur_scale[pair_offset1] = block_scaling;
work.full_weight[pair_offset1] += block_weight;

cur_scale[pair_offset2 /* = g2 * ngroups + g1 */] = block_scaling;
size_t pair_offset2 = g2 * ngroups + g1; // already size_t's.
cur_scale[pair_offset2] = block_scaling;
work.full_weight[pair_offset2] += block_weight;
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/scran_markers/score_markers_pairwise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ void process_simple_pairwise_effects(
const auto* tmp_detected = combo_detected.data() + in_offset;

size_t squared = ngroups * ngroups;
size_t out_offset = start * squared;
for (size_t gene = start, end = start + length; gene < end; ++gene, out_offset += squared) {
for (size_t gene = start, end = start + length; gene < end; ++gene) {
average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);

// Computing the effect sizes.
size_t out_offset = gene * squared;
if (output.cohens_d != NULL) {
internal::compute_pairwise_cohens_d(tmp_means, tmp_variances, ngroups, nblocks, preweights, threshold, output.cohens_d + out_offset);
}
Expand Down
5 changes: 2 additions & 3 deletions include/scran_markers/score_markers_summary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,8 @@ class EffectsCacher {
// We thus transfer the cached vector to the full_set.
my_actions[other] = CacheAction::SKIP;
auto curcache = my_cached.begin()->second;
size_t offset = other;
for (size_t i = 0; i < my_ngenes; ++i, offset += my_ngroups) {
full_effects[offset /* = other + ngroups * i */] = curcache[i];
for (size_t i = 0; i < my_ngenes; ++i) {
full_effects[other + my_ngroups * i /* already size_t's */] = curcache[i];
}

my_unused_pool.push_back(curcache);
Expand Down
17 changes: 7 additions & 10 deletions include/scran_markers/simple_diff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ Stat_ compute_pairwise_simple_diff(size_t g1, size_t g2, const Stat_* values, si
}

Stat_ output = 0;
size_t offset1 = g1, offset2 = g2;
for (size_t b = 0; b < nblocks; ++b, offset1 += ngroups, offset2 += ngroups) {
for (size_t b = 0; b < nblocks; ++b) {
auto weight = winfo.first[b];
if (weight) {
auto left = values[offset1 /* == b * ngroups + g1 */];
auto right = values[offset2 /* == b * ngroups + g2 */];
auto left = values[b * ngroups + g1 /* already size_t's */];
auto right = values[b * ngroups + g2 /* already size_t's */];
output += (left - right) * weight;
}
}
Expand All @@ -35,13 +34,11 @@ Stat_ compute_pairwise_simple_diff(size_t g1, size_t g2, const Stat_* values, si

template<typename Stat_, typename Weight_>
/**
 * Fill an ngroups x ngroups matrix of pairwise differences between group
 * statistics, averaged across blocks.
 *
 * @param values Pointer to a (nblocks x ngroups) array of per-block group
 * statistics, with blocks as the slower-changing dimension.
 * @param ngroups Number of groups.
 * @param nblocks Number of blocks.
 * @param preweights Precomputed pairwise weights for combining blocks.
 * @param output Pointer to an (ngroups x ngroups) array in which the pairwise
 * differences are stored; entry (g1, g2) holds the difference of g1 minus g2.
 * Diagonal entries are not written here — presumably the caller zeroes them;
 * TODO confirm against callers.
 */
void compute_pairwise_simple_diff(const Stat_* values, size_t ngroups, size_t nblocks, const PrecomputedPairwiseWeights<Weight_>& preweights, Stat_* output) {
    for (size_t g1 = 1; g1 < ngroups; ++g1) {
        for (size_t g2 = 0; g2 < g1; ++g2) {
            auto d = compute_pairwise_simple_diff(g1, g2, values, ngroups, nblocks, preweights);
            // Offsets are computed inside the loop body rather than being
            // incremented across iterations; all operands are already size_t,
            // so no casts are needed, and the compiler can more easily unroll.
            output[g1 * ngroups + g2 /* already size_t's */] = d;
            // The matrix is antisymmetric, so fill the mirrored entry too.
            output[g2 * ngroups + g1 /* already size_t's */] = -d;
        }
    }
}
Expand Down

0 comments on commit 7c359b2

Please sign in to comment.