Skip to content

Commit

Permalink
Compute offsets for high-dimensional arrays within loop body.
Browse files Browse the repository at this point in the history
This eliminates the need to reason about incrementing offsets across loop
iterations, and it also makes it easier for the compiler to optimize via loop
unrolling if this is deemed to be beneficial.
  • Loading branch information
LTLA committed Nov 17, 2024
1 parent 6bf8ce1 commit 7c359b2
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 54 deletions.
28 changes: 11 additions & 17 deletions include/scran_markers/PrecomputedPairwiseWeights.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,27 @@ class PrecomputedPairwiseWeights {
my_ngroups(ngroups),
my_nblocks(nblocks)
{
size_t in_offset = 0;
for (size_t b = 0; b < nblocks; ++b, in_offset += ngroups) {
size_t out_offset1_raw = ngroups, out_offset2_raw = 1;

for (size_t g1 = 1; g1 < ngroups; ++g1, out_offset1_raw += ngroups, ++out_offset2_raw) {
auto w1 = combo_weights[in_offset + g1];
size_t out_offset1 = out_offset1_raw, out_offset2 = out_offset2_raw;

for (size_t g2 = 0; g2 < g1; ++g2, ++out_offset1, out_offset2 += ngroups) {
Weight_ combined = w1 * combo_weights[in_offset + g2];
for (size_t b = 0; b < nblocks; ++b) {
for (size_t g1 = 1; g1 < ngroups; ++g1) {
auto w1 = combo_weights[b * ngroups + g1 /* already size_t's */];
for (size_t g2 = 0; g2 < g1; ++g2) {
Weight_ combined = w1 * combo_weights[b * ngroups + g2 /* already size_t's */];

// Storing it as a 3D array where the blocks are the fastest changing,
// and then the two groups are the next fastest changing.
my_by_block[out_offset1 /* (g1 * ngroups + g2) */ * nblocks + b] = combined;
my_by_block[out_offset2 /* (g2 * ngroups + g1) */ * nblocks + b] = combined;
size_t out_offset1 = g1 * ngroups + g2;
my_by_block[out_offset1 * nblocks + b] = combined;
my_by_block[(g2 * ngroups + g1) * nblocks + b] = combined;

my_total[out_offset1] += combined;
}
}
}

// Filling the other side, for completeness.
size_t out_offset1_raw = ngroups, out_offset2_raw = 1;
for (size_t g1 = 1; g1 < ngroups; ++g1, out_offset1_raw += ngroups, ++out_offset2_raw) {
size_t out_offset1 = out_offset1_raw, out_offset2 = out_offset2_raw;
for (size_t g2 = 0; g2 < g1; ++g2, ++out_offset1, out_offset2 += ngroups) {
my_total[out_offset2] = my_total[out_offset1];
for (size_t g1 = 1; g1 < ngroups; ++g1) {
for (size_t g2 = 0; g2 < g1; ++g2) {
my_total[g2 * ngroups + g1] = my_total[g1 * ngroups + g2];
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions include/scran_markers/average_group_stats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ void average_group_stats(
gmean = 0;
gdet = 0;

size_t offset = g;
for (size_t b = 0; b < nblocks; ++b, offset += ngroups) { // remember, blocks are the slower changing dimension, so we need to jump by 'ngroups'.
for (size_t b = 0; b < nblocks; ++b) {
// Remember, blocks are the slower changing dimension, so we need to jump by 'ngroups'.
size_t offset = b * ngroups + g; // already size_t's.
const auto& curweight = combo_weights[offset];
if (curweight) {
gmean += curweight * tmp_means[offset];
Expand Down
19 changes: 9 additions & 10 deletions include/scran_markers/cohens_d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,13 @@ void compute_pairwise_cohens_d_internal(
if (total_weight != 0) {
total_weight = 0; // need to calculate it more dynamically if there are NaN variances.

size_t offset1 = g1, offset2 = g2; // no need to cast, everything's already a size_t.
for (size_t b = 0; b < nblocks; ++b, offset1 += ngroups, offset2 += ngroups) {
for (size_t b = 0; b < nblocks; ++b) {
auto weight = winfo.first[b];
if (weight) {
auto left_var = vars[offset1 /* == b * ngroups + g1 */];
auto right_var = vars[offset2 /* == b * ngroups + g2 */];
size_t offset1 = b * ngroups + g1; // no need to cast, everything's already a size_t.
size_t offset2 = b * ngroups + g2; // no need to cast, everything's already a size_t.
auto left_var = vars[offset1];
auto right_var = vars[offset2];
Stat_ denom = cohen_denominator(left_var, right_var);

if (!std::isnan(denom)) {
Expand Down Expand Up @@ -157,13 +158,11 @@ void compute_pairwise_cohens_d(
Stat_ threshold,
Stat_* output)
{
size_t offset1_raw = ngroups, offset2_raw = 1;
for (size_t g1 = 1; g1 < ngroups; ++g1, offset1_raw += ngroups, ++offset2_raw) {
auto offset1 = offset1_raw, offset2 = offset2_raw;
for (size_t g2 = 0; g2 < g1; ++g2, ++offset1, offset2 += ngroups) {
for (size_t g1 = 1; g1 < ngroups; ++g1) {
for (size_t g2 = 0; g2 < g1; ++g2) {
auto tmp = compute_pairwise_cohens_d_two_sided(g1, g2, means, vars, ngroups, nblocks, preweights, threshold);
output[offset1 /* == g1 * ngroups + g2 */] = tmp.first;
output[offset2 /* == g2 * ngroups + g1 */] = tmp.second;
output[g1 * ngroups + g2] = tmp.first;
output[g2 * ngroups + g1] = tmp.second;
}
}
}
Expand Down
19 changes: 9 additions & 10 deletions include/scran_markers/scan_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,30 +57,29 @@ void initialize_auc_workspace(
work.block_scale.reserve(nblocks);
work.full_weight.resize(ngroups2);
for (size_t b = 0; b < nblocks; ++b) {
size_t in_offset = b * ngroups;
work.block_scale.emplace_back(ngroups * ngroups);
work.block_scale.emplace_back(ngroups * ngroups /* already size_t's */);
auto& cur_scale = work.block_scale[b];
auto& cur_totals = work.block_totals[b];

size_t pair_offset1_raw = ngroups, pair_offset2_raw = 1;
for (size_t g1 = 1; g1 < ngroups; ++g1, pair_offset1_raw += ngroups, ++pair_offset2_raw) {
auto w1 = combo_weight[in_offset + g1];
for (size_t g1 = 1; g1 < ngroups; ++g1) {
auto w1 = combo_weight[b * ngroups + g1 /* already size_t's */];
Stat_ denom1 = cur_totals[g1];
auto pair_offset1 = pair_offset1_raw, pair_offset2 = pair_offset2_raw;

for (size_t g2 = 0; g2 < g1; ++g2, ++pair_offset1, pair_offset2 += ngroups) {
for (size_t g2 = 0; g2 < g1; ++g2) {
Stat_ block_denom = denom1 * static_cast<Stat_>(cur_totals[g2]);
if (block_denom == 0) {
continue;
}

Stat_ block_weight = w1 * combo_weight[in_offset + g2];
Stat_ block_weight = w1 * combo_weight[b * ngroups + g2 /* already size_t's */];
Stat_ block_scaling = block_denom / block_weight;

cur_scale[pair_offset1 /* = g1 * ngroups + g2 */] = block_scaling;
size_t pair_offset1 = g1 * ngroups + g2; // already size_t's.
cur_scale[pair_offset1] = block_scaling;
work.full_weight[pair_offset1] += block_weight;

cur_scale[pair_offset2 /* = g2 * ngroups + g1 */] = block_scaling;
size_t pair_offset2 = g2 * ngroups + g1; // already size_t's.
cur_scale[pair_offset2] = block_scaling;
work.full_weight[pair_offset2] += block_weight;
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/scran_markers/score_markers_pairwise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ void process_simple_pairwise_effects(
const auto* tmp_detected = combo_detected.data() + in_offset;

size_t squared = ngroups * ngroups;
size_t out_offset = start * squared;
for (size_t gene = start, end = start + length; gene < end; ++gene, out_offset += squared) {
for (size_t gene = start, end = start + length; gene < end; ++gene) {
average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);

// Computing the effect sizes.
size_t out_offset = gene * squared;
if (output.cohens_d != NULL) {
internal::compute_pairwise_cohens_d(tmp_means, tmp_variances, ngroups, nblocks, preweights, threshold, output.cohens_d + out_offset);
}
Expand Down
5 changes: 2 additions & 3 deletions include/scran_markers/score_markers_summary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,8 @@ class EffectsCacher {
// We thus transfer the cached vector to the full_set.
my_actions[other] = CacheAction::SKIP;
auto curcache = my_cached.begin()->second;
size_t offset = other;
for (size_t i = 0; i < my_ngenes; ++i, offset += my_ngroups) {
full_effects[offset /* = other + ngroups * i */] = curcache[i];
for (size_t i = 0; i < my_ngenes; ++i) {
full_effects[other + my_ngroups * i /* already size_t's */] = curcache[i];
}

my_unused_pool.push_back(curcache);
Expand Down
17 changes: 7 additions & 10 deletions include/scran_markers/simple_diff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ Stat_ compute_pairwise_simple_diff(size_t g1, size_t g2, const Stat_* values, si
}

Stat_ output = 0;
size_t offset1 = g1, offset2 = g2;
for (size_t b = 0; b < nblocks; ++b, offset1 += ngroups, offset2 += ngroups) {
for (size_t b = 0; b < nblocks; ++b) {
auto weight = winfo.first[b];
if (weight) {
auto left = values[offset1 /* == b * ngroups + g1 */];
auto right = values[offset2 /* == b * ngroups + g2 */];
auto left = values[b * ngroups + g1 /* already size_t's */];
auto right = values[b * ngroups + g2 /* already size_t's */];
output += (left - right) * weight;
}
}
Expand All @@ -35,13 +34,11 @@ Stat_ compute_pairwise_simple_diff(size_t g1, size_t g2, const Stat_* values, si

template<typename Stat_, typename Weight_>
/**
 * Fill an ngroups x ngroups matrix of pairwise differences between group
 * statistics, averaged across blocks.
 *
 * @param values Pointer to a (nblocks x ngroups) array of per-block group
 * statistics, with blocks as the slower-changing dimension.
 * @param ngroups Number of groups.
 * @param nblocks Number of blocks.
 * @param preweights Precomputed pairwise weights for combining blocks.
 * @param output Pointer to an (ngroups x ngroups) array in which the pairwise
 * differences are stored; entry (g1, g2) holds the difference of g1 minus g2.
 * Diagonal entries are not written here — presumably the caller zeroes them;
 * TODO confirm against callers.
 */
void compute_pairwise_simple_diff(const Stat_* values, size_t ngroups, size_t nblocks, const PrecomputedPairwiseWeights<Weight_>& preweights, Stat_* output) {
    for (size_t g1 = 1; g1 < ngroups; ++g1) {
        for (size_t g2 = 0; g2 < g1; ++g2) {
            auto d = compute_pairwise_simple_diff(g1, g2, values, ngroups, nblocks, preweights);
            // Offsets are computed inside the loop body rather than being
            // incremented across iterations; all operands are already size_t,
            // so no casts are needed, and the compiler can more easily unroll.
            output[g1 * ngroups + g2 /* already size_t's */] = d;
            // The matrix is antisymmetric, so fill the mirrored entry too.
            output[g2 * ngroups + g1 /* already size_t's */] = -d;
        }
    }
}
Expand Down

0 comments on commit 7c359b2

Please sign in to comment.