From e7cf94a1d126f150a90b61760bd9564c95fbfc56 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 25 Jul 2024 14:25:54 +0200 Subject: [PATCH] Optimize chained hyperslab selection. A common pattern for creating semi-unstructured selections is to use many (small) `RegularHyperSlab`s and chain them: ``` HyperSlab hyperslab; for(auto slab : regular_hyper_slabs) { hyperslab |= slab; } ``` This eventually triggers calling: ``` for(auto slab : regular_hyper_slabs) { auto [offset, stride, counts, blocks] = slab; H5Sselect_hyperslab(space_id, offset, stride, counts, blocks); } ``` Measurements show that this has a runtime that's quadratic in the number of regular hyper slabs. This starts becoming prohibitive at 10k - 40k slabs. We noticed that `H5Scombine_select` does not suffer from the same performance issue. This allows us to optimize (long) chains of `Op::Or` using divide and conquer. The current implementation only optimizes streaks of `Op::Or`. --- include/highfive/bits/H5Slice_traits.hpp | 84 +++++++++++++++++++++--- tests/unit/test_high_five_selection.cpp | 58 ++++++++++++++++ 2 files changed, 134 insertions(+), 8 deletions(-) diff --git a/include/highfive/bits/H5Slice_traits.hpp b/include/highfive/bits/H5Slice_traits.hpp index 6812a0914..a7202de07 100644 --- a/include/highfive/bits/H5Slice_traits.hpp +++ b/include/highfive/bits/H5Slice_traits.hpp @@ -163,16 +163,27 @@ class HyperSlab { DataSpace apply(const DataSpace& space_) const { auto space = space_.clone(); - for (const auto& sel: selects) { - if (sel.op == Op::None) { + auto n_selects = selects.size(); + for (size_t i = 0; i < n_selects; ++i) { + auto begin = selects.data() + i; + auto end = selects.data() + n_selects; + + auto n_ors = detect_ors(begin, end); + + if (n_ors > 1) { + auto right_space = reduce_ors(space_, begin, begin + n_ors); + // Since HDF5 doesn't allow `combine_selections` with a None + // selection, we need to avoid the issue: + if (H5Sget_select_type(space.getId()) == H5S_SEL_NONE) { + space 
= right_space; + } else { + space = combine_selections(space, Op::Or, right_space); + } + i += n_ors - 1; + } else if (selects[i].op == Op::None) { detail::h5s_select_none(space.getId()); } else { - detail::h5s_select_hyperslab(space.getId(), - convert(sel.op), - sel.offset.empty() ? nullptr : sel.offset.data(), - sel.stride.empty() ? nullptr : sel.stride.data(), - sel.count.empty() ? nullptr : sel.count.data(), - sel.block.empty() ? nullptr : sel.block.data()); + select_hyperslab(space, selects[i]); } } return space; @@ -229,6 +240,63 @@ class HyperSlab { }; std::vector selects; + + protected: + size_t detect_ors(Select_ const* begin, Select_ const* end) const { + size_t streak = 0; + for (Select_ const* it = begin; it != end; ++it) { + if (it->op == Op::Or) { + ++streak; + } else { + break; + } + } + + return streak; + } + + DataSpace select_none(const DataSpace& outer_space) const { + auto space = outer_space.clone(); + detail::h5s_select_none(space.getId()); + return space; + } + + void select_hyperslab(DataSpace& space, const Select_& sel) const { + detail::h5s_select_hyperslab(space.getId(), + convert(sel.op), + sel.offset.empty() ? nullptr : sel.offset.data(), + sel.stride.empty() ? nullptr : sel.stride.data(), + sel.count.empty() ? nullptr : sel.count.data(), + sel.block.empty() ? 
nullptr : sel.block.data()); + } + + DataSpace combine_selections(const DataSpace& left_space, + Op op, + const DataSpace& right_space) const { + return detail::make_data_space( + H5Scombine_select(left_space.getId(), convert(op), right_space.getId())); + } + + DataSpace reduce_ors(const DataSpace& outer_space, + Select_ const* begin, + Select_ const* end) const { + if (begin == end) { + throw std::runtime_error("Broken logic."); + } + + std::ptrdiff_t distance = end - begin; + if (distance == 1) { + auto space = select_none(outer_space); + select_hyperslab(space, *begin); + return space; + } + + Select_ const* mid = begin + distance / 2; + auto right_space = reduce_ors(outer_space, begin, mid); + auto left_space = reduce_ors(outer_space, mid, end); + + return combine_selections(left_space, Op::Or, right_space); + } }; /// diff --git a/tests/unit/test_high_five_selection.cpp b/tests/unit/test_high_five_selection.cpp index e3b91e4cc..1979f3ca0 100644 --- a/tests/unit/test_high_five_selection.cpp +++ b/tests/unit/test_high_five_selection.cpp @@ -25,6 +25,7 @@ #include #include "tests_high_five.hpp" +#include "data_generator.hpp" using namespace HighFive; using Catch::Matchers::Equals; @@ -534,3 +535,60 @@ void irregularHyperSlabSelectionWriteTest() { TEMPLATE_LIST_TEST_CASE("irregularHyperSlabSelectionWrite", "[template]", std::tuple) { irregularHyperSlabSelectionWriteTest(); } + +TEST_CASE("select_multiple_ors", "[hyperslab]") { + size_t n = 100, m = 20; + size_t nsel = 30; + auto x = testing::DataGenerator>>::create({n, m}); + + auto file = File("select_multiple_ors.h5", File::Truncate); + auto dset = file.createDataSet("x", x); + + std::vector> indices; + auto hyperslab = HyperSlab(); + for (size_t i = 0; i < nsel; ++i) { + std::vector offsets{i, i % 10}; + std::vector counts{1, 3}; + hyperslab |= RegularHyperSlab(offsets, counts); + + for (size_t k = 0; k < counts[1]; ++k) { + indices.push_back({offsets[0], offsets[1] + k}); + } + } + + SECTION("Pure Or Chain") { 
+ auto selected = dset.select(hyperslab).read>(); + REQUIRE(selected.size() == indices.size()); + for (size_t k = 0; k < selected.size(); ++k) { + size_t i = indices[k][0]; + size_t j = indices[k][1]; + REQUIRE(selected[k] == x[i][j]); + } + } + + SECTION("Or Chain And Slab") { + std::cout << " ................... " << std::endl; + std::vector offsets{5, 2}; + std::vector counts{85, 12}; + + std::vector> selected_indices; + for (const auto ij: indices) { + std::array ij_max = {offsets[0] + counts[0], offsets[1] + counts[1]}; + + if (offsets[0] <= ij[0] && ij[0] < ij_max[0] && offsets[1] <= ij[1] && + ij[1] < ij_max[1]) { + selected_indices.push_back(ij); + } + } + + hyperslab &= RegularHyperSlab(offsets, counts); + + auto selected = dset.select(hyperslab).read>(); + REQUIRE(selected.size() == selected_indices.size()); + for (size_t k = 0; k < selected.size(); ++k) { + size_t i = selected_indices[k][0]; + size_t j = selected_indices[k][1]; + REQUIRE(selected[k] == x[i][j]); + } + } +}