From 1ec3008b119893e90f67a3c37619af9f492017b2 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 9 Nov 2023 19:02:42 +0100 Subject: [PATCH] Select product of slices. --- include/highfive/bits/H5Slice_traits.hpp | 67 +++++++ include/highfive/bits/H5Slice_traits_misc.hpp | 152 ++++++++++++++++ src/examples/select_slices.cpp | 131 ++++++++++++++ tests/unit/tests_high_five.hpp | 2 +- tests/unit/tests_high_five_base.cpp | 167 ++++++++++++++++++ 5 files changed, 518 insertions(+), 1 deletion(-) create mode 100644 src/examples/select_slices.cpp diff --git a/include/highfive/bits/H5Slice_traits.hpp b/include/highfive/bits/H5Slice_traits.hpp index 52c52713f..c21ecfc86 100644 --- a/include/highfive/bits/H5Slice_traits.hpp +++ b/include/highfive/bits/H5Slice_traits.hpp @@ -245,6 +245,71 @@ class HyperSlab { std::vector selects; }; +/// +/// \brief Selects the Cartesian product of slices. +/// +/// Given a one-dimensional dataset one might want to select the union of +/// multiple, non-overlapping slices. For example, +/// +/// using Slice = std::array; +/// using Slices = std::vector; +/// auto slices = Slices{{0, 2}, {4, 10}}; +/// dset.select(ProductSet(slices); +/// +/// to select elements `0`, `1` and `4`, ..., `9` (inclusive). +/// +/// For a two-dimensional array one which to select the row specified above, +/// but only columns `2`, `3` and `4`: +/// +/// dset.select(ProductSet(slices, Slice{2, 5})); +/// // Analogues with the roles of columns and rows reversed: +/// dset.select(ProductSet(Slice{2, 5}, slices)); +/// +/// One can generalize once more and allow the unions of slices in both x- and +/// y-dimension: +/// +/// auto yslices = Slices{{1, 5}, {7, 8}}; +/// auto xslices = Slices{{0, 3}, {6, 8}}; +/// dset.select(ProductSet(yslices, xslices)); +/// +/// which selects the following from a 11x8 dataset: +/// +/// . . . . . . . . +/// x x x . . . x x +/// x x x . . . x x +/// x x x . . . x x +/// x x x . . . x x +/// . . . . . . . . +/// . . . . . . . . +/// x x x . . . x x +/// . . . . . . . . +/// . . . . . . . . +/// . . . . . . . . +/// +/// Final twist, the selection along one axis may be a point set, from which a +/// vector of, possibly single-element, slices can be constructed. +/// +/// The solution works analogous in higher dimensions. A selection `sk` along +/// axis `k` can be interpreted as a subset `S_k` of the natural numbers. The +/// index `i` is in `S_k` if it's selected by `sk`. The `ProductSet` of `s0`, +/// ..., `sN` selects the Cartesian product `S_0 x ... x S_N`. +/// +/// Note that the selections along each axis must be sorted and non-overlapping. +/// +class ProductSet { + public: + template + explicit ProductSet(const Slices&... slices); + + private: + HyperSlab slab; + std::vector shape; + + template + friend class SliceTraits; +}; + + template class SliceTraits { public: @@ -291,6 +356,8 @@ class SliceTraits { /// Selection select(const ElementSet& elements) const; + Selection select(const ProductSet& product_set) const; + template T read(const DataTransferProps& xfer_props = DataTransferProps()) const; diff --git a/include/highfive/bits/H5Slice_traits_misc.hpp b/include/highfive/bits/H5Slice_traits_misc.hpp index 7b07c9abf..01040cf87 100644 --- a/include/highfive/bits/H5Slice_traits_misc.hpp +++ b/include/highfive/bits/H5Slice_traits_misc.hpp @@ -63,6 +63,153 @@ inline ElementSet::ElementSet(const std::vector>& eleme } } +namespace detail { +class HyperCube { + public: + HyperCube(size_t rank) + : offset(rank) + , count(rank) {} + + void cross(const std::array& range, size_t axis) { + offset[axis] = range[0]; + count[axis] = range[1] - range[0]; + } + + RegularHyperSlab asSlab() { + return RegularHyperSlab(offset, count); + } + + private: + std::vector offset; + std::vector count; +}; + +inline void build_hyper_slab(HyperSlab& slab, size_t /* axis */, HyperCube& cube) { + slab |= cube.asSlab(); +} + +template +inline void build_hyper_slab(HyperSlab& slab, + size_t axis, + HyperCube& cube, + const std::array& slice, + const Slices&... higher_slices) { + cube.cross(slice, axis); + build_hyper_slab(slab, axis + 1, cube, higher_slices...); +} + +template +inline void build_hyper_slab(HyperSlab& slab, + size_t axis, + HyperCube& cube, + const std::vector>& slices, + const Slices&... higher_slices) { + for (const auto& slice: slices) { + build_hyper_slab(slab, axis, cube, slice, higher_slices...); + } +} + +template +inline void build_hyper_slab(HyperSlab& slab, + size_t axis, + HyperCube& cube, + const std::vector& ids, + const Slices&... higher_slices) { + for (const auto& id: ids) { + auto slice = std::array{id, id + 1}; + build_hyper_slab(slab, axis, cube, slice, higher_slices...); + } +} + +template +inline void build_hyper_slab(HyperSlab& slab, + size_t axis, + HyperCube& cube, + size_t id, + const Slices&... higher_slices) { + auto slice = std::array{id, id + 1}; + build_hyper_slab(slab, axis, cube, slice, higher_slices...); +} + +inline void compute_squashed_shape(size_t /* axis */, std::vector& /* shape */) { + // assert(axis == shape.size()); +} + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + const std::array& slice, + const Slices&... higher_slices); + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + const std::vector& points, + const Slices&... higher_slices); + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + size_t point, + const Slices&... higher_slices); + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + const std::vector>& slices, + const Slices&... higher_slices); + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + const std::array& slice, + const Slices&... higher_slices) { + shape[axis] = slice[1] - slice[0]; + compute_squashed_shape(axis + 1, shape, higher_slices...); +} + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + const std::vector& points, + const Slices&... higher_slices) { + shape[axis] = points.size(); + compute_squashed_shape(axis + 1, shape, higher_slices...); +} + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + const std::vector>& slices, + const Slices&... higher_slices) { + shape[axis] = 0; + for (const auto& slice: slices) { + shape[axis] += slice[1] - slice[0]; + } + compute_squashed_shape(axis + 1, shape, higher_slices...); +} + +template +inline void compute_squashed_shape(size_t axis, + std::vector& shape, + size_t /* point */, + const Slices&... higher_slices) { + shape[axis] = 1; + compute_squashed_shape(axis + 1, shape, higher_slices...); +} +} // namespace detail + +template +inline ProductSet::ProductSet(const Slices&... slices) { + auto rank = sizeof...(slices); + detail::HyperCube cube(rank); + detail::build_hyper_slab(slab, 0, cube, slices...); + + shape = std::vector(rank, size_t(0)); + detail::compute_squashed_shape(0, shape, slices...); +} + + template inline Selection SliceTraits::select(const HyperSlab& hyperslab, const DataSpace& memspace) const { @@ -156,6 +303,11 @@ inline Selection SliceTraits::select(const ElementSet& elements) const return detail::make_selection(DataSpace(num_elements), space, details::get_dataset(slice)); } +template +inline Selection SliceTraits::select(const ProductSet& product_set) const { + return this->select(product_set.slab, DataSpace(product_set.shape)); +} + template template diff --git a/src/examples/select_slices.cpp b/src/examples/select_slices.cpp new file mode 100644 index 000000000..ea428fa3f --- /dev/null +++ b/src/examples/select_slices.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include + +#include + +void print_mask(const std::vector>& values, + const std::vector>& selected); + +void print_values(const std::vector>& values); + +void pretty_print(const std::vector>& values, + const std::vector>& selected); + +int main(void) { + using namespace HighFive; + using container_type = std::vector>; + + const std::string file_name("select_slices.h5"); + const std::string dataset_name("dset"); + + // Create a new file using the default property lists. + File file(file_name, File::Truncate); + + // we have some example values in a 2D vector 2x5 + // clang-format off + container_type values = { + {1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7}, + {2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7}, + {3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7}, + {4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7}, + {5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7}, + {6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7}, + {7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7}, + {8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7}, + {9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7} + }; + // clang-format on + + auto dset = file.createDataSet(dataset_name, values); + + // Let's start with the selection `values[1:4, 2:4]`. + { + auto xslice = std::array{2, 4}; + auto yslice = std::array{1, 4}; + + auto selected = dset.select(ProductSet(yslice, xslice)).read(); + std::cout << " -- values[1:4, 2:4] ------------ \n"; + pretty_print(values, selected); + } + + // Now we'd like the selection `values[[1,2,8], 2:4]`. + { + auto xslice = std::array{2, 4}; + auto yslice = std::vector{1, 2, 8}; + + auto selected = dset.select(ProductSet(yslice, xslice)).read(); + std::cout << "\n -- values[[1,2,8], 2:4] -------- \n"; + pretty_print(values, selected); + } + + // ... or the union of multiple slices. + { + auto xslice = std::array{2, 4}; + auto yslice = std::vector>{{0, 2}, {5, 9}}; + + auto selected = dset.select(ProductSet(yslice, xslice)).read(); + std::cout << "\n -- values[[0:2, 5:10], 2:4] -------- \n"; + pretty_print(values, selected); + } + + // Similar for the union of multiple slices in both x- and y-direction. + { + auto xslice = std::vector>{{0, 1}, {2, 4}, {6, 7}}; + auto yslice = std::vector>{{0, 2}, {5, 9}}; + + auto selected = dset.select(ProductSet(yslice, xslice)).read(); + std::cout << "\n -- values[[0:2, 5:10], [0:1, 2:4, 6:7]] -------- \n"; + pretty_print(values, selected); + } + + // If only a single row/column is need use the following. However, + // selecting elements one-by-one in a loop can be a serious performance + // issue. + { + auto xslice = std::vector>{{0, 1}, {2, 4}, {6, 7}}; + auto row_id = 3; + + auto selected = dset.select(ProductSet(row_id, xslice)).read(); + std::cout << "\n -- values[3, [0:1, 2:4, 6:7]] -------- \n"; + pretty_print(values, selected); + } + + return 0; +} + +void print_mask(const std::vector>& values, + const std::vector>& selected) { + std::vector flat_selection; + for (const auto& row: selected) { + for (const auto& x: row) { + flat_selection.push_back(x); + } + } + + for (const auto& row: values) { + for (const auto& x: row) { + auto found = std::find(flat_selection.begin(), flat_selection.end(), x) != + flat_selection.end(); + std::cout << (found ? "x" : ".") << " "; + } + std::cout << "\n"; + } +} + +void print_values(const std::vector>& values) { + for (const auto& row: values) { + for (const auto& x: row) { + std::cout << x << " "; + } + std::cout << "\n"; + } +} + +void pretty_print(const std::vector>& values, + const std::vector>& selected) { + print_mask(values, selected); + std::cout << "\n"; + print_values(selected); +} diff --git a/tests/unit/tests_high_five.hpp b/tests/unit/tests_high_five.hpp index 0ebd58c44..afc2420da 100644 --- a/tests/unit/tests_high_five.hpp +++ b/tests/unit/tests_high_five.hpp @@ -151,7 +151,7 @@ struct ContentGenerate { ContentGenerate gen; std::string random_string; std::mt19937_64 rgen; - rgen.seed(88); + rgen.seed(42); std::uniform_int_distribution int_dist(0, 1000); const size_t size_string = int_dist(rgen); diff --git a/tests/unit/tests_high_five_base.cpp b/tests/unit/tests_high_five_base.cpp index 899170d93..626ac1e7a 100644 --- a/tests/unit/tests_high_five_base.cpp +++ b/tests/unit/tests_high_five_base.cpp @@ -1617,6 +1617,173 @@ TEMPLATE_LIST_TEST_CASE("irregularHyperSlabSelectionWrite", "[template]", std::t irregularHyperSlabSelectionWriteTest(); } +// Ensure that "all" combinations of `ProductSet` are being instantiated. +class CompileProductSet { + using Point = size_t; + using Points = std::vector; + using Slice = std::array; + using Slices = std::vector; + + void all() { + one_arg(); + two_args(); + three_args(); + } + + template + void zero_args() { + ProductSet(Preamble()...); + } + + template + void one_arg() { + zero_args(); + zero_args(); + zero_args(); + zero_args(); + } + + template + void two_args() { + one_arg(); + one_arg(); + one_arg(); + one_arg(); + } + + template + void three_args() { + two_args(); + two_args(); + two_args(); + two_args(); + } +}; + +template +void check_product_set_shape(const S& subarray, const Y& yslices, const X& xslices) { + std::vector subshape{0, 0}; + + for (auto yslice: yslices) { + subshape[0] += yslice[1] - yslice[0]; + } + + for (auto xslice: xslices) { + subshape[1] += xslice[1] - xslice[0]; + } + + REQUIRE(subarray.size() == subshape[0]); + for (const auto& v: subarray) { + REQUIRE(v.size() == subshape[1]); + } +} + +template +void check_product_set_values(const S& array, + const S& subarray, + const Y& yslices, + const X& xslices) { + size_t il = 0; + for (const auto& yslice: yslices) { + for (size_t ig = yslice[0]; ig < yslice[1]; ++ig) { + size_t jl = 0; + + for (const auto& xslice: xslices) { + for (size_t jg = xslice[0]; jg < xslice[1]; ++jg) { + REQUIRE(subarray[il][jl] == array[ig][jg]); + ++jl; + } + } + ++il; + } + } +} + +template +void check(const S& array, const S& subarray, const Y& yslices, const X& xslices) { + check_product_set_shape(subarray, yslices, xslices); + check_product_set_values(array, subarray, yslices, xslices); +}; + + +TEST_CASE("productSet") { + using Slice = std::array; + using Slices = std::vector; + using Point = size_t; + using Points = std::vector; + + const std::string file_name("h5_test_product_set.h5"); + + auto generate = [](size_t n, size_t m, auto f) { + auto x = std::vector>(n); + for (size_t i = 0; i < n; ++i) { + x[i] = std::vector(m); + } + + for (size_t i = 0; i < n; ++i) { + for (size_t j = 0; j < m; ++j) { + x[i][j] = f(i, j); + } + } + + return x; + }; + + auto array = generate(6, 12, [](size_t i, size_t j) { return double(i) + double(j) * 0.01; }); + + auto file = File(file_name, File::Truncate); + auto dset = file.createDataSet("dset", array); + + SECTION("rR") { + std::vector> subarray; + + auto yslice = Slice{1, 3}; + auto yslices = Slices{yslice}; + auto xslices = Slices{{0, 1}, {3, 5}}; + + dset.select(ProductSet(yslice, xslices)).read(subarray); + + check(array, subarray, yslices, xslices); + } + + SECTION("Rr") { + std::vector> subarray; + + auto yslices = Slices{{0, 1}, {3, 5}}; + auto xslice = Slice{1, 3}; + auto xslices = Slices{xslice}; + + dset.select(ProductSet(yslices, xslice)).read(subarray); + + check(array, subarray, yslices, xslices); + } + + SECTION("RP") { + std::vector> subarray; + + auto yslices = Slices{{0, 1}, {3, 5}}; + auto xpoints = Points{2, 4, 5}; + auto xslices = Slices{{2, 3}, {4, 6}}; + + dset.select(ProductSet(yslices, xpoints)).read(subarray); + + check(array, subarray, yslices, xslices); + } + + SECTION("pR") { + std::vector> subarray; + + auto ypoint = Point{2}; + auto yslices = Slices{{2, 3}}; + auto xslices = Slices{{0, 1}, {3, 5}}; + + dset.select(ProductSet(ypoint, xslices)).read(subarray); + + check(array, subarray, yslices, xslices); + } +} + + template void attribute_scalar_rw() { std::ostringstream filename;