diff --git a/src/hats/pixel_math/sparse_histogram.py b/src/hats/pixel_math/sparse_histogram.py index bc07e4f3..49322065 100644 --- a/src/hats/pixel_math/sparse_histogram.py +++ b/src/hats/pixel_math/sparse_histogram.py @@ -1,34 +1,29 @@ """Sparse 1-D histogram of healpix pixel counts.""" import numpy as np -from scipy.sparse import csc_array, load_npz, save_npz, sparray import hats.pixel_math.healpix_shim as hp class SparseHistogram: - """Wrapper around scipy's sparse array.""" + """Wrapper around a naive sparse array, that is just non-zero indexes and counts. - def __init__(self, sparse_array): - if not isinstance(sparse_array, sparray): - raise ValueError("The sparse array must be a scipy sparse array.") - if sparse_array.format != "csc": - raise ValueError("The sparse array must be a Compressed Sparse Column array.") - self.sparse_array = sparse_array + e.g. for a dense 1-d numpy histogram of order 0, you might see:: - def add(self, other): - """Add in another sparse histogram, updating this wrapper's array. + [0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0] - Args: - other (SparseHistogram): the wrapper containing the addend - """ - if not isinstance(other, SparseHistogram): - raise ValueError("Both addends should be SparseHistogram.") - if self.sparse_array.shape != other.sparse_array.shape: - raise ValueError( - "The histogram partials have incompatible sizes due to different healpix orders." - ) - self.sparse_array += other.sparse_array + There are only elements at [1, 8], and they have respective values [4, 9]. You + would create the sparse histogram like:: + + SparseHistogram([1, 8], [4, 9], 0) + """ + + def __init__(self, indexes, counts, order): + if len(indexes) != len(counts): + raise ValueError("indexes and counts must be same length") + self.indexes = indexes + self.counts = counts + self.order = order def to_array(self): """Convert the sparse array to a dense numpy array. @@ -36,7 +31,9 @@ def to_array(self): Returns: dense 1-d numpy array. """ - return self.sparse_array.toarray()[0] + dense = np.zeros(hp.order2npix(self.order), dtype=np.int64) + dense[self.indexes] = self.counts + return dense def to_file(self, file_name): """Persist the sparse array to disk. @@ -44,7 +41,7 @@ def to_file(self, file_name): NB: this saves as a sparse array, and so will likely have lower space requirements than saving the corresponding dense 1-d numpy array. """ - save_npz(file_name, self.sparse_array) + np.savez(file_name, indexes=self.indexes, counts=self.counts, order=self.order) def to_dense_file(self, file_name): """Persist the DENSE array to disk as a numpy array.""" @@ -52,50 +49,41 @@ def to_dense_file(self, file_name): file_handle.write(self.to_array().data) @classmethod - def make_empty(cls, healpix_order=10): - """Create an empty sparse array for a given healpix order. - - Args: - healpix_order (int): healpix order + def from_file(cls, file_name): + """Read sparse histogram from a file. Returns: new sparse histogram """ - histo = csc_array((1, hp.order2npix(healpix_order)), dtype=np.int64) - return cls(histo) - - @classmethod - def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10): - """Create an sparse array for a given healpix order, prefilled with counts at - the provided indexes. + npzfile = np.load(file_name) + return cls(npzfile["indexes"], npzfile["counts"], npzfile["order"]) - e.g. for a dense 1-d numpy histogram of order 0, you might see:: - [0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0] +class HistogramAggregator: + """Utility for aggregating sparse histograms.""" - There are only elements at [1, 8], and they have respective values [4, 9]. You - would create the sparse histogram like:: + def __init__(self, order): + self.order = order + self.full_histogram = np.zeros(hp.order2npix(order), dtype=np.int64) - make_from_counts([1, 8], [4, 9], 0) + def add(self, other): + """Add in another sparse histogram, updating this wrapper's array. Args: - indexes (int[]): index locations of non-zero values - counts_at_indexes (int[]): values at the ``indexes`` - healpix_order (int): healpix order - - Returns: - new sparse histogram - """ - row = np.array(np.zeros(len(indexes), dtype=np.int64)) - histo = csc_array((counts_at_indexes, (row, indexes)), shape=(1, hp.order2npix(healpix_order))) - return cls(histo) - - @classmethod - def from_file(cls, file_name): - """Read sparse histogram from a file. - - Returns: - new sparse histogram + other (SparseHistogram): the wrapper containing the addend """ - histo = load_npz(file_name) - return cls(histo) + if not isinstance(other, SparseHistogram): + raise ValueError("Both addends should be SparseHistogram.") + if self.order != other.order: + raise ValueError( + "The histogram partials have incompatible sizes due to different healpix orders." + ) + if len(other.indexes) == 0: + return + self.full_histogram[other.indexes] += other.counts + + def to_sparse(self): + """Return a SparseHistogram, based on non-zero values in this aggregation.""" + indexes = self.full_histogram.nonzero()[0] + counts = self.full_histogram[indexes] + return SparseHistogram(indexes, counts, self.order) diff --git a/tests/hats/pixel_math/test_sparse_histogram.py b/tests/hats/pixel_math/test_sparse_histogram.py index a2598417..fc9a8291 100644 --- a/tests/hats/pixel_math/test_sparse_histogram.py +++ b/tests/hats/pixel_math/test_sparse_histogram.py @@ -4,22 +4,21 @@ import numpy.testing as npt import pytest from numpy import frombuffer -from scipy.sparse import csr_array import hats.pixel_math.healpix_shim as hp -from hats.pixel_math.sparse_histogram import SparseHistogram +from hats.pixel_math.sparse_histogram import HistogramAggregator, SparseHistogram def test_make_empty(): """Tests the initialization of an empty histogram at the specified order""" - histogram = SparseHistogram.make_empty(5) + histogram = SparseHistogram([], [], 5) expected_hist = np.zeros(hp.order2npix(5)) npt.assert_array_equal(expected_hist, histogram.to_array()) def test_read_write_round_trip(tmp_path): """Test that we can read what we write into a histogram file.""" - histogram = SparseHistogram.make_from_counts([11], [131], 0) + histogram = SparseHistogram([11], [131], 0) # Write as a sparse array file_name = tmp_path / "round_trip_sparse.npz" @@ -38,43 +37,42 @@ def test_read_write_round_trip(tmp_path): def test_add_same_order(): """Test that we can add two histograms created from the same order, and get the expected results.""" - partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0) + partial_histogram_left = SparseHistogram([11], [131], 0) - partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 0) + partial_histogram_right = SparseHistogram([10, 11], [4, 15], 0) - partial_histogram_left.add(partial_histogram_right) + total_histogram = HistogramAggregator(0) + total_histogram.add(partial_histogram_left) + total_histogram.add(partial_histogram_right) expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 146] - npt.assert_array_equal(partial_histogram_left.to_array(), expected) + npt.assert_array_equal(total_histogram.full_histogram, expected) + + sparse = total_histogram.to_sparse() + npt.assert_array_equal(sparse.indexes, [10, 11]) + npt.assert_array_equal(sparse.counts, [4, 146]) + npt.assert_array_equal(sparse.order, 0) def test_add_different_order(): """Test that we can NOT add histograms of different healpix orders.""" - partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0) - - partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 1) + partial_histogram_left = SparseHistogram([11], [131], 0) + partial_histogram_right = SparseHistogram([10, 11], [4, 15], 1) + total_histogram = HistogramAggregator(0) + total_histogram.add(partial_histogram_left) with pytest.raises(ValueError, match="partials have incompatible sizes"): - partial_histogram_left.add(partial_histogram_right) + total_histogram.add(partial_histogram_right) def test_add_different_type(): """Test that we can NOT add histograms of different healpix orders.""" - partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0) + partial_histogram_left = SparseHistogram([11], [131], 0) + total_histogram = HistogramAggregator(0) + total_histogram.add(partial_histogram_left) with pytest.raises(ValueError, match="addends should be SparseHistogram"): - partial_histogram_left.add(5) + total_histogram.add(5) with pytest.raises(ValueError, match="addends should be SparseHistogram"): - partial_histogram_left.add([1, 2, 3, 4, 5]) - - -def test_init_bad_inputs(): - """Test that the SparseHistogram type requires a compressed sparse column - as its sole `sparse_array` argument.""" - with pytest.raises(ValueError, match="must be a scipy sparse array"): - SparseHistogram(5) - - with pytest.raises(ValueError, match="must be a Compressed Sparse Column"): - row_sparse_array = csr_array((1, 12), dtype=np.int64) - SparseHistogram(row_sparse_array) + total_histogram.add(([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], 0))