Skip to content

Commit

Permalink
Use a naive sparse histogram. (#446)
Browse files Browse the repository at this point in the history
* Use a naive sparse histogram.

* Remove make_from_counts
  • Loading branch information
delucchi-cmu authored Jan 6, 2025
1 parent e07916b commit c01018f
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 84 deletions.
104 changes: 46 additions & 58 deletions src/hats/pixel_math/sparse_histogram.py
Original file line number Diff line number Diff line change
@@ -1,101 +1,89 @@
"""Sparse 1-D histogram of healpix pixel counts."""

import numpy as np
from scipy.sparse import csc_array, load_npz, save_npz, sparray

import hats.pixel_math.healpix_shim as hp


class SparseHistogram:
"""Wrapper around scipy's sparse array."""
"""Wrapper around a naive sparse array, that is just non-zero indexes and counts.
def __init__(self, sparse_array):
if not isinstance(sparse_array, sparray):
raise ValueError("The sparse array must be a scipy sparse array.")
if sparse_array.format != "csc":
raise ValueError("The sparse array must be a Compressed Sparse Column array.")
self.sparse_array = sparse_array
e.g. for a dense 1-d numpy histogram of order 0, you might see::
def add(self, other):
"""Add in another sparse histogram, updating this wrapper's array.
[0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0]
Args:
other (SparseHistogram): the wrapper containing the addend
"""
if not isinstance(other, SparseHistogram):
raise ValueError("Both addends should be SparseHistogram.")
if self.sparse_array.shape != other.sparse_array.shape:
raise ValueError(
"The histogram partials have incompatible sizes due to different healpix orders."
)
self.sparse_array += other.sparse_array
There are only elements at [1, 8], and they have respective values [4, 9]. You
would create the sparse histogram like::
SparseHistogram([1, 8], [4, 9], 0)
"""

def __init__(self, indexes, counts, order):
if len(indexes) != len(counts):
raise ValueError("indexes and counts must be same length")
self.indexes = indexes
self.counts = counts
self.order = order

def to_array(self):
"""Convert the sparse array to a dense numpy array.
Returns:
dense 1-d numpy array.
"""
return self.sparse_array.toarray()[0]
dense = np.zeros(hp.order2npix(self.order), dtype=np.int64)
dense[self.indexes] = self.counts
return dense

def to_file(self, file_name):
"""Persist the sparse array to disk.
NB: this saves as a sparse array, and so will likely have lower space requirements
than saving the corresponding dense 1-d numpy array.
"""
save_npz(file_name, self.sparse_array)
np.savez(file_name, indexes=self.indexes, counts=self.counts, order=self.order)

def to_dense_file(self, file_name):
"""Persist the DENSE array to disk as a numpy array."""
with open(file_name, "wb+") as file_handle:
file_handle.write(self.to_array().data)

@classmethod
def make_empty(cls, healpix_order=10):
"""Create an empty sparse array for a given healpix order.
Args:
healpix_order (int): healpix order
def from_file(cls, file_name):
"""Read sparse histogram from a file.
Returns:
new sparse histogram
"""
histo = csc_array((1, hp.order2npix(healpix_order)), dtype=np.int64)
return cls(histo)

@classmethod
def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
"""Create an sparse array for a given healpix order, prefilled with counts at
the provided indexes.
npzfile = np.load(file_name)
return cls(npzfile["indexes"], npzfile["counts"], npzfile["order"])

e.g. for a dense 1-d numpy histogram of order 0, you might see::

[0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0]
class HistogramAggregator:
"""Utility for aggregating sparse histograms."""

There are only elements at [1, 8], and they have respective values [4, 9]. You
would create the sparse histogram like::
def __init__(self, order):
self.order = order
self.full_histogram = np.zeros(hp.order2npix(order), dtype=np.int64)

make_from_counts([1, 8], [4, 9], 0)
def add(self, other):
"""Add in another sparse histogram, updating this wrapper's array.
Args:
indexes (int[]): index locations of non-zero values
counts_at_indexes (int[]): values at the ``indexes``
healpix_order (int): healpix order
Returns:
new sparse histogram
"""
row = np.array(np.zeros(len(indexes), dtype=np.int64))
histo = csc_array((counts_at_indexes, (row, indexes)), shape=(1, hp.order2npix(healpix_order)))
return cls(histo)

@classmethod
def from_file(cls, file_name):
"""Read sparse histogram from a file.
Returns:
new sparse histogram
other (SparseHistogram): the wrapper containing the addend
"""
histo = load_npz(file_name)
return cls(histo)
if not isinstance(other, SparseHistogram):
raise ValueError("Both addends should be SparseHistogram.")
if self.order != other.order:
raise ValueError(
"The histogram partials have incompatible sizes due to different healpix orders."
)
if len(other.indexes) == 0:
return
self.full_histogram[other.indexes] += other.counts

def to_sparse(self):
"""Return a SparseHistogram, based on non-zero values in this aggregation."""
indexes = self.full_histogram.nonzero()[0]
counts = self.full_histogram[indexes]
return SparseHistogram(indexes, counts, self.order)
50 changes: 24 additions & 26 deletions tests/hats/pixel_math/test_sparse_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,21 @@
import numpy.testing as npt
import pytest
from numpy import frombuffer
from scipy.sparse import csr_array

import hats.pixel_math.healpix_shim as hp
from hats.pixel_math.sparse_histogram import SparseHistogram
from hats.pixel_math.sparse_histogram import HistogramAggregator, SparseHistogram


def test_make_empty():
"""Tests the initialization of an empty histogram at the specified order"""
histogram = SparseHistogram.make_empty(5)
histogram = SparseHistogram([], [], 5)
expected_hist = np.zeros(hp.order2npix(5))
npt.assert_array_equal(expected_hist, histogram.to_array())


def test_read_write_round_trip(tmp_path):
"""Test that we can read what we write into a histogram file."""
histogram = SparseHistogram.make_from_counts([11], [131], 0)
histogram = SparseHistogram([11], [131], 0)

# Write as a sparse array
file_name = tmp_path / "round_trip_sparse.npz"
Expand All @@ -38,43 +37,42 @@ def test_read_write_round_trip(tmp_path):
def test_add_same_order():
"""Test that we can add two histograms created from the same order, and get
the expected results."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)
partial_histogram_left = SparseHistogram([11], [131], 0)

partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 0)
partial_histogram_right = SparseHistogram([10, 11], [4, 15], 0)

partial_histogram_left.add(partial_histogram_right)
total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
total_histogram.add(partial_histogram_right)

expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 146]
npt.assert_array_equal(partial_histogram_left.to_array(), expected)
npt.assert_array_equal(total_histogram.full_histogram, expected)

sparse = total_histogram.to_sparse()
npt.assert_array_equal(sparse.indexes, [10, 11])
npt.assert_array_equal(sparse.counts, [4, 146])
npt.assert_array_equal(sparse.order, 0)


def test_add_different_order():
"""Test that we can NOT add histograms of different healpix orders."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)

partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 1)
partial_histogram_left = SparseHistogram([11], [131], 0)
partial_histogram_right = SparseHistogram([10, 11], [4, 15], 1)

total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
with pytest.raises(ValueError, match="partials have incompatible sizes"):
partial_histogram_left.add(partial_histogram_right)
total_histogram.add(partial_histogram_right)


def test_add_different_type():
"""Test that we can NOT add histograms of different healpix orders."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)
partial_histogram_left = SparseHistogram([11], [131], 0)

total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
with pytest.raises(ValueError, match="addends should be SparseHistogram"):
partial_histogram_left.add(5)
total_histogram.add(5)

with pytest.raises(ValueError, match="addends should be SparseHistogram"):
partial_histogram_left.add([1, 2, 3, 4, 5])


def test_init_bad_inputs():
"""Test that the SparseHistogram type requires a compressed sparse column
as its sole `sparse_array` argument."""
with pytest.raises(ValueError, match="must be a scipy sparse array"):
SparseHistogram(5)

with pytest.raises(ValueError, match="must be a Compressed Sparse Column"):
row_sparse_array = csr_array((1, 12), dtype=np.int64)
SparseHistogram(row_sparse_array)
total_histogram.add(([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], 0))

0 comments on commit c01018f

Please sign in to comment.