diff --git a/pixi.toml b/pixi.toml index 231146fb..eb861050 100644 --- a/pixi.toml +++ b/pixi.toml @@ -38,6 +38,7 @@ pre-commit = "*" pytest-codspeed = "*" [feature.notebooks.dependencies] +ipython = "*" nbmake = "*" matplotlib = "*" @@ -60,7 +61,8 @@ scipy = ">=0.19" mlir-python-bindings = "19.*" [environments] +dev = ["tests", "extras", "notebooks"] tests = ["tests", "extras"] docs = ["docs", "extras"] -mlir-dev = ["tests", "mlir"] -finch-dev = ["tests", "finch"] +mlir-dev = ["mlir", "tests", "notebooks"] +finch-dev = ["finch", "tests", "notebooks"] diff --git a/sparse/numba_backend/__init__.py b/sparse/numba_backend/__init__.py index 4658109b..742e1889 100644 --- a/sparse/numba_backend/__init__.py +++ b/sparse/numba_backend/__init__.py @@ -157,7 +157,7 @@ where, ) from ._dok import DOK -from ._io import load_npz, save_npz +from ._io import from_binsparse, load_npz, save_npz from ._umath import elemwise from ._utils import random @@ -226,6 +226,7 @@ "float64", "floor", "floor_divide", + "from_binsparse", "full", "full_like", "greater", diff --git a/sparse/numba_backend/_common.py b/sparse/numba_backend/_common.py index d3440426..13e6f9bb 100644 --- a/sparse/numba_backend/_common.py +++ b/sparse/numba_backend/_common.py @@ -35,7 +35,7 @@ def _check_device(func): def wrapped(*args, **kwargs): device = kwargs.get("device", None) if device not in {"cpu", None}: - raise ValueError("Device must be `'cpu'` or `None`.") + raise BufferError("Device must be `'cpu'` or `None`.") return func(*args, **kwargs) return wrapped diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 85bce095..528c6aa2 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -11,7 +11,6 @@ from .._coo.core import COO from .._sparse_array import SparseArray from .._utils import ( - _zero_of_dtype, can_store, check_compressed_axes, check_fill_value, @@ -175,13 +174,9 @@ def __init__( 
if self.data.ndim != 1: raise ValueError("data must be a scalar or 1-dimensional.") - self.shape = shape - - if fill_value is None: - fill_value = _zero_of_dtype(self.data.dtype) + SparseArray.__init__(self, shape=shape, fill_value=fill_value) self._compressed_axes = tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None - self.fill_value = self.data.dtype.type(fill_value) if prune: self._prune() @@ -259,32 +254,6 @@ def nnz(self): """ return self.data.shape[0] - @property - def format(self): - """ - The storage format of this array. - - Returns - ------- - str - The storage format of this array. - - See Also - ------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. - - Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "gcxs" - @property def nbytes(self): """ @@ -443,7 +412,7 @@ def tocoo(self): fill_value=self.fill_value, ) uncompressed = uncompress_dimension(self.indptr) - coords = np.vstack((uncompressed, self.indices)) + coords = np.stack((uncompressed, self.indices)) order = np.argsort(self._axis_order) return ( COO( @@ -844,6 +813,12 @@ def isinf(self): def isnan(self): return self.tocoo().isnan().asformat("gcxs", compressed_axes=self.compressed_axes) + # `GCXS` is a reshaped/transposed `CSR`, but it can't (usually) + # be expressed in the `binsparse` 0.1 language. + # We are missing index maps. 
+ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + return super().__binsparse__() + class _Compressed2d(GCXS): class_compressed_axes: tuple[int] @@ -883,6 +858,29 @@ def from_numpy(cls, x, fill_value=0, idx_dtype=None): coo = COO.from_numpy(x, fill_value=fill_value, idx_dtype=idx_dtype) return cls.from_coo(coo, cls.class_compressed_axes, idx_dtype) + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + from sparse._version import __version__ + + data_dt = str(self.data.dtype) + if np.issubdtype(data_dt, np.complexfloating): + data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" + descriptor = { + "binsparse": { + "version": "0.1", + "format": self.format.upper(), + "shape": list(self.shape), + "number_of_stored_values": self.nnz, + "data_types": { + "pointers_to_1": str(self.indices.dtype), + "indices_1": str(self.indptr.dtype), + "values": data_dt, + }, + }, + "original_source": f"`sparse`, version {__version__}", + } + + return descriptor, [self.indptr, self.indices, self.data] + class CSR(_Compressed2d): """ diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index 2b4b5a82..71394a41 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -600,29 +600,6 @@ def nnz(self): """ return self.coords.shape[1] - @property - def format(self): - """ - The storage format of this array. - Returns - ------- - str - The storage format of this array. - See Also - -------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
- Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "coo" - @property def nbytes(self): """ @@ -1537,6 +1514,41 @@ def isnan(self): prune=True, ) + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + from sparse._version import __version__ + + data_dt = str(self.data.dtype) + if np.issubdtype(data_dt, np.complexfloating): + data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" + descriptor = { + "binsparse": { + "version": "0.1", + "format": { + "custom": { + "level": { + "level_desc": "sparse", + "rank": self.ndim, + "level": { + "level_desc": "element", + }, + } + } + } + if self.ndim != 2 + else "COOR", + "shape": list(self.shape), + "number_of_stored_values": self.nnz, + "data_types": { + "pointers_to_1": "uint64", + "indices_1": str(self.coords.dtype), + "values": data_dt, + }, + }, + "original_source": f"`sparse`, version {__version__}", + } + + return descriptor, [np.array([0, self.nnz], dtype=np.uint64), self.coords, self.data] + def as_coo(x, shape=None, fill_value=None, idx_dtype=None): """ diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py index 9c4e601d..5861f82e 100644 --- a/sparse/numba_backend/_dok.py +++ b/sparse/numba_backend/_dok.py @@ -271,29 +271,6 @@ def nnz(self): """ return len(self.data) - @property - def format(self): - """ - The storage format of this array. - Returns - ------- - str - The storage format of this array. - See Also - ------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
- Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "dok" - @property def nbytes(self): """ @@ -548,6 +525,9 @@ def reshape(self, shape, order="C"): return DOK.from_coo(self.to_coo().reshape(shape)) + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + raise RuntimeError("`DOK` doesn't support the `__binsparse__` protocol.") + def to_slice(k): """Convert integer indices to one-element slices for consistency""" diff --git a/sparse/numba_backend/_io.py b/sparse/numba_backend/_io.py index 24d9f1db..a5067d9b 100644 --- a/sparse/numba_backend/_io.py +++ b/sparse/numba_backend/_io.py @@ -1,7 +1,9 @@ import numpy as np -from ._compressed import GCXS +from ._common import _check_device +from ._compressed import CSC, CSR, GCXS from ._coo.core import COO +from ._sparse_array import SparseArray def save_npz(filename, matrix, compressed=True): @@ -130,3 +132,171 @@ def load_npz(filename): ) except KeyError as e: raise RuntimeError(f"The file {filename!s} does not contain a valid sparse matrix") from e + + +@_check_device +def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseArray: + desc, arrs = arr.__binsparse__() + + desc = desc["binsparse"] + version_tuple: tuple[int, ...] = tuple(int(v) for v in desc["version"].split(".")) + if version_tuple != (0, 1): + raise RuntimeError("Unsupported `__binsparse__` protocol version.") + + format = desc["format"] + format_err_str = f"Unsupported format: `{format!r}`." 
+ + if isinstance(format, str): + match format: + case "COO" | "COOR": + desc["format"] = { + "custom": { + "transpose": [0, 1], + "level": { + "level_desc": "sparse", + "rank": 2, + "level": { + "level_desc": "element", + }, + }, + } + } + case "CSC" | "CSR": + desc["format"] = { + "custom": { + "transpose": [0, 1] if format == "CSR" else [1, 0], + "level": { + "level_desc": "dense", + "level": { + "level_desc": "sparse", + "level": { + "level_desc": "element", + }, + }, + }, + }, + } + case _: + raise RuntimeError(format_err_str) + + format = desc["format"]["custom"] + rank = 0 + level = format.get("level", {})  # start below the "custom" wrapper so only storage levels are counted + while "level" in level: + if "rank" not in level: + level["rank"] = 1 + rank += level["rank"] + level = level["level"] + if "transpose" not in format: + format["transpose"] = list(range(rank)) + + match desc: + case { + "format": { + "custom": { + "transpose": transpose, + "level": { + "level_desc": "sparse", + "rank": ndim, + "level": { + "level_desc": "element", + }, + }, + }, + }, + "shape": shape, + "number_of_stored_values": nnz, + "data_types": { + "pointers_to_1": _, + "indices_1": coords_dtype, + "values": value_dtype, + }, + **_kwargs, + }: + if transpose != list(range(ndim)): + raise RuntimeError(format_err_str) + + ptr_arr: np.ndarray = np.from_dlpack(arrs[0]) + start, end = ptr_arr + if copy is False and not (start == 0 and end == nnz):  # pointers must span every stored value + raise RuntimeError(format_err_str) + + coord_arr: np.ndarray = np.from_dlpack(arrs[1]) + value_arr: np.ndarray = np.from_dlpack(arrs[2]) + + _check_binsparse_dt(coord_arr, coords_dtype) + _check_binsparse_dt(value_arr, value_dtype) + + return COO( + coord_arr[:, start:end], + value_arr, + shape=shape, + has_duplicates=False, + sorted=True, + prune=False, + idx_dtype=coord_arr.dtype, + ) + case { + "format": { + "custom": { + "transpose": transpose, + "level": { + "level_desc": "dense", + "rank": 1, + "level": { + "level_desc": "sparse", + "rank": 1, + "level": { + "level_desc": "element", + }, + }, + }, + }, + }, + "shape":
shape, + "number_of_stored_values": nnz, + "data_types": { + "pointers_to_1": ptr_dtype, + "indices_1": crd_dtype, + "values": val_dtype, + }, + **_kwargs, + }: + crd_arr = np.from_dlpack(arrs[0]) + _check_binsparse_dt(crd_arr, crd_dtype) + ptr_arr = np.from_dlpack(arrs[1]) + _check_binsparse_dt(ptr_arr, ptr_dtype) + val_arr = np.from_dlpack(arrs[2]) + _check_binsparse_dt(val_arr, val_dtype) + + match transpose: + case [0, 1]: + sparse_type = CSR + case [1, 0]: + sparse_type = CSC + case _: + raise RuntimeError(format_err_str) + + return sparse_type((val_arr, ptr_arr, crd_arr), shape=shape) + case _: + raise RuntimeError(format_err_str) + + +def _convert_binsparse_dtype(dt: str) -> np.dtype: + if dt.startswith("complex[float") and dt.endswith("]"): + complex_bits = 2 * int(dt[len("complex[float") : -len("]")]) + dt: str = f"complex{complex_bits}" + + return np.dtype(dt) + + +def _check_binsparse_dt(arr: np.ndarray, dt: str) -> None: + invalid_dtype_str = "Invalid dtype: `{dtype!s}`, expected `{expected!s}`." + dt = _convert_binsparse_dtype(dt) + if dt != arr.dtype: + raise BufferError( + invalid_dtype_str.format( + dtype=arr.dtype, + expected=dt, + ) + ) diff --git a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py index 7f47c6eb..a469dfb4 100644 --- a/sparse/numba_backend/_sparse_array.py +++ b/sparse/numba_backend/_sparse_array.py @@ -145,6 +145,34 @@ def size(self): # returns a float64 for an empty shape. return reduce(operator.mul, self.shape, 1) + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + [`scipy.sparse.coo_matrix.format`][] : The Scipy equivalent property. + [`scipy.sparse.csr_matrix.format`][] : The Scipy equivalent property. + [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5, 5), density=0.2, format="dok") + >>> s.format + 'dok' + >>> t = sparse.random((5, 5), density=0.2, format="coo") + >>> t.format + 'coo' + """ + return type(self).__name__.lower() + @property def density(self): """ @@ -218,6 +246,22 @@ def _str_impl(self, summary): except (ImportError, ValueError): return summary + @abstractmethod + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + """Return a 2-tuple: + * First element is a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) + of this array. + * Second element is a `list[np.ndarray]` of the constituent arrays. + + Returns + ------- + dict + Parsed `binsparse` descriptor. + list[np.ndarray] + The constituent arrays + """ + raise NotImplementedError + @abstractmethod def asformat(self, format): """ diff --git a/sparse/numba_backend/tests/test_coo.py b/sparse/numba_backend/tests/test_coo.py index c929fff3..ca0156cf 100644 --- a/sparse/numba_backend/tests/test_coo.py +++ b/sparse/numba_backend/tests/test_coo.py @@ -1883,7 +1883,7 @@ def test_invalid_device(func, args, kwargs): like = sparse.random((5, 5), density=0.5) args = (like,) + args - with pytest.raises(ValueError, match="Device must be"): + with pytest.raises(BufferError, match="Device must be"): func(*args, device="invalid_device", **kwargs) diff --git a/sparse/numba_backend/tests/test_io.py b/sparse/numba_backend/tests/test_io.py index 060b9263..c6caa25f 100644 --- a/sparse/numba_backend/tests/test_io.py +++ b/sparse/numba_backend/tests/test_io.py @@ -28,3 +28,13 @@ def test_load_wrong_format_exception(tmp_path): np.savez(filename, x) with pytest.raises(RuntimeError): load_npz(filename) + + +@pytest.mark.parametrize( + "format", ["coo", "csr", pytest.param("csc", marks=pytest.mark.xfail(reason="`CSC<>COO` round-trip broken"))] +) +def test_round_trip_binsparse(format: str) -> None: + x = sparse.random((20, 30), 
density=0.25, format=format) + y = sparse.from_binsparse(x) + + assert_eq(x, y) diff --git a/sparse/numba_backend/tests/test_namespace.py b/sparse/numba_backend/tests/test_namespace.py index 39556f99..ca1a4277 100644 --- a/sparse/numba_backend/tests/test_namespace.py +++ b/sparse/numba_backend/tests/test_namespace.py @@ -67,6 +67,7 @@ def test_namespace(): "float64", "floor", "floor_divide", + "from_binsparse", "full", "full_like", "greater",