Skip to content

Commit

Permalink
Merge pull request #45 from willow-ahrens/asarray-copy
Browse files Browse the repository at this point in the history
`copy` keyword for `asarray`, `Tensor`, and `astype`
  • Loading branch information
mtsokol authored Jun 6, 2024
2 parents 0971088 + 282b8ae commit e0b37d6
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 33 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "finch-tensor"
version = "0.1.25"
version = "0.1.26"
description = ""
authors = ["Willow Ahrens <[email protected]>"]
readme = "README.md"
Expand Down
88 changes: 63 additions & 25 deletions src/finch/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class Tensor(_Display, SparseArray):
order. Default: row-major.
fill_value : np.number, optional
Only used when `numpy.ndarray` or `scipy.sparse` is passed.
copy : bool, optional
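If ``True``, then the object is copied. If ``None``, the object is copied only if needed.
If ``False``, a ``ValueError`` is raised when a copy cannot be avoided. Default: ``None``.
``copy=True`` is not supported for ``Storage`` or raw Julia object inputs, and ``copy=False``
is not supported for Python scalars and lists; both cases raise a ``ValueError``.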
Returns
-------
Expand Down Expand Up @@ -86,26 +89,35 @@ def __init__(
/,
*,
fill_value: np.number | None = None,
copy: bool | None = None,
):
if isinstance(obj, (int, float, complex, bool, list)):
obj = np.array(obj)
if copy is False:
raise ValueError("copy=False isn't supported for scalar inputs and Python lists")
obj = np.asarray(obj)
if fill_value is None:
fill_value = 0.0

if _is_scipy_sparse_obj(obj): # scipy constructor
jl_data = self._from_scipy_sparse(obj, fill_value=fill_value)
jl_data = self._from_scipy_sparse(obj, fill_value=fill_value, copy=copy)
self._obj = jl_data
elif isinstance(obj, np.ndarray): # numpy constructor
jl_data = self._from_numpy(obj, fill_value=fill_value)
jl_data = self._from_numpy(obj, fill_value=fill_value, copy=copy)
self._obj = jl_data
elif isinstance(obj, Storage): # from-storage constructor
if copy:
self._raise_julia_copy_not_supported()
order = self.preprocess_order(
obj.order, self.get_lvl_ndim(obj.levels_descr._obj)
)
self._obj = jl.swizzle(jl.Tensor(obj.levels_descr._obj), *order)
elif jl.isa(obj, jl.Finch.Tensor): # raw-Julia-object constructors
if copy:
self._raise_julia_copy_not_supported()
self._obj = jl.swizzle(obj, *tuple(range(1, jl.ndims(obj) + 1)))
elif jl.isa(obj, jl.Finch.SwizzleArray) or jl.isa(obj, jl.Finch.LazyTensor):
if copy:
self._raise_julia_copy_not_supported()
self._obj = obj
elif isinstance(obj, Tensor):
self._obj = obj._obj
Expand Down Expand Up @@ -283,6 +295,16 @@ def _is_dense(self) -> bool:
def _order(self) -> tuple[int, ...]:
return jl.typeof(self._obj).parameters[1]

@property
def device(self) -> str:
    """Device identifier for this tensor; this implementation always returns ``"cpu"``."""
    return "cpu"

def to_device(self, device: Device, /, *, stream: int | Any | None = None) -> "Tensor":
    """Return this tensor on the requested device.

    Parameters
    ----------
    device : Device
        Target device; only ``"cpu"`` is accepted.
    stream : int | Any | None, optional
        Accepted but not used by this implementation.

    Returns
    -------
    Tensor
        ``self`` -- the same object is handed back unchanged.

    Raises
    ------
    ValueError
        If ``device`` is anything other than ``"cpu"``.
    """
    # The only supported target is the CPU, so the request is a no-op.
    if device == "cpu":
        return self

    raise ValueError("Only `device='cpu'` is supported.")

def is_computed(self) -> bool:
    """Return ``True`` unless the wrapped Julia object is a ``Finch.LazyTensor``."""
    return not jl.isa(self._obj, jl.Finch.LazyTensor)

Expand Down Expand Up @@ -368,7 +390,9 @@ def _from_other_tensor(cls, tensor: "Tensor", storage: Storage | None) -> JuliaO
)

@classmethod
def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
def _from_numpy(cls, arr: np.ndarray, fill_value: np.number, copy: bool | None = None) -> JuliaObj:
if copy:
arr = arr.copy()
order_char = "F" if np.isfortran(arr) else "C"
order = cls.preprocess_order(order_char, arr.ndim)
inv_order = tuple(i - 1 for i in jl.invperm(order))
Expand All @@ -385,21 +409,31 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
return jl.swizzle(jl.Tensor(lvl._obj), *order)

@classmethod
def from_scipy_sparse(
    cls,
    x,
    fill_value: np.number | None = None,
    copy: bool | None = None,
) -> "Tensor":
    """Build a ``Tensor`` from a SciPy sparse object.

    Parameters
    ----------
    x
        Object to convert; it must pass the ``_is_scipy_sparse_obj`` check.
    fill_value : np.number, optional
        Forwarded unchanged to the ``Tensor`` constructor.
    copy : bool, optional
        Forwarded unchanged to the ``Tensor`` constructor.

    Returns
    -------
    Tensor
        The tensor constructed from ``x``.

    Raises
    ------
    ValueError
        If ``x`` is not a SciPy sparse object.
    """
    if not _is_scipy_sparse_obj(x):
        # f-string so the offending object actually appears in the message;
        # the previous plain literal emitted the text "{x}" verbatim.
        raise ValueError(f"{x!r} is not a SciPy sparse object.")
    return Tensor(x, fill_value=fill_value, copy=copy)

@classmethod
def _from_scipy_sparse(cls, x, fill_value=None) -> JuliaObj:
def _from_scipy_sparse(
cls,
x,
*,
fill_value: np.number | None = None,
copy: bool | None = None,
) -> JuliaObj:
if copy is False and not (x.format in ("coo", "csr", "csc") and x.has_canonical_format):
raise ValueError("Unable to avoid copy while creating an array as requested.")
if x.format not in ("coo", "csr", "csc"):
x = x.asformat("coo")
if not x.has_canonical_format:
warnings.warn(
"SciPy sparse input must be in a canonical format. "
"Calling `sum_duplicates`."
)
if copy:
x = x.copy()
if not x.has_canonical_format:
x.sum_duplicates()
assert x.has_canonical_format

Expand Down Expand Up @@ -581,15 +615,9 @@ def to_scipy_sparse(self, accept_fv=None):
else:
raise ValueError("Tensor can't be converted to scipy.sparse object.")

@property
def device(self) -> str:
return "cpu"

def to_device(self, device: Device, /, *, stream: int | Any | None = None) -> "Tensor":
if device != "cpu":
raise ValueError("Only `device='cpu'` is supported.")

return self
@staticmethod
def _raise_julia_copy_not_supported() -> None:
    """Raise the error used to reject ``copy=True`` for Julia object inputs.

    Raises
    ------
    ValueError
        Always.
    """
    raise ValueError("copy=True isn't supported for Julia object inputs")

def __array_namespace__(self, *, api_version: str | None = None) -> Any:
if api_version is None:
Expand All @@ -615,13 +643,23 @@ def random(shape, density=0.01, random_state=None):
return Tensor(jl.fsprand(*args))


def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
def asarray(
obj,
/,
*,
dtype: DType | None = None,
format: str | None = None,
fill_value: np.number | None = None,
device: Device | None = None,
copy: bool | None = None,
) -> Tensor:
if format not in {"coo", "csr", "csc", "csf", "dense", None}:
raise ValueError(f"{format} format not supported.")
_validate_device(device)
tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value)

tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value, copy=copy)
if format is not None:
if copy is False:
raise ValueError("Unable to avoid copy while creating an array as requested.")
order = tensor.get_order()
if format == "coo":
storage = Storage(SparseCOO(tensor.ndim, Element(tensor.fill_value)), order)
Expand All @@ -643,7 +681,7 @@ def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
tensor = tensor.to_storage(storage)

if dtype is not None:
return astype(tensor, dtype)
return astype(tensor, dtype, copy=copy)
else:
return tensor

Expand Down
7 changes: 4 additions & 3 deletions tests/test_scipy_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,10 @@ def test_from_scipy_sparse(format_with_pattern, fill_value):
def test_non_canonical_format(format):
sp_arr = sp.random(3, 4, density=0.5, format=format)

with pytest.warns(
UserWarning, match="SciPy sparse input must be in a canonical format."
with pytest.raises(
ValueError, match="Unable to avoid copy while creating an array"
):
finch_arr = finch.asarray(sp_arr)
finch.asarray(sp_arr, copy=False)

finch_arr = finch.asarray(sp_arr)
assert_equal(finch_arr.todense(), sp_arr.toarray())
11 changes: 7 additions & 4 deletions tests/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,17 @@ def test_wrappers(dtype, jl_dtype, order):

@pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
@pytest.mark.parametrize("order", ["C", "F", None])
def test_no_copy_fully_dense(dtype, order, arr3d):
@pytest.mark.parametrize("copy", [True, False, None])
def test_copy_fully_dense(dtype, order, copy, arr3d):
arr = np.array(arr3d, dtype=dtype, order=order)
arr_finch = finch.Tensor(arr)
arr_finch = finch.Tensor(arr, copy=copy)
arr_todense = arr_finch.todense()

assert_equal(arr_todense, arr)
assert np.shares_memory(arr_todense, arr)

if copy:
assert not np.shares_memory(arr_todense, arr)
else:
assert np.shares_memory(arr_todense, arr)

def test_coo(rng):
coords = (
Expand Down

0 comments on commit e0b37d6

Please sign in to comment.