Nick/fortran ordering (#373)
* Internal: Try new link checker

* Internal: Add codespell and fix typos.

* Internal: See if codespell precommit finds config.

* Internal: Found config. Now enable reading it

* MATLAB: Add initial support for additional MATLAB functionality.

* DEV: Updating contributing doc for more details on adding a tutorial

* Fix internal calls to avoid Fortran ordering warnings in tutorials

* Update one tutorial with "F" order to avoid warnings.

* Check doctests for Fortran ordering warnings

* Add tests to verify dense tensor ops return arrays with the correct memory layout:
* Only required fixing mttkrp and double

* Add a note about memory layout at top level high visibility areas

* Add small utility for memory layout management (see the sketch below)

* TTENSOR: Propagate order
* Assumes core ordering is correct

* KTENSOR: Propagate order

* TENMAT: Propagate order

* SPTENSOR: Propagate order
* Ensure Fortran order for returned data, but not for index arrays

* SPTENMAT: Propagate order
* Doesn't return arrays so minimal changes needed?

* SUMTENSOR: Propagate order
* Mostly calls the ops from its parts

* Plumb through order printout

* Remove global warnings filter
* See if anything blows up

Closes #368
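
The memory-layout utility mentioned above is used in the changed classes as `to_memory_order(array, order, copy=...)`. A minimal sketch of what such a helper can look like, with the signature inferred from its call sites in this commit (the actual pyttb implementation may differ):

```python
import numpy as np


def to_memory_order(array: np.ndarray, order: str, copy: bool = False) -> np.ndarray:
    """Return ``array`` in the requested memory layout ("C" or "F").

    Sketch only; signature inferred from call sites such as
    ``to_memory_order(fm, self.order, copy=True)`` in ktensor.py.
    """
    if copy:
        return array.copy(order=order)
    if order == "F":
        # No-op if the array is already Fortran-contiguous.
        return np.asfortranarray(array)
    return np.ascontiguousarray(array)
```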
ntjohnson1 authored Jan 6, 2025
1 parent 7e97fea commit a508707
Showing 24 changed files with 870 additions and 406 deletions.
13 changes: 13 additions & 0 deletions README.md
@@ -56,6 +56,19 @@ CP_ALS:
Final f = 7.508253e-01
```

### Memory layout
For historical reasons we use the Fortran (column-major) memory layout, whereas NumPy defaults to the C (row-major) layout.
This is relevant for indexing. In the future we hope to extend support to both.
```python
>>> import numpy as np
>>> c_order = np.arange(8).reshape((2,2,2))
>>> f_order = np.arange(8).reshape((2,2,2), order="F")
>>> print(c_order[0,1,1])
3
>>> print(f_order[0,1,1])
6
```
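
If you already have a C-ordered array, `np.asfortranarray` converts it (copying only when a conversion is actually needed), which can avoid extra copies and layout warnings when constructing pyttb objects:
```python
>>> import numpy as np
>>> c_order = np.arange(8).reshape((2, 2, 2))
>>> f_order = np.asfortranarray(c_order)
>>> f_order.flags["F_CONTIGUOUS"]
True
```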

<!-- markdown-link-check-disable -->
### Getting Help
- [Documentation](https://pyttb.readthedocs.io)
131 changes: 131 additions & 0 deletions conftest.py
@@ -4,11 +4,13 @@
# U.S. Government retains certain rights in this software.

import numpy
import numpy as np

# content of conftest.py
import pytest

import pyttb
import pyttb as ttb


@pytest.fixture(autouse=True)
@@ -17,6 +19,12 @@ def add_packages(doctest_namespace): # noqa: D103
doctest_namespace["ttb"] = pyttb


@pytest.fixture(params=[{"order": "F"}, {"order": "C"}])
def memory_layout(request):
"""Test C and F memory layouts."""
return request.param
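
A test built on this fixture runs once per layout; a hypothetical usage example (the test name and assertions here are illustrative, not part of this commit) checking the behavior this commit targets:

```python
import numpy as np
import pyttb as ttb


def test_double_returns_fortran_layout(memory_layout):
    order = memory_layout["order"]
    data = np.arange(8, dtype=float).reshape((2, 2, 2), order=order)
    result = ttb.tensor(data).double()
    # Dense exports are expected to come back Fortran-ordered regardless of input layout.
    assert result.flags["F_CONTIGUOUS"]
```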


def pytest_addoption(parser): # noqa: D103
parser.addoption(
"--packaging",
@@ -30,3 +38,126 @@ def pytest_addoption(parser): # noqa: D103
def pytest_configure(config): # noqa: D103
if not config.option.packaging:
config.option.markexpr = "not packaging"


@pytest.fixture()
def sample_tensor_2way(): # noqa: D103
data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
shape = (2, 3)
params = {"data": data, "shape": shape}
tensorInstance = ttb.tensor(data, shape)
return params, tensorInstance


@pytest.fixture()
def sample_tensor_3way(): # noqa: D103
data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
shape = (2, 3, 2)
params = {"data": np.reshape(data, np.array(shape), order="F"), "shape": shape}
tensorInstance = ttb.tensor(data, shape)
return params, tensorInstance


@pytest.fixture()
def sample_ndarray_1way(): # noqa: D103
shape = (16,)
ndarrayInstance = np.reshape(np.arange(1, 17), shape, order="F")
params = {"data": ndarrayInstance, "shape": shape}
return params, ndarrayInstance


@pytest.fixture()
def sample_ndarray_2way(): # noqa: D103
shape = (4, 4)
ndarrayInstance = np.reshape(np.arange(1, 17), shape, order="F")
params = {"data": ndarrayInstance, "shape": shape}
return params, ndarrayInstance


@pytest.fixture()
def sample_ndarray_4way(): # noqa: D103
shape = (2, 2, 2, 2)
ndarrayInstance = np.reshape(np.arange(1, 17), shape, order="F")
params = {"data": ndarrayInstance, "shape": shape}
return params, ndarrayInstance


@pytest.fixture()
def sample_tenmat_4way(): # noqa: D103
shape = (4, 4)
data = np.reshape(np.arange(1, 17), shape, order="F")
tshape = (2, 2, 2, 2)
rdims = np.array([0, 1])
cdims = np.array([2, 3])
tenmatInstance = ttb.tenmat()
tenmatInstance.tshape = tshape
tenmatInstance.rindices = rdims.copy()
tenmatInstance.cindices = cdims.copy()
tenmatInstance.data = data.copy()
params = {
"data": data,
"rdims": rdims,
"cdims": cdims,
"tshape": tshape,
"shape": shape,
}
return params, tenmatInstance


@pytest.fixture()
def sample_tensor_4way(): # noqa: D103
data = np.arange(1, 17)
shape = (2, 2, 2, 2)
params = {"data": np.reshape(data, np.array(shape), order="F"), "shape": shape}
tensorInstance = ttb.tensor(data, shape)
return params, tensorInstance


@pytest.fixture()
def sample_ktensor_2way(): # noqa: D103
weights = np.array([1.0, 2.0])
fm0 = np.array([[1.0, 2.0], [3.0, 4.0]])
fm1 = np.array([[5.0, 6.0], [7.0, 8.0]])
factor_matrices = [fm0, fm1]
data = {"weights": weights, "factor_matrices": factor_matrices}
ktensorInstance = ttb.ktensor(factor_matrices, weights)
return data, ktensorInstance


@pytest.fixture()
def sample_ktensor_3way(): # noqa: D103
rank = 2
shape = (2, 3, 4)
vector = np.arange(1, rank * sum(shape) + 1).astype(float)
weights = 2 * np.ones(rank).astype(float)
vector_with_weights = np.concatenate((weights, vector), axis=0)
# vector_with_weights = vector_with_weights.reshape((len(vector_with_weights), 1))
# ground truth
fm0 = np.array([[1.0, 3.0], [2.0, 4.0]])
fm1 = np.array([[5.0, 8.0], [6.0, 9.0], [7.0, 10.0]])
fm2 = np.array([[11.0, 15.0], [12.0, 16.0], [13.0, 17.0], [14.0, 18.0]])
factor_matrices = [fm0, fm1, fm2]
data = {
"weights": weights,
"factor_matrices": factor_matrices,
"vector": vector,
"vector_with_weights": vector_with_weights,
"shape": shape,
}
ktensorInstance = ttb.ktensor(factor_matrices, weights)
return data, ktensorInstance


@pytest.fixture()
def sample_ktensor_symmetric(): # noqa: D103
weights = np.array([1.0, 1.0])
fm0 = np.array(
[[2.340431417384394, 4.951967353890655], [4.596069112758807, 8.012451489774961]]
)
fm1 = np.array(
[[2.340431417384394, 4.951967353890655], [4.596069112758807, 8.012451489774961]]
)
factor_matrices = [fm0, fm1]
data = {"weights": weights, "factor_matrices": factor_matrices}
ktensorInstance = ttb.ktensor(factor_matrices, weights)
return data, ktensorInstance
12 changes: 12 additions & 0 deletions docs/source/index.rst
@@ -39,6 +39,18 @@ algorithms for computing low-rank tensor models.

Getting Started
===============
For historical reasons we use the Fortran (column-major) memory layout, whereas NumPy defaults to the C (row-major) layout.
This is relevant for indexing. In the future we hope to extend support to both.

.. code-block:: python

    >>> import numpy as np
    >>> c_order = np.arange(8).reshape((2,2,2))
    >>> f_order = np.arange(8).reshape((2,2,2), order="F")
    >>> print(c_order[0,1,1])
    3
    >>> print(f_order[0,1,1])
    6

.. toctree::
   :maxdepth: 1
8 changes: 5 additions & 3 deletions docs/source/tutorial/class_tensor.ipynb
@@ -857,7 +857,7 @@
"outputs": [],
"source": [
"np.random.seed(0)\n",
"A = ttb.tensor(np.floor(3 * np.random.rand(2, 2, 3))) # Generate some data.\n",
"A = ttb.tensor(np.floor(3 * np.random.rand(2, 2, 3), order=\"F\")) # Generate some data.\n",
"A.tenfun(lambda x: x + 1) # Increment every element of A by one."
]
},
@@ -882,12 +882,14 @@
"outputs": [],
"source": [
"np.random.seed(0)\n",
"C = ttb.tensor(np.floor(5 * np.random.rand(2, 2, 3))) # Create another tensor.\n",
"C = ttb.tensor(\n",
" np.floor(5 * np.random.rand(2, 2, 3), order=\"F\")\n",
") # Create another tensor.\n",
"\n",
"\n",
"def elementwise_mean(X):\n",
" # finding mean for the columns\n",
" return np.floor(np.mean(X, axis=0))\n",
" return np.floor(np.mean(X, axis=0), order=\"F\")\n",
"\n",
"\n",
"A.tenfun(elementwise_mean, B, C) # Elementwise means for A, B, and C."
4 changes: 1 addition & 3 deletions pyttb/__init__.py
@@ -40,9 +40,7 @@ def ignore_warnings(ignore=True):
warnings.simplefilter("default")


ignore_warnings(True)

# Ruff inspection rules are too strict heres
# Ruff inspection rules are too strict here
__all__ = [ # noqa: PLE0604
cp_als.__name__,
cp_apr.__name__,
14 changes: 9 additions & 5 deletions pyttb/cp_apr.py
@@ -521,7 +521,9 @@ def tt_cp_apr_pdnr( # noqa: PLR0912,PLR0913,PLR0915
if isinstance(input_tensor, ttb.tensor) and isSparse is False:
# Data is not a sparse tensor.
Pi = tt_calcpi_prowsubprob(input_tensor, M, rank, n, N, isSparse)
X_mat = input_tensor.to_tenmat(np.array([n]), copy=False).data
X_mat = input_tensor.to_tenmat(
np.array([n], order=input_tensor.order), copy=False
).data

num_rows = M.factor_matrices[n].shape[0]
isRowNOTconverged = np.zeros((num_rows,))
@@ -876,7 +878,9 @@ def tt_cp_apr_pqnr( # noqa: PLR0912,PLR0913,PLR0915
if not isinstance(input_tensor, ttb.sptensor) and not isSparse:
# Data is not a sparse tensor.
Pi = tt_calcpi_prowsubprob(input_tensor, M, rank, n, N, isSparse)
X_mat = input_tensor.to_tenmat(np.array([n]), copy=False).data
X_mat = input_tensor.to_tenmat(
np.array([n], order=input_tensor.order), copy=False
).data

num_rows = M.factor_matrices[n].shape[0]
isRowNOTconverged = np.zeros((num_rows,))
@@ -1772,7 +1776,7 @@ def calculate_phi( # noqa: PLR0913
)
Phi[:, r] = Yr
else:
Xn = Data.to_tenmat(np.array([factorIndex]), copy=False).data
Xn = Data.to_tenmat(np.array([factorIndex], order=Data.order), copy=False).data
V = Model.factor_matrices[factorIndex].dot(Pi.transpose())
W = Xn / np.maximum(V, epsilon)
Y = W.dot(Pi)
@@ -1817,8 +1821,8 @@ def tt_loglikelihood(
np.sum(Data.vals * np.log(np.sum(A, axis=1))[:, None])
- np.sum(Model.factor_matrices[0])
)
dX = Data.to_tenmat(np.array([1]), copy=False).data
dM = Model.to_tenmat(np.array([1]), copy=False).data
dX = Data.to_tenmat(np.array([1], order=Data.order), copy=False).data
dM = Model.to_tenmat(np.array([1], order=Model.order), copy=False).data
f = 0
for i in range(dX.shape[0]):
for j in range(dX.shape[1]):
42 changes: 33 additions & 9 deletions pyttb/ktensor.py
@@ -37,6 +37,7 @@
np_to_python,
parse_one_d,
parse_shape,
to_memory_order,
tt_dimscheck,
tt_ind2sub,
)
@@ -74,7 +75,7 @@ class ktensor:

__slots__ = ("weights", "factor_matrices")

def __init__(
def __init__( # noqa: PLR0912
self,
factor_matrices: Optional[Sequence[np.ndarray]] = None,
weights: Optional[np.ndarray] = None,
@@ -147,7 +148,7 @@ def __init__(

# Empty constructor
if factor_matrices is None and weights is None:
self.weights = np.array([])
self.weights = np.array([], order=self.order)
self.factor_matrices: List[np.ndarray] = []
return

@@ -183,17 +184,30 @@
)
# make copy or use reference
if copy:
self.weights = weights.copy()
self.weights = weights.copy(self.order)
else:
self.weights = weights
if not self._matches_order(weights):
logging.warning(
f"Selected no copy, but input weights aren't {self.order} "
"ordered so must copy."
)
self.weights = to_memory_order(weights, self.order)
else:
# create weights if not provided
self.weights = np.ones(num_components)
self.weights = np.ones(num_components, order=self.order)

# process factor_matrices
if copy:
self.factor_matrices = [fm.copy() for fm in factor_matrices]
self.factor_matrices = [fm.copy(order=self.order) for fm in factor_matrices]
else:
if not all(self._matches_order(factor) for factor in factor_matrices):
logging.warning(
"Selected no copy, but input factor matrices aren't "
f"{self.order} ordered so must copy."
)
factor_matrices = [
to_memory_order(fm, self.order, copy=True) for fm in factor_matrices
]
if not isinstance(factor_matrices, list):
logging.warning("Must provide factor matrices as list to avoid copy")
factor_matrices = list(factor_matrices)
@@ -419,6 +433,14 @@ def order(self) -> Literal["F"]:
"""Return the data layout of the underlying storage."""
return "F"

def _matches_order(self, array: np.ndarray) -> bool:
"""Check if provided array matches tensor memory layout."""
if array.flags["C_CONTIGUOUS"] and self.order == "C":
return True
if array.flags["F_CONTIGUOUS"] and self.order == "F":
return True
return False
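
`_matches_order` relies on NumPy's contiguity flags, which expose the layout directly:

```python
>>> import numpy as np
>>> a = np.arange(8).reshape((2, 4))  # C-ordered by default
>>> a.flags["C_CONTIGUOUS"], a.flags["F_CONTIGUOUS"]
(True, False)
>>> b = np.zeros((2, 4), order="F")
>>> b.flags["C_CONTIGUOUS"], b.flags["F_CONTIGUOUS"]
(False, True)
```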

def arrange(
self,
weight_factor: Optional[int] = None,
@@ -924,7 +946,9 @@ def min_split_dims(dims: Tuple[int, ...]):
data = (
ttb.khatrirao(*self.factor_matrices[:i_split], reverse=True) * self.weights
) @ ttb.khatrirao(*self.factor_matrices[i_split:], reverse=True).T
return ttb.tensor(data, self.shape, copy=False)
# Copy needed to ensure F order. Transpose above means both elements are
# different layout. If originally in C order can save on this copy.
return ttb.tensor(data, self.shape, copy=True)

def to_tenmat(
self,
@@ -1237,7 +1261,7 @@ def mttkrp(
W = W * (self.factor_matrices[i].T @ U[i])

# Find each column of answer by multiplying columns of X.u{n} with weights
return self.factor_matrices[n] @ W
return to_memory_order(self.factor_matrices[n] @ W, self.order)

@property
def ncomponents(self) -> int:
@@ -1678,7 +1702,7 @@ def score(
# Compute all possible vector-vector congruences.

# Compute every pair for each mode
Cbig = ttb.tensor.from_function(np.zeros, (RA, RB, N))
Cbig = ttb.tensor(np.zeros((RA, RB, N), order=self.order))
for n in range(N):
Cbig[:, :, n] = np.abs(A.factor_matrices[n].T @ B.factor_matrices[n])

