Skip to content

Commit

Permalink
Make matrix mod 2 conversion to numpy faster
Browse files Browse the repository at this point in the history
  • Loading branch information
user202729 committed Jan 6, 2025
1 parent 1be0a58 commit b1d65a8
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 28 deletions.
10 changes: 5 additions & 5 deletions src/bin/sage-fixdoctests
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import re
import shlex
import subprocess
import sys
import textwrap

from argparse import ArgumentParser
from collections import defaultdict
Expand Down Expand Up @@ -303,7 +304,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
got = re.sub(r'(doctest:warning).*^( *DeprecationWarning:)',
r'\1...\n\2',
got, 1, re.DOTALL | re.MULTILINE)
got = got.splitlines() # got can't be the empty string
got = textwrap.dedent(got).splitlines() # got can't be the empty string

if args.only_tags:
if args.verbose:
Expand All @@ -329,7 +330,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
if not expected:
indent = (len(src_in_lines[first_line_num - 1]) - len(src_in_lines[first_line_num - 1].lstrip()))
append_to_line(line_num - 1,
'\n' + '\n'.join('%s%s' % (' ' * indent, line.lstrip()) for line in got),
'\n' + '\n'.join(' ' * indent + line for line in got),
message="Adding the new output")
return

Expand All @@ -338,8 +339,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
# has the smallest indentation after lstrip(). Note that the amount of indentation
# required could be negative if the ``got`` block is indented. In this case
# ``indent`` is set to zero.
indent = max(0, (len(src_in_lines[line_num]) - len(src_in_lines[line_num].lstrip())
- min(len(got[j]) - len(got[j].lstrip()) for j in range(len(got)))))
indent = len(src_in_lines[line_num]) - len(src_in_lines[line_num].lstrip())

# Double check that what was expected was indeed in the source file and if
# it is not then then print a warning for the user which contains the
Expand All @@ -358,7 +358,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
update_line(line_num, None,
message="Expected something, got nothing; deleting the old output")
else:
update_line(line_num, '\n'.join((' ' * indent + got[i]) for i in range(len(got))),
update_line(line_num, '\n'.join(' ' * indent + line for line in got),
message="Replacing the old expected output with the new output")

# Mark any remaining `expected` lines as ``None`` so as to preserve the line numbering
Expand Down
82 changes: 68 additions & 14 deletions src/sage/matrix/matrix1.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ import sage.modules.free_module
from sage.structure.coerce cimport coercion_model


_MISSING = object()


cdef class Matrix(Matrix0):
###################################################
# Coercion to Various Systems
Expand Down Expand Up @@ -670,7 +673,7 @@ cdef class Matrix(Matrix0):
entries = [[sib(v, 2) for v in row] for row in self.rows()]
return sib.name('matrix')(self.base_ring(), entries)

def numpy(self, dtype=None, copy=True):
def numpy(self, dtype=None, copy=_MISSING):
"""
Return the Numpy matrix associated to this matrix.
Expand All @@ -680,10 +683,6 @@ cdef class Matrix(Matrix0):
then the type will be determined as the minimum type required
to hold the objects in the sequence.
- ``copy`` -- if `self` is already an `ndarray`, then this flag
determines whether the data is copied (the default), or whether
a view is constructed.
EXAMPLES::
sage: # needs numpy
Expand All @@ -708,14 +707,14 @@ cdef class Matrix(Matrix0):
Type ``numpy.typecodes`` for a list of the possible
typecodes::
sage: import numpy # needs numpy
sage: numpy.typecodes.items() # needs numpy # random
sage: import numpy # needs numpy
sage: numpy.typecodes.items() # needs numpy # random
[('All', '?bhilqpBHILQPefdgFDGSUVOMm'), ('AllFloat', 'efdgFDG'),
...
For instance, you can see possibilities for real floating point numbers::
sage: numpy.typecodes['Float'] # needs numpy
sage: numpy.typecodes['Float'] # needs numpy
'efdg'
Alternatively, numpy automatically calls this function (via
Expand All @@ -733,15 +732,70 @@ cdef class Matrix(Matrix0):
dtype('int64') # 64-bit
sage: b.shape
(3, 4)
TESTS::
sage: # needs numpy
sage: matrix(3, range(12)).numpy(copy=False)
doctest:warning...
DeprecationWarning: passing copy argument to numpy() is deprecated
See https://github.com/sagemath/sage/issues/39152 for details.
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
"""
if copy is not _MISSING:
from sage.misc.superseded import deprecation
deprecation(39152, "passing copy argument to numpy() is deprecated")
import numpy
A = numpy.matrix(self.list(), dtype=dtype, copy=copy)
return numpy.resize(A,(self.nrows(), self.ncols()))
return numpy.asarray(self.list(), dtype=dtype).reshape(self.nrows(), self.ncols())

def __array__(self, dtype=None, copy=None):
"""
Define the magic ``__array__`` function so that ``numpy.array(m)`` can convert
a matrix ``m`` to a numpy array. See
`Interoperability with NumPy <https://numpy.org/doc/1.26/user/basics.interoperability.html>`_.
# Define the magic "__array__" function so that numpy.array(m) can convert
# a matrix m to a numpy array.
# See http://docs.scipy.org/doc/numpy/user/c-info.how-to-extend.html#converting-an-arbitrary-sequence-object
__array__=numpy
Note that subclasses should override :meth:`numpy`, but usually not this method.
INPUT:
- ``dtype`` -- the desired data-type for the array. If not given,
then the type will be determined automatically.
- ``copy`` -- required for numpy 2.0 compatibility.
See <https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword>`_.
Note that ``copy=False`` is not supported.
TESTS::
sage: # needs numpy
sage: import numpy as np
sage: a = matrix(3, range(12))
sage: if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
....: try:
....: np.array(a, copy=False) # in numpy 2.0, this raises an error
....: except ValueError:
....: pass
....: else:
....: assert False
....: else:
....: b = np.array(a, copy=False) # in numpy 1.26, this means "avoid copy if possible"
....: # https://numpy.org/doc/1.26/reference/generated/numpy.array.html#numpy.array
....: # but no-copy is not supported so it will copy anyway
....: a[0,0] = 1
....: assert b[0,0] == 0
....: b = np.asarray(a)
....: a[0,0] = 2
....: assert b[0,0] == 1
"""
import numpy as np
if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
if copy is False:
raise ValueError("Sage matrix cannot be converted to numpy array without copying")
else:
assert copy is None # numpy versions before 2.0 should not pass copy argument
return self.numpy(dtype)

###################################################
# Construction functions
Expand Down
28 changes: 28 additions & 0 deletions src/sage/matrix/matrix_mod2_dense.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ from cysignals.signals cimport sig_on, sig_str, sig_off
cimport sage.matrix.matrix_dense as matrix_dense
from sage.matrix.args cimport SparseEntry, MatrixArgs_init, MA_ENTRIES_NDARRAY
from libc.stdio cimport *
from libc.stdint cimport uintptr_t
from sage.structure.element cimport (Matrix, Vector)
from sage.modules.free_module_element cimport FreeModuleElement
from sage.libs.gmp.random cimport *
Expand Down Expand Up @@ -581,6 +582,33 @@ cdef class Matrix_mod2_dense(matrix_dense.Matrix_dense): # dense or sparse
return list(C)
return C

def numpy(self, dtype=None):
"""
Return the Numpy matrix associated to this matrix.
See :meth:`.matrix1.Matrix.numpy`.
EXAMPLES::
sage: # needs numpy
sage: import numpy as np
sage: np.array(matrix.identity(GF(2), 5))
array([[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 1]], dtype=uint8)
TESTS:
Make sure it's reasonably fast (the temporary numpy array is immediately
destroyed otherwise it consumes 400MB memory)::
sage: np.sum(np.array(matrix.identity(GF(2), 2*10^4))) # around 2s walltime # needs numpy
20000
"""
from ..modules.numpy_util import mzd_matrix_to_numpy
return mzd_matrix_to_numpy(<uintptr_t>self._entries, dtype)

########################################################################
# LEVEL 2 functionality
# * def _pickle
Expand Down
74 changes: 65 additions & 9 deletions src/sage/matrix/matrix_numpy_dense.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -366,15 +366,12 @@ cdef class Matrix_numpy_dense(Matrix_dense):

def numpy(self, dtype=None):
"""
Return a copy of the matrix as a numpy array.
It uses the numpy C/api so is very fast.
Return the Numpy matrix associated to this matrix.
INPUT:
- ``dtype`` -- the desired data-type for the array. If not given,
then the type will be determined as the minimum type required
to hold the objects in the sequence.
then the type will be determined automatically.
EXAMPLES::
Expand Down Expand Up @@ -424,12 +421,71 @@ cdef class Matrix_numpy_dense(Matrix_dense):
[]
sage: m.numpy()
array([], shape=(5, 0), dtype=float64)
Test that a copy is always made::
sage: import numpy as np
sage: m = matrix(RDF,2); m
[0.0 0.0]
[0.0 0.0]
sage: m[0,0]=1
sage: n=m.numpy() # should copy
sage: m[0,0]=2
sage: n
array([[1., 0.],
[0., 0.]])
sage: n=numpy.array(m) # should copy
sage: m[0,0]=3
sage: n
array([[2., 0.],
[0., 0.]])
sage: n=numpy.asarray(m) # should still copy
sage: m[0,0]=4
sage: n
array([[3., 0.],
[0., 0.]])
sage: n=numpy.asarray(m, dtype=numpy.int64) # should copy
sage: m[0,0]=5
sage: n
array([[4, 0],
[0, 0]])
::
sage: import numpy as np
sage: a = matrix(RDF, 3, range(12))
sage: if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
....: try:
....: np.array(a, copy=False) # in numpy 2.0, this raises an error
....: except ValueError:
....: pass
....: else:
....: assert False
....: else:
....: b = np.array(a, copy=False) # in numpy 1.26, this means "avoid copy if possible"
....: # https://numpy.org/doc/1.26/reference/generated/numpy.array.html#numpy.array
....: # but no-copy is not supported so it will copy anyway
....: a[0,0] = 1
....: assert b[0,0] == 0
....: b = np.asarray(a)
....: a[0,0] = 2
....: assert b[0,0] == 1
Make sure it's reasonably fast (the temporary numpy array is immediately
destroyed otherwise it consumes 200MB memory)::
sage: import numpy as np
sage: np.sum(np.array(matrix.identity(RDF, 5*10^3))).item() # around 2s each
5000.0
sage: np.sum(np.asarray(matrix.identity(RDF, 5*10^3))).item()
5000.0
sage: np.sum(np.asarray(matrix.identity(RDF, 5*10^3), dtype=np.uint8)).item()
5000
sage: np.sum(np.array(matrix.identity(CDF, 3*10^3))).item()
(3000+0j)
"""
import numpy as np
if dtype is None or self._numpy_dtype == np.dtype(dtype):
return self._matrix_numpy.copy()
else:
return Matrix_dense.numpy(self, dtype=dtype)
return np.array(self._matrix_numpy, dtype=dtype)

def _replace_self_with_numpy(self, numpy_matrix):
"""
Expand Down
25 changes: 25 additions & 0 deletions src/sage/modules/numpy_util.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,28 @@ cpdef int set_matrix_mod2_from_numpy(Matrix_mod2_dense a, b) except -1:
return (<object>_set_matrix_mod2_from_numpy_helper)(a, b) # https://github.com/cython/cython/issues/6588
except TypeError:
return False


cpdef object mzd_matrix_to_numpy(uintptr_t entries_addr, object dtype):
"""
Convert ``<mzd_t*>entries_addr`` to a numpy array.
INPUT:
- ``entries_addr`` -- must be a ``mzd_t*`` casted to ``uintptr_t``; the casting
is necessary to pass it through Python boundary because of lazy import.
Do not pass arbitrary integer value here, will crash the program.
- ``dtype`` -- numpy dtype. If ``None``, the result will have some convenient dtype.
OUTPUT: a 2-dimensional array.
"""
if dtype is not None:
return mzd_matrix_to_numpy(entries_addr, None).astype(dtype)
cdef mzd_t* entries = <mzd_t*>entries_addr
cdef np.ndarray[np.uint8_t, ndim=2] result = np.empty((entries.nrows, entries.ncols), dtype=np.uint8)
cdef Py_ssize_t i, j
for i in range(entries.nrows):
for j in range(entries.ncols):
result[i, j] = mzd_read_bit(entries, i, j)
return result

0 comments on commit b1d65a8

Please sign in to comment.