From 8be5e0cf9d86b66f93328d5547345d96672b42ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Mon, 4 Dec 2023 16:54:08 +0100 Subject: [PATCH 1/2] use __reduce__ to not store hash in the pickle --- .gitignore | 8 +++++++- tests/test_tasks.py | 30 +++++++++++++++++++++++++++++- tests/test_xdeps.py | 1 + xdeps/refs.py | 45 ++++++++++++++++++++++++++++++++++++++------- 4 files changed, 75 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 93e8713..ab78fa3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,10 @@ *.swp *.egg-info .DS_Store - +*.so +.pymon +.ipynb_checkpoints +**/.coverage +**/.coverage.* +**/checkpoint_restart.dat +build/ diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 2edf205..9e6b5ea 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -1,4 +1,9 @@ +# copyright ############################### # +# This file is part of the Xdeps Package. # +# Copyright (c) CERN, 2023. # +# ######################################### # import numpy as np +import pickle import pytest import xdeps as xd @@ -84,7 +89,7 @@ def example_manager(): # │ # ╭ [d[0]]─┐ │ # container2 │ │ │ - # ╰ [d[1]]─┴───►(e[0]=d[0]+d[1])───►[e[0]]─┴─►(j=h+e[0])─►[j] + # ╰ [d[1]]─┴───►(e[0]=d[0]*d[1])───►[e[0]]─┴─►(j=h+e[0])─►[j] ref1['f'] = ref1['a'] + ref1['b'] @@ -248,6 +253,29 @@ def test_manager_dump_and_load(example_manager): assert not new_manager.tasks[new_ref2['j']].expr == old_expr +def test_manager_pickle(example_manager): + manager, _, original_containers = example_manager + original1, original2 = original_containers + + pickled = pickle.dumps(manager) + new_manager = pickle.loads(pickled) + + ref1 = new_manager.containers['ref1'] + container1 = ref1._owner + ref2 = new_manager.containers['ref2'] + container2 = ref2._owner + + ref1['a'] = 32 + ref2['d'][1] = 5 + + assert container1 is not original1 + assert container2 is not original2 + assert container1['f'] == 40 + assert container1['h'] == 6 + assert container2['e'][0] == 10 + assert container2['j'] == 16 + + def test_ref_count(): manager = xd.Manager() container = {'a': 3, 'b': [None], 'c': None} diff --git a/tests/test_xdeps.py b/tests/test_xdeps.py index a68743e..ee702a1 100644 --- a/tests/test_xdeps.py +++ b/tests/test_xdeps.py @@ -2,6 +2,7 @@ # This file is part of the Xdeps Package. # # Copyright (c) CERN, 2021. # # ######################################### # +import pickle import xdeps from xdeps.tasks import AttrDict diff --git a/xdeps/refs.py b/xdeps/refs.py index 8be59f9..2edbaed 100644 --- a/xdeps/refs.py +++ b/xdeps/refs.py @@ -19,7 +19,6 @@ '__dict__', '__getstate__', '__setstate__', - '__reduce__', '__reduce_cython__', '__wrapped__', '__array_ufunc__', @@ -117,6 +116,9 @@ def __init__(self, *args, **kwargs): def __hash__(self): return self._hash + def __reduce__(self): + raise TypeError("Cannot pickle an abstract class") + def __eq__(self, other): """Check equality of the expressions `self` and `other`. @@ -346,6 +348,14 @@ def __setattr__(self, attr, value): ref = AttrRef(self, attr, self._manager) self._manager.set_value(ref, value) + def __reduce__(self): + """Do not store the hash when pickling. + + The hash is only guaranteed to be the same for the 'same' refs within + the same python instance, therefore serialising hashes makes no sense. + """ + return type(self), (self._owner, self._key, self._manager) + def _get_dependencies(self, out=None): if out is None: out = set() @@ -555,6 +565,8 @@ def _set_value(self, value): setattr(owner, attr, value) def __repr__(self): + assert self._owner is not None + assert self._key is not None return f"{self._owner}.{self._key}" @@ -571,6 +583,7 @@ def _set_value(self, value): owner[item] = value def __repr__(self): + assert self._owner is not None return f"{self._owner}[{repr(self._key)}]" @@ -608,6 +621,10 @@ def _get_dependencies(self, out=None): self._rhs._get_dependencies(out) return out + def __reduce__(self): + """Instruct pickle to not pickle the hash.""" + return type(self), (self._lhs, self._rhs) + def __repr__(self): return f"({self._lhs} {self._op_str} {self._rhs})" @@ -643,6 +660,10 @@ def _get_dependencies(self, out=None): # performance reasons we skip the check. return self._arg._get_dependencies(out) + def __reduce__(self): + """Instruct pickle to not pickle the hash.""" + return type(self), (self._arg,) + def __repr__(self): return f"({self._op_str}{self._arg})" @@ -900,6 +921,10 @@ def _get_dependencies(self, out=None): arg._get_dependencies(out) return out + def __reduce__(self): + """Instruct pickle to not pickle the hash.""" + return type(self), (self._op, self._args) + def __repr__(self): op_symbol = OPERATOR_SYMBOLS.get(self._op, self._op.__name__) return f"{op_symbol}({self._arg})" @@ -914,7 +939,10 @@ class CallRef(BaseRef): def __init__(self, func, args, kwargs): self._func = func self._args = args - self._kwargs = tuple(kwargs.items()) + if isinstance(kwargs, dict): + self._kwargs = tuple(kwargs.items()) + else: + self._kwargs = tuple(kwargs) self._hash = hash((self._func, self._args, self._kwargs)) def _get_value(self): @@ -936,17 +964,20 @@ def _get_dependencies(self, out=None): arg._get_dependencies(out) return out + def __reduce__(self): + """Instruct pickle to not pickle the hash.""" + return type(self), (self._func, self._args, self._kwargs) + def __repr__(self): - args = [] - for aa in self._args: - args.append(repr(aa)) - for k, v in self._kwargs: - args.append(f"{k}={v}") + args = [repr(arg) for arg in self._args] + args += [f"{k}={v}" for k, v in self._kwargs] args = ", ".join(args) + if isinstance(self._func, BaseRef): fname = repr(self._func) else: fname = self._func.__name__ + return f"{fname}({args})" From e7f6171c4d52614a01f55db778d9fc8ccaac3a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Mon, 4 Dec 2023 17:37:48 +0100 Subject: [PATCH 2/2] implement __cinit__ to ensure that refs are always in a valid state An attempt to avoid this problem encountered rarely when pickling: ```python xsuite/xtrack/tests/test_line.py:909: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ xdeps/refs.py:132: in xdeps.refs.BaseRef.__eq__ ??? _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > ??? E TypeError: __str__ returned non-string (type NoneType) ``` --- xdeps/refs.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/xdeps/refs.py b/xdeps/refs.py index 2edbaed..60fd516 100644 --- a/xdeps/refs.py +++ b/xdeps/refs.py @@ -111,7 +111,13 @@ class BaseRef: _hash = cython.declare(int, visibility='private') def __init__(self, *args, **kwargs): - raise TypeError("Cannot instantiate abstract class BaseRef") + # To keep compatibility with pure Python (useful for debugging simpler + # issues), we simulate Cython __cinit__ behaviour with this __init__: + if not is_cythonized(): + for base in type(self).__mro__: + cinit = getattr(base, '__cinit__', None) + if cinit: + cinit(self, *args, **kwargs) def __hash__(self): return self._hash @@ -311,11 +317,13 @@ class MutableRef(BaseRef): _owner = cython.declare(object, visibility='public', value=None) _key = cython.declare(object, visibility='public', value=None) - def __init__(self, _owner, _key, _manager): + def __cinit__(self, _owner, _key, _manager): self._owner = _owner self._key = _key self._manager = _manager - self._hash = hash((self.__class__.__name__, _owner, _key)) + # _hash will depend on the particularities of the subclass, for now + # it is None, which does not matter, as this class should never be + # instantiated. def __setitem__(self, key, value): ref = ItemRef(self, key, self._manager) @@ -340,7 +348,7 @@ def __setattr__(self, attr, value): # The above way of setting attributes does not work in Cython, # as the object does not have a __dict__. We do not really need # a setter for those though, as the only time we need to - # set a "built-in" attribute is during __init__ or when + # set a "built-in" attribute is during __cinit__ or when # unpickling, and both of those cases are handled by Cython # without the usual pythonic call to __setattr__. raise AttributeError(f"Attribute {attr} is read-only.") @@ -509,11 +517,9 @@ class Ref(MutableRef): """ A reference in the top-level container. """ - def __init__(self, _owner, _key, _manager): - self._owner = _owner - self._key = _key - self._manager = _manager - self._hash = hash((self.__class__.__name__, _key)) + def __cinit__(self, _owner, _key, _manager): + # Cython automatically calls __cinit__ in the base classes + self._hash = hash((type(self).__name__, _key)) def __repr__(self): return self._key @@ -554,6 +560,10 @@ def __setattr__(self, attr, value): @cython.cclass class AttrRef(MutableRef): + def __cinit__(self, _owner, _key, _manager): + # Cython automatically calls __cinit__ in the base classes + self._hash = hash((type(self).__name__, _owner, _key)) + def _get_value(self): owner = BaseRef._mk_value(self._owner) attr = BaseRef._mk_value(self._key) @@ -572,6 +582,10 @@ def __repr__(self): @cython.cclass class ItemRef(MutableRef): + def __cinit__(self, _owner, _key, _manager): + # Cython automatically calls __cinit__ in the base classes + self._hash = hash((type(self).__name__, _owner, _key)) + def _get_value(self): owner = BaseRef._mk_value(self._owner) item = BaseRef._mk_value(self._key) @@ -604,7 +618,7 @@ class BinOpExpr(BaseRef): _lhs = cython.declare(object, visibility='public') _rhs = cython.declare(object, visibility='public') - def __init__(self, lhs, rhs): + def __cinit__(self, lhs, rhs): self._lhs = lhs self._rhs = rhs self._hash = hash((self.__class__, lhs, rhs)) @@ -645,7 +659,7 @@ class UnaryOpExpr(BaseRef): """ _arg = cython.declare(object, visibility='public') - def __init__(self, arg): + def __cinit__(self, arg): self._arg = arg self._hash = hash((self.__class__, self._arg)) @@ -900,7 +914,7 @@ class BuiltinRef(BaseRef): _op = cython.declare(object, visibility='public') _params = cython.declare(tuple, visibility='public') - def __init__(self, arg, op, params=()): + def __cinit__(self, arg, op, params=()): self._arg = arg self._op = op self._params = params @@ -936,7 +950,7 @@ class CallRef(BaseRef): _args = cython.declare(tuple, visibility='public') _kwargs = cython.declare(tuple, visibility='public') - def __init__(self, func, args, kwargs): + def __cinit__(self, func, args, kwargs): self._func = func self._args = args if isinstance(kwargs, dict):