Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No need to redirect to the original frame explicitly during deserialization #3722

Merged
merged 24 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Fixes
* add a 0.5 for correct midpoints in hole analysis (Issue #3715)
* Fix reading error when PDB CONECT records are corrupt. (Issue #988)
* Fix writing unusable PDB CONECT records with index>100000. (Issue #988)
* Fixes failure in double-serialization of TextIOPicklable file reader.
(Issue #3723, PR #3722)

Enhancements
* Add a new `formalcharge` attribute for storing formal charges (PR #3755)
Expand All @@ -36,6 +38,8 @@ Enhancements
(CZI Performance track, PR #3683)
* Refactor make_downshift_arrays with vector operation in numpy(PR #3724)
* Optimize topology serialization/construction by lazy-building _RA and _SR.
* Improve performance in Universe serialization by no explicit redirection to
the timestep. (Issue #3721, PR #3722)

Changes
* To add additional optional packages for different file formats when
Expand All @@ -45,6 +49,8 @@ Changes
as per NEP29 (PR #3737)
* Narrowed variable scope to reduce use of OpenMP `private` clause (PR #3706, PR
#3728)
* TextIOPicklable serializes the raw class instance instead of class name.
(PR #3722)

Deprecations
* The extras_requires target "AMBER" for `pip install ./package[AMBER]`
Expand Down
4 changes: 0 additions & 4 deletions package/MDAnalysis/coordinates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,10 +1316,6 @@ def _apply_transformations(self, ts):

return ts

def __setstate__(self, state):
self.__dict__ = state
self[self.ts.frame]


class ReaderBase(ProtoReader):
"""Base class for trajectory readers that extends :class:`ProtoReader` with a
Expand Down
47 changes: 24 additions & 23 deletions package/MDAnalysis/lib/picklable_file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def __getstate__(self):

def __setstate__(self, args):
name = args[0]
super().__init__(name, mode='r')
self.__init__(name, mode='r')
self.seek(args[1])


Expand Down Expand Up @@ -151,29 +151,26 @@ def __init__(self, raw):
super().__init__(raw)
self.raw_class = raw.__class__


def __getstate__(self):
return self.raw_class, self.name, self.tell()


def __setstate__(self, args):
# raw in BufferIOPicklable is immutable.
# need to reconstruct from class
raw_class = args[0]
name = args[1]
raw = raw_class(name)
super().__init__(raw)
self.__init__(raw)
self.seek(args[2])


class TextIOPicklable(io.TextIOWrapper):
"""Character and line based picklable file-like object.

This class provides a file-like :class:`io.TextIOWrapper` object that can
be pickled. Note that this only works in read mode.

Note
----
After pickling, the current position is reset. `universe.trajectory[i]` has
to be used to return to its original frame.


Parameters
----------
raw : FileIO object
Expand All @@ -192,26 +189,30 @@ class TextIOPicklable(io.TextIOWrapper):


.. versionadded:: 2.0.0
.. versionchanged:: 2.3.0
The raw class instance instead of the class name
that is wrapped inside will be serialized.
After deserialization, the current position is no longer reset
so `universe.trajectory[i]` is not needed to seek to the
original position.
"""
def __init__(self, raw):
super().__init__(raw)
self.raw_class = raw.__class__
self.raw = raw

def __getstate__(self):
try:
name = self.name
except AttributeError:
# This is kind of ugly--BZ2File does not save its name.
name = self.buffer._fp.name
return self.raw_class, name
curr_loc = self.tell()
# some readers (e.g. GMS) disable tell() due to using next()
except OSError:
curr_loc = None
return self.raw, curr_loc

def __setstate__(self, args):
raw_class = args[0]
name = args[1]
# raw_class is used for further expansion this functionality to
# Gzip files, which also requires a text wrapper.
raw = raw_class(name)
super().__init__(raw)
raw = args[0]
self.__init__(raw)
if args[1] is not None:
self.seek(args[1])


class BZ2Picklable(bz2.BZ2File):
Expand Down Expand Up @@ -272,7 +273,7 @@ def __getstate__(self):
return self._fp.name, self.tell()

def __setstate__(self, args):
super().__init__(args[0])
self.__init__(args[0])
self.seek(args[1])


Expand Down Expand Up @@ -334,7 +335,7 @@ def __getstate__(self):
return self.name, self.tell()

def __setstate__(self, args):
super().__init__(args[0])
self.__init__(args[0])
self.seek(args[1])


Expand Down
11 changes: 11 additions & 0 deletions testsuite/MDAnalysisTests/coordinates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,13 @@ def test_last_slice(self):
def test_pickle_singleframe_reader(self):
reader = self.universe.trajectory
reader_p = pickle.loads(pickle.dumps(reader))
reader_p_p = pickle.loads(pickle.dumps(reader_p))
assert_equal(len(reader), len(reader_p))
assert_equal(reader.ts, reader_p.ts,
"Single-frame timestep is changed after pickling")
assert_equal(len(reader), len(reader_p_p))
assert_equal(reader.ts, reader_p_p.ts,
"Single-frame timestep is changed after double pickling")


class BaseReference(object):
Expand Down Expand Up @@ -444,6 +448,10 @@ def test_pickle_reader(self, reader):
assert_equal(len(reader), len(reader_p))
assert_equal(reader.ts, reader_p.ts,
"Timestep is changed after pickling")
reader_p_p = pickle.loads(pickle.dumps(reader_p))
assert_equal(len(reader), len(reader_p_p))
assert_equal(reader.ts, reader_p_p.ts,
"Timestep is changed after double pickling")


class MultiframeReaderTest(BaseReaderTest):
Expand Down Expand Up @@ -527,6 +535,9 @@ def test_pickle_next_ts_reader(self, reader):
reader_p = pickle.loads(pickle.dumps(reader))
assert_equal(next(reader), next(reader_p),
"Next timestep is changed after pickling")
reader_p_p = pickle.loads(pickle.dumps(reader_p))
assert_equal(next(reader), next(reader_p_p),
"Next timestep is changed after double pickling")

# To make sure pickle works for last frame.
def test_pickle_last_ts_reader(self, reader):
Expand Down
4 changes: 2 additions & 2 deletions testsuite/MDAnalysisTests/utils/test_pickleio.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def test_iopickle_text(f_text):
assert_equal(f_text.readlines(), f_text_pickled.readlines())


def test_offset_text_to_0(f_text):
def test_offset_text_same(f_text):
f_text.readline()
f_text_pickled = pickle.loads(pickle.dumps(f_text))
assert_equal(f_text_pickled.tell(), 0)
assert_equal(f_text_pickled.tell(), f_text.tell())


@pytest.fixture(params=[
Expand Down