From 9018f6dccbf6e05fa3779d715d6ed1e6b3cb23ef Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 18:50:21 +0800 Subject: [PATCH] support order with cfg.track_order --- h5pyd/_hl/base.py | 9 +++- h5pyd/_hl/files.py | 14 ++---- h5pyd/_hl/group.py | 15 ++++-- test/hl/test_attribute.py | 87 ++++++++++++++++++++-------------- test/hl/test_file.py | 48 ++++++++++++++----- test/hl/test_group.py | 99 +++++++++++++++++++++++---------------- 6 files changed, 170 insertions(+), 102 deletions(-) diff --git a/h5pyd/_hl/base.py b/h5pyd/_hl/base.py index f581480..dd1a78e 100644 --- a/h5pyd/_hl/base.py +++ b/h5pyd/_hl/base.py @@ -14,6 +14,7 @@ import posixpath import os +import sys import json import numpy as np import logging @@ -28,6 +29,10 @@ numpy_float_types = (np.float16, np.float32, np.float64) +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + class FakeLock(): def __init__(self): pass @@ -506,7 +511,7 @@ def readElement(buffer, offset, arr, index, dt): e = np.frombuffer(bytes(e_buffer), dtype=dt) arr[index] = e[0] except ValueError: - print(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") + eprint(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") raise else: # variable length element @@ -533,7 +538,7 @@ def readElement(buffer, offset, arr, index, dt): try: e = np.frombuffer(bytes(e_buffer), dtype=vlen) except ValueError: - print("ValueError -- e_buffer:", e_buffer, "dtype:", vlen) + eprint("ValueError -- e_buffer:", e_buffer, "dtype:", vlen) raise arr[index] = e diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index 5937d04..22ad0b4 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -327,7 +327,6 @@ def __init__( timeout Timeout value in seconds """ - groupid = None dn_ids = [] # if we're passed a GroupId as domain, just initialize the file object @@ -407,9 +406,6 @@ def __init__( if swmr: use_cache = False # disable metadata caching in swmr mode - if track_order is None: - track_order = cfg.track_order - http_conn = HttpConn( domain, endpoint=endpoint, @@ -489,7 +485,7 @@ def __init__( body["owner"] = owner if linked_domain: body["linked_domain"] = linked_domain - if track_order: + if track_order or cfg.track_order: create_props = {"CreateOrder": 1} group_body = {"creationProperties": create_props} body["group"] = group_body @@ -558,22 +554,20 @@ def __init__( groupid = GroupID(None, group_json, http_conn=http_conn) # end else + self._name = "/" self._id = groupid - self._verboseInfo = None # aditional state we'll get when requested + self._verboseInfo = None # additional state we'll get when requested self._verboseUpdated = None # when the verbose data was fetched self._lastScan = None # when summary stats where last updated by server self._dn_ids = dn_ids - self._track_order = track_order self._swmr_mode = swmr Group.__init__(self, self._id, track_order=track_order) def _getVerboseInfo(self): now = time.time() - if ( - self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME - ): + if (self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME): # resynch the verbose data req = "/?verbose=1" rsp_json = self.GET(req, use_cache=False, params={"CreateOrder": "1" if self._track_order else "0"}) diff --git a/h5pyd/_hl/group.py b/h5pyd/_hl/group.py index 0983fcc..21404db 100644 --- a/h5pyd/_hl/group.py +++ b/h5pyd/_hl/group.py @@ -200,6 +200,7 @@ def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=N """ helper function to make a group """ cfg = config.get_config() + link_json = {} if parent_id: link_json["id"] = parent_id @@ -219,6 +220,8 @@ def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=N group_json = rsp groupId = GroupID(self, group_json) sub_group = Group(groupId) + if track_order or cfg.track_order: + sub_group._track_order = True if parent_name: if parent_name[-1] == '/': parent_name = parent_name + link @@ -272,8 +275,12 @@ def create_group(self, h5path, track_order=None): create_group = True if create_group: - sub_group = self._make_group(parent_id=parent_uuid, parent_name=parent_name, link=link) - sub_group._track_order = track_order + kwargs = {} + kwargs["parent_id"] = parent_uuid + kwargs["parent_name"] = parent_name + kwargs["link"] = link + kwargs["track_order"] = track_order + sub_group = self._make_group(**kwargs) parent_uuid = sub_group.id.id else: @@ -593,7 +600,7 @@ def __getitem__(self, name, track_order=None): # convert bytes to str for PY3 if isinstance(name, bytes): name = name.decode('utf-8') - self.log.debug(f"group.__getitem__({name})") + self.log.debug(f"group.__getitem__({name}, track_order={track_order})") tgt = None if isinstance(name, h5type.Reference): @@ -716,7 +723,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Non """ if not (getclass or getlink): try: - return self.__getitem__(name, track_order) + return self.__getitem__(name, track_order=track_order) except KeyError: return default diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py index 21d2290..408ed99 100644 --- a/test/hl/test_attribute.py +++ b/test/hl/test_attribute.py @@ -292,49 +292,68 @@ def test_delete_multiple(self): class TestTrackOrder(TestCase): - def fill_attrs(self, track_order): - attrs = self.f.create_group('test', track_order=track_order).attrs - for i in range(100): - attrs[str(i)] = i - return attrs + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") + + def fill_attrs(self, obj): + count = len(self.titles) + attrs = obj.attrs + for i in range(count): + title = self.titles[i] + val = i + 1 + attrs[title] = val - # https://forum.hdfgroup.org/t/bug-h5arename-fails-unexpectedly/4881 def test_track_order(self): filename = self.getFileName("test_test_track_order_attribute") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - attrs = self.fill_attrs(track_order=True) # creation order - self.assertEqual(list(attrs), - [str(i) for i in range(100)]) + with h5py.File(filename, 'w') as f: + g1 = f.create_group('test', track_order=True) + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), list(self.titles)) + # group should return track order + with h5py.File(filename) as f: + g1 = f['test'] + self.assertEqual(list(g1.attrs), list(self.titles)) + + def test_track_order_cfg(self): + filename = self.getFileName("test_test_track_order_attribute") + print(f"filename: {filename}") + cfg = h5py.get_config() + with h5py.File(filename, 'w') as f: + cfg.track_order = True + g1 = f.create_group('test') + cfg.track_order = False # reset + + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), list(self.titles)) + + with h5py.File(filename) as f: + g1 = f['test'] + self.assertEqual(list(g1.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_test_no_track_order_attribute") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - attrs = self.fill_attrs(track_order=False) # name alphanumeric - self.assertEqual(list(attrs), - sorted([str(i) for i in range(100)])) - - def fill_attrs2(self, track_order): - group = self.f.create_group('test', track_order=track_order) - for i in range(12): - group.attrs[str(i)] = i - return group + f = h5py.File(filename, 'w') + g1 = f.create_group('test') # name alphanumeric + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), sorted(list(self.titles))) def test_track_order_overwrite_delete(self): filename = self.getFileName("test_test_track_order_overwrite_delete") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - # issue h5py#1385 - group = self.fill_attrs2(track_order=True) # creation order - self.assertEqual(group.attrs["11"], 11) + f = h5py.File(filename, 'w') + + g1 = f.create_group("g1", track_order=True) # creation order + self.fill_attrs(g1) + title = 'three' + self.assertEqual(g1.attrs[title], 3) # overwrite attribute - group.attrs['11'] = 42.0 - self.assertEqual(group.attrs["11"], 42.0) + g1.attrs[title] = 42.0 + self.assertEqual(g1.attrs[title], 42.0) # delete attribute - self.assertIn('10', group.attrs) - del group.attrs['10'] - self.assertNotIn('10', group.attrs) + self.assertIn(title, g1.attrs) + del g1.attrs[title] + self.assertNotIn(title, g1.attrs) def test_track_order_not_inherited(self): """ @@ -343,13 +362,11 @@ def test_track_order_not_inherited(self): """ filename = self.getFileName("test_test_track_order_not_inherited") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w', track_order=True) - group = self.f.create_group('test') - - for i in range(12): - group.attrs[str(i)] = i + f = h5py.File(filename, 'w', track_order=True) + g1 = f.create_group('test') + self.fill_attrs(g1) - self.assertEqual(list(group.attrs), sorted([str(i) for i in range(12)])) + self.assertEqual(list(g1.attrs), sorted(list(self.titles))) if __name__ == '__main__': diff --git a/test/hl/test_file.py b/test/hl/test_file.py index d8da2bd..a8f6760 100644 --- a/test/hl/test_file.py +++ b/test/hl/test_file.py @@ -350,27 +350,53 @@ def test_close(self): class TestTrackOrder(TestCase): + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") + def populate(self, f): - count = 3 + count = len(self.titles) + # create count datasets/groups for i in range(count): + title = self.titles[i] # Mix group and dataset creation. - if i % 10 == 0: - f.create_group(str(i)) + if i % 2 == 0: + f.create_group(title) else: - f[str(i)] = [i] - return count + f[title] = [i] + # create count attributes + for i in range(count): + title = self.titles[i] + f.attrs[title] = i def test_track_order(self): filename = self.getFileName("test_track_order_file") print(f"filename: {filename}") # write file using creation order with h5py.File(filename, 'w', track_order=True) as f: - count = self.populate(f) - self.assertEqual(list(f), [str(i) for i in range(count)]) + self.populate(f) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + + with h5py.File(filename) as f: + # domain/file should have been saved with track_order state + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + + def test_cfg_track_order(self): + filename = self.getFileName("test_cfg_track_order_file") + print(f"filename: {filename}") + # write file using creation order + cfg = h5py.get_config() + cfg.track_order = True + with h5py.File(filename, 'w') as f: + self.populate(f) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + cfg.track_order = False # reset with h5py.File(filename) as f: # domain/file should have been saved with track_order state - self.assertEqual(list(f), [str(i) for i in range(count)]) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_no_track_order_file") @@ -378,11 +404,11 @@ def test_no_track_order(self): # create file using alphanumeric order with h5py.File(filename, 'w', track_order=False) as f: - count = self.populate(f) - self.assertEqual(list(f), sorted([str(i) for i in range(count)])) + self.populate(f) + self.assertEqual(list(f), sorted(self.titles)) with h5py.File(filename) as f: # name alphanumeric - self.assertEqual(list(f), sorted([str(i) for i in range(count)])) + self.assertEqual(list(f), sorted(self.titles)) if __name__ == '__main__': diff --git a/test/hl/test_group.py b/test/hl/test_group.py index 4b86702..89d391d 100644 --- a/test/hl/test_group.py +++ b/test/hl/test_group.py @@ -313,7 +313,7 @@ def test_link_multi_removal(self): if config.get("use_h5py"): return filename = self.getFileName("test_link_multi_removal") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -358,7 +358,7 @@ def test_link_multi_create(self): return filename = self.getFileName("test_link_multi_create") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -438,7 +438,7 @@ def test_link_get_multi(self): return filename = self.getFileName("test_link_get_multi") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -550,58 +550,85 @@ def test_link_get_multi(self): class TestTrackOrder(TestCase): + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") def populate(self, g): - count = 10 + count = len(self.titles) for i in range(count): # Mix group and dataset creation. - if i % 10 == 0: - g.create_group(str(i)) + if i % 2 == 0: + g.create_group(self.titles[i]) else: - g[str(i)] = [i] - return count + g[self.titles[i]] = [i] def populate_attrs(self, d): - count = 10 + count = len(self.titles) for i in range(count): - d.attrs[str(i)] = i - return count + d.attrs[self.titles[i]] = i def test_track_order(self): filename = self.getFileName("test_track_order_group") print(f"filename: {filename}") with h5py.File(filename, 'w') as f: g = f.create_group('order', track_order=True) # creation order - count = self.populate(g) + self.populate(g) - ref = [str(i) for i in range(count)] - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + ref = self.titles + self.assertEqual(tuple(g), ref) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 # re-opening the file should retain the track_order setting with h5py.File(filename) as f: g = f['order'] - count = len(g) - self.assertTrue(count > 0) - ref = [str(i) for i in range(count)] - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + self.assertEqual(len(g), len(self.titles)) + self.assertEqual(tuple(g), self.titles) + self.assertEqual(tuple(reversed(g)), tuple(reversed(self.titles))) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + def test_track_order_cfg(self): + filename = self.getFileName("test_track_order_cfg_group") + print(f"filename: {filename}") + cfg = h5py.get_config() + with h5py.File(filename, 'w') as f: + cfg.track_order = True # creation order + g = f.create_group('order') + cfg.track_order = False # reset + self.populate(g) + self.assertEqual(tuple(g), self.titles) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + # re-opening the file should retain the track_order setting + with h5py.File(filename) as f: + g = f['order'] + self.assertEqual(len(g), len(self.titles)) + self.assertEqual(tuple(g), self.titles) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 def test_no_track_order(self): filename = self.getFileName("test_no_track_order_group") print(f"filename: {filename}") with h5py.File(filename, 'w') as f: g = f.create_group('order', track_order=False) # name alphanumeric - count = self.populate(g) - ref = sorted([str(i) for i in range(count)]) + self.populate(g) + ref = sorted(self.titles) self.assertEqual(list(g), ref) self.assertEqual(list(reversed(g)), list(reversed(ref))) with h5py.File(filename) as f: g = f['order'] # name alphanumeric - count = len(g) - self.assertTrue(count > 0) - ref = sorted([str(i) for i in range(count)]) + ref = sorted(self.titles) self.assertEqual(list(g), ref) self.assertEqual(list(reversed(g)), list(reversed(ref))) @@ -619,21 +646,18 @@ def test_get_dataset_track_order(self): dset = g.create_dataset('dset', (10,), dtype='i4') dset2 = g.create_dataset('dset2', (10,), dtype='i4') - count1 = self.populate_attrs(dset) - count2 = self.populate_attrs(dset2) + self.populate_attrs(dset) + self.populate_attrs(dset2) with h5py.File(filename) as f: g = f['order'] d = g.get('dset', track_order=True) - ref = [str(i) for i in range(count1)] - self.assertEqual(list(d.attrs), ref) - self.assertEqual(list(reversed(d.attrs)), list(reversed(ref))) + self.assertEqual(list(d.attrs), list(self.titles)) d2 = g.get('dset2', track_order=False) - ref = sorted([str(i) for i in range(count2)]) + ref = sorted(self.titles) self.assertEqual(list(d2.attrs), ref) - self.assertEqual(list(reversed(d2.attrs)), list(reversed(ref))) def test_get_group_track_order(self): # h5py does not support track_order on group.get() @@ -645,22 +669,17 @@ def test_get_group_track_order(self): g = f.create_group('order') # create subgroup and populate it with links g.create_group('subgroup') - count = self.populate(g['subgroup']) + self.populate(g['subgroup']) with h5py.File(filename) as f: g = f['order'] subg = g.get('subgroup', track_order=True) - ref = [str(i) for i in range(count)] - self.assertEqual(list(subg), ref) - self.assertEqual(list(reversed(subg)), list(reversed(ref))) + self.assertEqual(tuple(subg), self.titles) with h5py.File(filename) as f: g = f['order'] subg2 = g.get('subgroup', track_order=False) - count = len(subg2) - ref = sorted([str(i) for i in range(count)]) - self.assertEqual(list(subg2), ref) - self.assertEqual(list(reversed(subg2)), list(reversed(ref))) + self.assertEqual(list(subg2), sorted(self.titles)) if __name__ == '__main__':