diff --git a/h5pyd/_hl/attrs.py b/h5pyd/_hl/attrs.py index 0e191b2..c1134f9 100644 --- a/h5pyd/_hl/attrs.py +++ b/h5pyd/_hl/attrs.py @@ -452,7 +452,7 @@ def __len__(self): def __iter__(self): """ Iterate over the names of attributes. """ if self._objdb_attributes is not None: - if self._parent._track_order: + if self._parent.track_order: attrs = sorted(self._objdb_attributes.items(), key=lambda x: x[1]['created']) else: attrs = sorted(self._objdb_attributes.items()) @@ -469,7 +469,7 @@ def __iter__(self): req = self._req_prefix # backup over the trailing slash in req req = req[:-1] - rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent._track_order else "0"}) + rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent.track_order else "0"}) attributes = rsp['attributes'] attrlist = [] @@ -505,7 +505,7 @@ def __repr__(self): def __reversed__(self): """ Iterate over the names of attributes in reverse order. """ if self._objdb_attributes is not None: - if self._parent._track_order: + if self._parent.track_order: attrs = sorted(self._objdb_attributes.items(), key=lambda x: x[1]['created']) else: attrs = sorted(self._objdb_attributes.items()) @@ -522,7 +522,7 @@ def __reversed__(self): req = self._req_prefix # backup over the trailing slash in req req = req[:-1] - rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent._track_order else "0"}) + rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent.track_order else "0"}) attributes = rsp['attributes'] attrlist = [] diff --git a/h5pyd/_hl/base.py b/h5pyd/_hl/base.py index dd1a78e..db136d6 100644 --- a/h5pyd/_hl/base.py +++ b/h5pyd/_hl/base.py @@ -948,6 +948,10 @@ def modified(self): """Last modified time as a datetime object""" return self.id._modified + @property + def track_order(self): + return self._track_order + def verifyCert(self): # default to validate CERT for https requests, unless # the H5PYD_VERIFY_CERT environment variable is set and True @@ 
-1072,7 +1076,7 @@ def DELETE(self, req, params=None): if rsp.status_code != 200: raise IOError(rsp.reason) - def __init__(self, oid, file=None): + def __init__(self, oid, file=None, track_order=None): """ Setup this object, given its low-level identifier """ self._id = oid self.log = self._id.http_conn.logging @@ -1092,6 +1096,24 @@ def __init__(self, oid, file=None): else: pass + if track_order is None: + # set order based on group creation props + obj_json = self.id.obj_json + if "creationProperties" in obj_json: + cpl = obj_json["creationProperties"] + else: + cpl = {} + if "CreateOrder" in cpl: + createOrder = cpl["CreateOrder"] + if not createOrder or createOrder == "0": + self._track_order = False + else: + self._track_order = True + else: + self._track_order = False + else: + self._track_order = track_order + def __hash__(self): return hash(self.id.id) diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 3678825..b08ddff 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -31,6 +31,7 @@ from . import selections as sel from .datatype import Datatype from .h5type import getTypeItem, createDataType, check_dtype, special_dtype, getItemSize +from .. 
import config _LEGACY_GZIP_COMPRESSION_VALS = frozenset(range(10)) VERBOSE_REFRESH_TIME = 1.0 # 1 second @@ -77,6 +78,7 @@ def make_new_dset( compression_opts=None, fillvalue=None, scaleoffset=None, + track_order=None, track_times=None, initializer=None, initializer_opts=None @@ -88,6 +90,7 @@ def make_new_dset( # fill in fields for the body of the POST request as we got body = {} + cfg = config.get_config() # Convert data to a C-contiguous ndarray if data is not None and not isinstance(data, Empty): @@ -249,17 +252,14 @@ def make_new_dset( dcpl["fillValue"] = fillvalue + if track_order or cfg.track_order: + dcpl["CreateOrder"] = 1 + if chunks and isinstance(chunks, dict): dcpl["layout"] = chunks body["creationProperties"] = dcpl - """ - if track_times in (True, False): - dcpl.set_obj_track_times(track_times) - elif track_times is not None: - raise TypeError("track_times must be either True or False") - """ if maxshape is not None and len(maxshape) > 0: if shape is not None: maxshape = tuple(m if m is not None else 0 for m in maxshape) @@ -291,7 +291,7 @@ def make_new_dset( if data is not None: # init data - dset = Dataset(dset_id) + dset = Dataset(dset_id, track_order=(track_order or cfg.track_order)) dset[...] 
= data return dset_id @@ -763,12 +763,12 @@ def allocated_size(self): self._getVerboseInfo() return self._allocated_size - def __init__(self, bind, track_order=False): + def __init__(self, bind, track_order=None): """Create a new Dataset object by binding to a low-level DatasetID.""" if not isinstance(bind, DatasetID): raise ValueError(f"{bind} is not a DatasetID") - HLObject.__init__(self, bind) + HLObject.__init__(self, bind, track_order=track_order) self._dcpl = self.id.dcpl_json self._filters = filters.get_filters(self._dcpl) @@ -778,7 +778,14 @@ def __init__(self, bind, track_order=False): # make a numpy dtype out of the type json self._dtype = createDataType(self.id.type_json) self._item_size = getItemSize(self.id.type_json) - self._track_order = track_order + if track_order is None: + if "CreateOrder" in self._dcpl: + if not self._dcpl["CreateOrder"] or self._dcpl["CreateOrder"] == "0": + self._track_order = False + else: + self._track_order = True + else: + self._track_order = track_order self._shape = self.get_shape() diff --git a/h5pyd/_hl/group.py b/h5pyd/_hl/group.py index 21404db..b7a6501 100644 --- a/h5pyd/_hl/group.py +++ b/h5pyd/_hl/group.py @@ -58,8 +58,8 @@ def __init__(self, bind, track_order=None, **kwargs): if not isinstance(bind, GroupID): raise ValueError(f"{bind} is not a GroupID") - HLObject.__init__(self, bind, **kwargs) - + HLObject.__init__(self, bind, track_order=track_order, **kwargs) + """ if track_order is None: # set order based on group creation props gcpl = self.id.gcpl_json @@ -73,6 +73,7 @@ def __init__(self, bind, track_order=None, **kwargs): self._track_order = False else: self._track_order = track_order + """ self._req_prefix = "/groups/" + self.id.uuid self._link_db = {} # cache for links @@ -164,7 +165,7 @@ def _get_link_json(self, h5path): req = "/groups/" + parent_uuid + "/links/" + name try: - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + rsp_json = self.GET(req, 
params={"CreateOrder": "1" if self.track_order else "0"}) except IOError: raise KeyError("Unable to open object (Component not found)") @@ -219,9 +220,9 @@ def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=N group_json = rsp groupId = GroupID(self, group_json) - sub_group = Group(groupId) - if track_order or cfg.track_order: - sub_group._track_order = True + + sub_group = Group(groupId, track_order=(track_order or cfg.track_order)) + if parent_name: if parent_name[-1] == '/': parent_name = parent_name + link @@ -355,6 +356,8 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds): conjunction with the scale/offset filter. fillvalue (Scalar) Use this value for uninitialized parts of the dataset. + track_order + (T/F) List attributes by creation_time if set track_times (T/F) Enable dataset creation timestamps. initializer @@ -426,12 +429,17 @@ def create_dataset_like(self, name, other, **kwupdate): 'fillvalue'): kwupdate.setdefault(k, getattr(other, k)) # TODO: more elegant way to pass these (dcpl to create_dataset?) 
- # TBD: track times and creation order not yet supported - """ - dcpl = other.id.get_create_plist() - kwupdate.setdefault('track_times', dcpl.get_obj_track_times()) - kwupdate.setdefault('track_order', dcpl.get_attr_creation_order() > 0) - """ + + dcpl_json = other.id.dcpl_json + track_order = None + if "CreateOrder" in dcpl_json: + createOrder = dcpl_json["CreateOrder"] + if not createOrder or createOrder == "0": + track_order = False + else: + track_order = True + + kwupdate.setdefault('track_order', track_order) # Special case: the maxshape property always exists, but if we pass it # to create_dataset, the new dataset will automatically get chunked @@ -566,9 +574,10 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=None): else: raise IOError(f"Unexpected uuid: {uuid}") objdb = self.id.http_conn.getObjDb() - if objdb and uuid in objdb: + if objdb and uuid in objdb and False: # we should be able to construct an object from objdb json obj_json = objdb[uuid] + print('fetch from db') else: # will need to get JSON from server req = f"/{collection_type}/{uuid}" @@ -583,11 +592,11 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=None): elif collection_type == 'datatypes': tgt = Datatype(TypeID(self, obj_json)) elif collection_type == 'datasets': - # create a Table if the daset is one dimensional and compound + # create a Table if the dataset is one dimensional and compound shape_json = obj_json["shape"] dtype_json = obj_json["type"] if "dims" in shape_json and len(shape_json["dims"]) == 1 and dtype_json["class"] == 'H5T_COMPOUND': - tgt = Table(DatasetID(self, obj_json)) + tgt = Table(DatasetID(self, obj_json), track_order=track_order) else: tgt = Dataset(DatasetID(self, obj_json), track_order=track_order) else: @@ -700,6 +709,10 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Non Return HardLink, SoftLink and ExternalLink classes. Return "default" if nothing with that name exists. 
+ "track_order" is (T/F): + List links and attributes by creation order if True, alphanumerically if False. + If None, the track_order used when creating the group will be used. + "limit" is an integer: If "name" is None, this will return the first "limit" links in the group. @@ -876,8 +889,8 @@ def __setitem__(self, name, obj): parent_uuid = link_json["id"] req = "/groups/" + parent_uuid params = {} - if self._track_order is not None: - params["CreateOrder"] = "1" if self._track_order else "0" + if self.track_order is not None: + params["CreateOrder"] = "1" if self.track_order else "0" group_json = self.GET(req, params=params) tgt = Group(GroupID(self, group_json)) tgt[basename] = obj @@ -977,8 +990,8 @@ def __len__(self): req = "/groups/" + self.id.uuid params = {} - if self._track_order is not None: - params["CreateOrder"] = "1" if self._track_order else "0" + if self.track_order is not None: + params["CreateOrder"] = "1" if self.track_order else "0" rsp_json = self.GET(req, params=params) return rsp_json['linkCount'] @@ -989,8 +1002,8 @@ def __iter__(self): if links is None: req = "/groups/" + self.id.uuid + "/links" params = {} - if self._track_order is not None: - params["CreateOrder"] = "1" if self._track_order else "0" + if self.track_order is not None: + params["CreateOrder"] = "1" if self.track_order else "0" rsp_json = self.GET(req, params=params) links = rsp_json['links'] @@ -1003,7 +1016,7 @@ def __iter__(self): for x in links: yield x['title'] else: - if self._track_order: + if self.track_order: links = sorted(links.items(), key=lambda x: x[1]['created']) else: links = sorted(links.items()) @@ -1217,8 +1230,8 @@ def visititems(self, func): # request from server req = "/groups/" + parent.id.uuid + "/links" params = {} - if self._track_order is not None: - params["CreateOrder"] = "1" if self._track_order else "0" + if self.track_order is not None: + params["CreateOrder"] = "1" if self.track_order else "0" rsp_json = self.GET(req, params=params) links = 
rsp_json['links'] for link in links: @@ -1270,7 +1283,7 @@ def __reversed__(self): if links is None: req = "/groups/" + self.id.uuid + "/links" - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + rsp_json = self.GET(req, params={"CreateOrder": "1" if self.track_order else "0"}) links = rsp_json['links'] # reset the link cache @@ -1282,7 +1295,7 @@ def __reversed__(self): for x in reversed(links): yield x['title'] else: - if self._track_order: + if self.track_order: links = sorted(links.items(), key=lambda x: x[1]['created']) else: links = sorted(links.items()) diff --git a/h5pyd/_hl/table.py b/h5pyd/_hl/table.py index 7750170..040c871 100644 --- a/h5pyd/_hl/table.py +++ b/h5pyd/_hl/table.py @@ -82,13 +82,13 @@ class Table(Dataset): """ Represents an HDF5 dataset """ - def __init__(self, bind): + def __init__(self, bind, track_order=None): """ Create a new Table object by binding to a low-level DatasetID. """ if not isinstance(bind, DatasetID): raise ValueError(f"{bind} is not a DatasetID") - Dataset.__init__(self, bind) + Dataset.__init__(self, bind, track_order=track_order) if len(self._dtype) < 1: raise ValueError("Table type must be compound") diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py index 408ed99..6bbe1db 100644 --- a/test/hl/test_attribute.py +++ b/test/hl/test_attribute.py @@ -306,13 +306,22 @@ def test_track_order(self): filename = self.getFileName("test_test_track_order_attribute") print(f"filename: {filename}") with h5py.File(filename, 'w') as f: - g1 = f.create_group('test', track_order=True) - self.fill_attrs(g1) - self.assertEqual(list(g1.attrs), list(self.titles)) + grp1 = f.create_group('grp1', track_order=True) + self.fill_attrs(grp1) + self.assertEqual(list(grp1.attrs), list(self.titles)) + dset1 = f.create_dataset('dset1', data=[42,], track_order=True) + self.fill_attrs(dset1) + dset2 = f.create_dataset_like('dset2', dset1) + self.fill_attrs(dset2) + self.assertEqual(list(dset1.attrs), 
list(self.titles)) + self.assertEqual(list(dset2.attrs), list(self.titles)) # group should return track order with h5py.File(filename) as f: - g1 = f['test'] - self.assertEqual(list(g1.attrs), list(self.titles)) + grp1 = f['grp1'] + self.assertEqual(list(grp1.attrs), list(self.titles)) + dset1 = f['dset1'] + self.assertEqual(list(dset1.attrs), list(self.titles)) + self.assertEqual(list(dset2.attrs), list(self.titles)) def test_track_order_cfg(self): filename = self.getFileName("test_test_track_order_attribute") @@ -320,15 +329,19 @@ def test_track_order_cfg(self): cfg = h5py.get_config() with h5py.File(filename, 'w') as f: cfg.track_order = True - g1 = f.create_group('test') + grp1 = f.create_group('grp1') + dset1 = f.create_dataset('dset1', data=[42,]) cfg.track_order = False # reset - - self.fill_attrs(g1) - self.assertEqual(list(g1.attrs), list(self.titles)) + self.fill_attrs(grp1) + self.fill_attrs(dset1) + self.assertEqual(list(grp1.attrs), list(self.titles)) + self.assertEqual(list(dset1.attrs), list(self.titles)) with h5py.File(filename) as f: - g1 = f['test'] - self.assertEqual(list(g1.attrs), list(self.titles)) + grp1 = f['grp1'] + self.assertEqual(list(grp1.attrs), list(self.titles)) + dset1 = f['dset1'] + self.assertEqual(list(dset1.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_test_no_track_order_attribute") diff --git a/test/hl/test_group.py b/test/hl/test_group.py index 89d391d..40b4fcc 100644 --- a/test/hl/test_group.py +++ b/test/hl/test_group.py @@ -667,6 +667,7 @@ def test_get_group_track_order(self): print(f"filename: {filename}") with h5py.File(filename, 'w') as f: g = f.create_group('order') + g._track_order = True # create subgroup and populate it with links g.create_group('subgroup') self.populate(g['subgroup'])