-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix HDFVirtualBackend handling of non coordinate dimension HDF datasets. #410
Changes from all commits
7647a40
1e916b5
fceb871
5d12003
ce118fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -371,6 +371,10 @@ def _virtual_vars_from_hdf( | |||||
group_name = "/" | ||||||
|
||||||
variables = {} | ||||||
non_coordinate_dimesion_vars = HDFVirtualBackend._find_non_coord_dimension_vars( | ||||||
group=g | ||||||
) | ||||||
drop_variables = list(set(drop_variables + non_coordinate_dimesion_vars)) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nitpick, but I think this is more legible and faster:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Speed of this is not important, and I think the first one makes the intent clearer (if the intent is to de-duplicate using a set?). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point! I was getting too nitpicky. Just the typo then :) |
||||||
for key in g.keys(): | ||||||
if key not in drop_variables: | ||||||
if isinstance(g[key], h5py.Dataset): | ||||||
|
@@ -403,3 +407,17 @@ def _get_group_attrs( | |||||
g = f | ||||||
attrs = HDFVirtualBackend._extract_attrs(g) | ||||||
return attrs | ||||||
|
||||||
@staticmethod
def _find_non_coord_dimension_vars(group: H5Group) -> List[str]:
    """Return names of dimension datasets in *group* that hold no data.

    A member of ``group`` is reported when all of the following hold:

    * it is an ``h5py.Dataset`` (checked with ``isinstance``, matching the
      check used when iterating variables elsewhere in this backend),
    * it carries a ``_Netcdf4Dimid`` attribute, and
    * its allocated storage size is zero.

    NOTE(review): these are presumably netCDF4 dimensions that have no
    associated coordinate values, so there is nothing to virtualize for
    them -- confirm against the netCDF-4/HDF5 format description.

    Parameters
    ----------
    group
        The HDF5 group whose direct members are inspected.

    Returns
    -------
    List[str]
        Names of the matching datasets, in the group's iteration order.
    """
    # Member names are unique within a group, so checking the attribute on
    # the object itself is equivalent to first collecting dimension names
    # and then testing membership, and needs only one pass over the group.
    return [
        name
        for name, obj in group.items()
        if isinstance(obj, h5py.Dataset)
        and "_Netcdf4Dimid" in obj.attrs
        and obj.id.get_storage_size() == 0
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,11 +322,11 @@ def test_mixture_of_manifestarrays_and_numpy_arrays( | |
@requires_imagecodecs | ||
def test_nbytes(simple_netcdf4): | ||
vds = open_virtual_dataset(simple_netcdf4) | ||
assert vds.virtualize.nbytes == 88 | ||
assert vds.nbytes == 104 | ||
assert vds.virtualize.nbytes == 32 | ||
assert vds.nbytes == 48 | ||
|
||
vds = open_virtual_dataset(simple_netcdf4, loadable_variables=["foo"]) | ||
assert vds.virtualize.nbytes == 104 | ||
assert vds.virtualize.nbytes == 48 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👏🏻 |
||
|
||
ds = open_dataset(simple_netcdf4) | ||
assert ds.virtualize.nbytes == ds.nbytes |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: typo (`dimesion` -> `dimension`)