Skip to content

Commit

Permalink
No units attribute for single variable in xarray engine (#592)
Browse files Browse the repository at this point in the history
  • Loading branch information
sandorkertesz authored Jan 22, 2025
1 parent ef3b9c4 commit 1fdabaa
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 12 deletions.
8 changes: 5 additions & 3 deletions src/earthkit/data/readers/grib/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,12 @@ def to_xarray(self, engine=None, xarray_open_dataset_kwargs=None, **kwargs):
- "fixed": Use the attributes defined in ``variable_attrs`` as variables
attributes and ``global_attrs`` as global attributes.
- "unique": Use all the attributes defined in ``attrs``, ``variable_attrs``
and ``global_attrs``. When an attribute has unique a value for a dataset
and ``global_attrs``. When an attribute has a unique value for a dataset
it will be a global attribute, otherwise it will be a variable attribute.
However keys in ``variable_attrs`` are always used as variable attributes,
while keys in ``global_attrs`` are always used as global attributes.
However, an attribute with a unique value only becomes a global attribute if the
CF conventions (Appendix A) allow it to be global (e.g. "units" cannot
be a global attribute). Additionally, keys in ``variable_attrs`` are always used as
variable attributes, while keys in ``global_attrs`` are always used as global attributes.
* attrs: str, number, callable, dict or list of these, None
Attribute or list of attributes. Only used when ``attrs_mode`` is ``unique``.
Its default value (None) expands to [] unless the ``profile`` overwrites it.
Expand Down
49 changes: 46 additions & 3 deletions src/earthkit/data/utils/xarray/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,48 @@
#

import logging
import os
from abc import ABCMeta
from abc import abstractmethod
from collections import defaultdict
from functools import cached_property

from earthkit.data.utils import ensure_dict
from earthkit.data.utils import ensure_iterable

LOG = logging.getLogger(__name__)


class CFAttrs:
    """Lookup table describing how CF attributes may be used.

    The table is read from the ``cf_attrs.yaml`` file located next to this
    module. Each entry maps an attribute name to a dict whose ``use`` item is
    either a single usage code or a list of usage codes. The code ``"G"``
    marks an attribute that is allowed to be global.
    """

    def _load(self):
        """Read and parse ``cf_attrs.yaml`` from this module's directory.

        Returns
        -------
        object
            Whatever ``yaml.safe_load`` produces for the file.

        Raises
        ------
        ValueError
            When the file does not exist.
        Exception
            Any error raised while opening or parsing the file is logged
            and re-raised unchanged.
        """
        here = os.path.dirname(__file__)
        path = os.path.join(here, "cf_attrs.yaml")
        if not os.path.exists(path):
            raise ValueError(f"CF attributes file not found! path={path}")

        # yaml is imported here, i.e. only when the file is actually loaded.
        import yaml

        try:
            # Explicit encoding so parsing does not depend on the locale.
            with open(path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f)
        except Exception as e:
            LOG.exception(f"Failed read CF attributes file {path}. {e}")
            raise

    @cached_property
    def attrs(self):
        """Parsed content of the CF attributes file (loaded on first access)."""
        return self._load()

    def can_be_global(self, name):
        """Tell whether the attribute ``name`` may be a global attribute.

        Parameters
        ----------
        name: str
            Attribute name to look up in :attr:`attrs`.

        Returns
        -------
        bool
            True when ``name`` has no (or an empty) entry in the table, or
            when its ``use`` codes contain ``"G"``. False otherwise.
        """
        item = self.attrs.get(name, None)
        if item:
            use = item["use"]
            # A scalar code must be compared as a whole: a plain ``in`` on a
            # string is a substring test and would match e.g. "Gr".
            if isinstance(use, str):
                use = [use]
            return "G" in use
        return True


# Shared module-level instance. Creating it does not read the YAML file;
# the file is loaded on the first access to ``CF_ATTRS.attrs``.
CF_ATTRS = CFAttrs()


class Attr:
"""Generic attribute class.
Expand Down Expand Up @@ -238,7 +270,7 @@ def _id(x):
global_attrs[item.name] = item.value()

# TODO: make it optional
global_attrs.pop("units", None)
# global_attrs.pop("units", None)

return global_attrs

Expand All @@ -257,13 +289,24 @@ def _build(self, ds, t_vars, rename=None):
if len(v) == 1 and k not in self.attrs.variable_attrs:
global_attrs[k] = list(v)[0]

for var_obj in t_vars.values():
var_obj.adjust_attrs(drop_keys=global_attrs.keys(), rename=rename)
# Some attrs cannot be global according to the CF convention.
# These are removed from global attrs and kept as variable attrs.
global_attrs_keys = list(global_attrs.keys())
global_attrs_renamed_keys = global_attrs_keys
if rename:
global_attrs_renamed_keys = list(rename(global_attrs).keys())

for k1, k2 in zip(global_attrs_keys, global_attrs_renamed_keys):
if not CF_ATTRS.can_be_global(k1) or not CF_ATTRS.can_be_global(k2):
global_attrs.pop(k1)

for k in self.attrs.variable_attrs:
if k in global_attrs:
global_attrs.pop(k)

for var_obj in t_vars.values():
var_obj.adjust_attrs(drop_keys=global_attrs.keys(), rename=rename)

global_attrs = {k: v for k, v in global_attrs.items() if v is not None}

return global_attrs
Expand Down
195 changes: 195 additions & 0 deletions src/earthkit/data/utils/xarray/cf_attrs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Based on CF Conventions Appendix A
# All CF attributes are listed here except for those that are used to describe grid mappings. See Appendix F for the grid mapping attributes.
# The 'Type' values are S for string, N for numeric, and D for the type of the data variable.
# The 'Use' values are G for global, C for variables containing coordinate data, and D for variables containing non-coordinate data.
# Other codes also appear below (M, Gr and "-"); see CF Conventions Appendix A for their meaning.
Conventions:
type: S
use: G
_FillValue:
type: D
use:
- C
- D
actual_range:
type: N
use:
- C
- D
add_offset:
type: N
use:
- C
- D
ancillary_variables:
type: S
use: D
axis:
type: S
use: C
bounds:
type: S
use: C
calendar:
type: S
use: C
cell_measures:
type: S
use: D
cell_methods:
type: S
use: D
cf_role:
type: S
use: C
climatology:
type: S
use: C
comment:
type: S
use:
- G
- C
- D
compress:
type: S
use: C
computed_standard_name:
type: S
use: C
coordinates:
type: S
use:
- D
- M
external_variables:
type: S
use: G
featureType:
type: S
use: G
flag_masks:
type: D
use: D
flag_meanings:
type: S
use: D
flag_values:
type: D
use: D
formula_terms:
type: S
use: C
geometry:
type: S
use:
- C
- D
geometry_type:
type: S
use: M
grid_mapping:
type: S
use:
- D
- M
history:
type: S
use:
- G
- Gr
instance_dimension:
type: S
use: "-"
institution:
type: S
use:
- G
- D
interior_ring:
type: S
use: M
leap_month:
type: N
use: C
leap_year:
type: N
use: C
long_name:
type: S
use:
- C
- D
missing_value:
type: D
use:
- C
- D
month_lengths:
type: N
use: C
node_coordinates:
type: S
use: M
node_count:
type: S
use: M
nodes:
type: S
use: C
part_node_count:
type: S
use: M
positive:
type: S
use: C
references:
type: S
use:
- G
- D
sample_dimension:
type: S
use: "-"
scale_factor:
type: N
use:
- C
- D
source:
type: S
use:
- G
- D
standard_error_multiplier:
type: N
use: D
standard_name:
type: S
use:
- C
- D
title:
type: S
use:
- G
- Gr
units:
type: S
use:
- C
- D
valid_max:
type: N
use:
- C
- D
valid_min:
type: N
use:
- C
- D
valid_range:
type: N
use:
- C
- D
8 changes: 5 additions & 3 deletions src/earthkit/data/utils/xarray/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,12 @@ def open_dataset(
- "fixed": Use the attributes defined in ``variable_attrs`` as variables
attributes and ``global_attrs`` as global attributes.
- "unique": Use all the attributes defined in ``attrs``, ``variable_attrs``
and ``global_attrs``. When an attribute has unique a value for a dataset
and ``global_attrs``. When an attribute has a unique value for a dataset
it will be a global attribute, otherwise it will be a variable attribute.
However keys in ``variable_attrs`` are always used as variable attributes,
while keys in ``global_attrs`` are always used as global attributes.
However, an attribute with a unique value only becomes a global attribute if the
CF conventions (Appendix A) allow it to be global (e.g. "units" cannot
be a global attribute). Additionally, keys in ``variable_attrs`` are always used as
variable attributes, while keys in ``global_attrs`` are always used as global attributes.
attrs: str, number, callable, dict or list of these, None
Attribute or list of attributes. Only used when ``attrs_mode`` is ``unique``.
Its default value (None) expands to [] unless the ``profile`` overwrites it.
Expand Down
13 changes: 10 additions & 3 deletions tests/xr_engine/test_xr_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,10 +469,14 @@ def test_xr_engine_single_field():
lats = np.linspace(90, -90, 19)
lons = np.linspace(0, 350, 36)

attrs_ref = {
"param": "t",
var_attrs_ref = {
"standard_name": "air_temperature",
"long_name": "Temperature",
"units": "K",
}

global_attrs_ref = {
"param": "t",
"paramId": 130,
"class": "od",
"stream": "oper",
Expand All @@ -488,7 +492,7 @@ def test_xr_engine_single_field():
"institution": "ECMWF",
}

assert ds.attrs == attrs_ref
assert ds.attrs == global_attrs_ref

data_vars = ["t"]

Expand All @@ -510,6 +514,9 @@ def test_xr_engine_single_field():

da = ds["t"]

for k, v in var_attrs_ref.items():
assert da.attrs[k] == v

r = da[:, :]
r.shape == (19, 36)
assert np.allclose(r.values, vals_ref)
Expand Down

0 comments on commit 1fdabaa

Please sign in to comment.