Skip to content

Commit

Permalink
Fix issue when cannot write GRIB based array field to disk (#555)
Browse files Browse the repository at this point in the history
* Fix issue when cannot write GRIB based array field to disk
  • Loading branch information
sandorkertesz authored Dec 9, 2024
1 parent 5946e2b commit e03e1d1
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 131 deletions.
148 changes: 19 additions & 129 deletions docs/examples/grib_metadata_object.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
{
"data": {
"text/plain": [
"<earthkit.data.readers.grib.metadata.GribFieldMetadata at 0x290e35390>"
"<earthkit.data.readers.grib.metadata.GribFieldMetadata at 0x294e37ee0>"
]
},
"execution_count": 3,
Expand Down Expand Up @@ -250,7 +250,7 @@
{
"data": {
"text/plain": [
"<earthkit.data.readers.grib.metadata.StandAloneGribMetadata at 0x290e36080>"
"<earthkit.data.readers.grib.metadata.StandAloneGribMetadata at 0x294e7cee0>"
]
},
"execution_count": 5,
Expand All @@ -263,108 +263,6 @@
"md_copy"
]
},
{
"cell_type": "raw",
"id": "388d83a5-8165-4b3d-a121-2c7f2cb578e1",
"metadata": {
"editable": true,
"raw_mimetype": "text/restructuredtext",
"slideshow": {
"slide_type": ""
},
"tags": [],
"vscode": {
"languageId": "raw"
}
},
"source": [
"By default :py:meth:`~data.readers.grib.metadata.GribMetadata.override` is called with ``headers_only_clone=True`` generating the new handle with all the data values (and some related information) removed. With this the resulting object can be significantly smaller, especially if the data section is large. The downside is that now the value related keys either cannot be accessed or give back wrong values. E.g when using the \"average\" key we get:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "568fe5c9-df84-4d49-aee1-ada0e6a15c28",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(279.70703560965404, 47485.4296875)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"md[\"average\"], md_copy[\"average\"]"
]
},
{
"cell_type": "markdown",
"id": "8796976f-a02d-4101-8cff-c14a7730d82c",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"To get a copy without shrinking the GRIB handle use ``headers_only_clone=False``."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ba989ad8-e034-4168-bc29-1d73877edad2",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"md_copy_full = md.override(headers_only_clone=False)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "15d723c7-2f73-4c14-ab87-2a94de2379f8",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"279.70703560965404"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"md_copy_full[\"average\"]"
]
},
{
"cell_type": "markdown",
"id": "3240cef2-baa9-4a87-a83a-dafa97b78e43",
Expand Down Expand Up @@ -396,7 +294,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"id": "40c6d232-03de-402b-82bf-8647e8a7bece",
"metadata": {
"editable": true,
Expand All @@ -412,7 +310,7 @@
"('z', 850)"
]
},
"execution_count": 9,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -438,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"id": "ef78a3ec-4ea2-4ff5-8c90-e60b5e07e77f",
"metadata": {
"editable": true,
Expand All @@ -454,7 +352,7 @@
"('t', 1000)"
]
},
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -507,7 +405,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 8,
"id": "cb59ad5f-c48b-4943-984d-3abdf48fda8d",
"metadata": {
"editable": true,
Expand All @@ -516,15 +414,7 @@
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array.shape[0]= 84 len(metadata)= 1\n"
]
}
],
"outputs": [],
"source": [
"from earthkit.data import FieldList\n",
"import numpy as np\n",
Expand All @@ -537,7 +427,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 9,
"id": "85c32bfb-c929-404f-add9-9adae40418d2",
"metadata": {
"editable": true,
Expand Down Expand Up @@ -606,7 +496,7 @@
"0 an 0 regular_ll "
]
},
"execution_count": 12,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -630,12 +520,12 @@
}
},
"source": [
"Please note that the resulting :py:class:`~data.sources.array_list.ArrayFieldList` always contains a :py:class:`~data.readers.grib.metadata.RestrictedGribMetadata` object for each field. These objects possess their own GRIB handles, which is ensured by creating a copy with ``override(headers_only_clone=True)`` when needed. On top of that metadata access is limited to keys not related to data values. Getting metadata on any other keys will throw an exception. "
"Please note that the resulting :py:class:`~data.sources.array_list.ArrayFieldList` always contains a :py:class:`~data.readers.grib.metadata.RestrictedGribMetadata` object for each field. These objects possess their own GRIB handles, which is ensured by creating a copy with ``override()`` when needed. On top of that metadata access is limited to keys not related to data values. Getting metadata on any other keys will throw an exception. "
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 10,
"id": "c6fe87ed-ee88-4f4d-a2b6-9401b364e2df",
"metadata": {
"editable": true,
Expand All @@ -648,10 +538,10 @@
{
"data": {
"text/plain": [
"<earthkit.data.readers.grib.metadata.RestrictedGribMetadata at 0x290e7d990>"
"<earthkit.data.readers.grib.metadata.RestrictedGribMetadata at 0x294e7dc60>"
]
},
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -662,7 +552,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 11,
"id": "27686ac4-9382-4916-ad0e-be96a649d034",
"metadata": {
"editable": true,
Expand All @@ -678,7 +568,7 @@
"'Wind speed'"
]
},
"execution_count": 14,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -689,7 +579,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 12,
"id": "dc28fa77-4020-431f-ad37-e480a69f9d7f",
"metadata": {
"editable": true,
Expand Down Expand Up @@ -730,7 +620,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 13,
"id": "8eab3462-3661-4fc1-9d23-8be05dc99cd8",
"metadata": {
"editable": true,
Expand All @@ -746,7 +636,7 @@
"7.450183054360252"
]
},
"execution_count": 16,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
11 changes: 9 additions & 2 deletions docs/release_notes/version_0.11_updates.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ Version 0.11 Updates
/////////////////////////


Version 0.11.4
===============

Fixes
++++++

- Enforced the use of ``headers_only_clone=False`` when calling :meth:`GribMetadata.override() <data.readers.grib.metadata.GribMetadata.override>`. This was necessary to fix issues when writing an :py:class:`ArrayField` containing :class:`~data.readers.grib.metadata.GribMetadata` to disk. This is considered a temporary change until the issues with ``headers_only_clone`` are sorted out (:pr:`555`).


Version 0.11.3
===============

Expand All @@ -13,8 +22,6 @@ Fixes
- Increased the minimum version of ``multiurl`` to 0.3.3




Version 0.11.2
===============

Expand Down
4 changes: 4 additions & 0 deletions src/earthkit/data/readers/grib/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ def _copy_key(self, target_handle, key):
def override(self, *args, headers_only_clone=True, **kwargs):
d = dict(*args, **kwargs)

# NOTE: the headers_only_clone argument is currently ignored. Using
# headers_only_clone=True can cause problems when we want to write GRIB to disk
# or modify the generated handle, so until that is fixed we always use
# headers_only_clone=False.
headers_only_clone = False

new_value_size = None
# extra = None
gridspec = d.pop("gridspec", None)
Expand Down
58 changes: 58 additions & 0 deletions tests/array_fieldlist/test_numpy_fl_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from earthkit.data.testing import ARRAY_BACKENDS
from earthkit.data.testing import check_array_type
from earthkit.data.testing import earthkit_examples_file
from earthkit.data.testing import earthkit_test_data_file
from earthkit.data.testing import get_array_namespace

here = os.path.dirname(__file__)
Expand Down Expand Up @@ -225,6 +226,63 @@ def test_array_fl_grib_write_bits_per_value(array_backend, _kwargs, expected_val
assert ds1.metadata("bitsPerValue") == [expected_value] * len(ds)


@pytest.mark.parametrize(
    "filename,shape",
    [
        (earthkit_examples_file("test.grib"), (11, 19)),
        (earthkit_test_data_file("O32_global.grib1"), (5248,)),
        (earthkit_test_data_file("O32_global.grib2"), (5248,)),
    ],
)
def test_array_fl_grib_single_write_to_path(filename, shape):
    """Save a single array field with overridden GRIB metadata and read it back.

    The first field of ``filename`` is used: its values are incremented by one
    and its metadata is overridden with ``shortName="msl"``. The resulting
    one-field array fieldlist is saved to a temporary file, and the shape,
    ``shortName`` and values read back from that file are checked.
    """
    ds = from_source("file", filename)
    assert len(ds) >= 1

    modified_values = ds[0].values + 1

    source_md = ds[0].metadata()
    overridden_md = source_md.override(shortName="msl")

    array_fl = FieldList.from_array(modified_values, overridden_md)
    assert array_fl[0].shape == shape

    with temp_file() as tmp:
        array_fl.save(tmp)
        assert os.path.exists(tmp)

        # everything below is checked on the data re-read from disk
        reloaded = from_source("file", tmp)
        assert reloaded[0].shape == shape
        assert reloaded[0].metadata("shortName") == "msl"

        reloaded_values = reloaded[0].values
        assert np.allclose(modified_values, reloaded_values)


@pytest.mark.parametrize(
    "filename,shape",
    [
        (earthkit_examples_file("test.grib"), (11, 19)),
        (earthkit_test_data_file("O32_global.grib1"), (5248,)),
        (earthkit_test_data_file("O32_global.grib2"), (5248,)),
    ],
)
@pytest.mark.parametrize(
    "_kwargs,expected_value",
    [({}, None), ({"bits_per_value": 8}, 8), ({"bits_per_value": None}, None)],
)
def test_array_fl_grib_single_write_bits_per_value(filename, shape, _kwargs, expected_value):
    """Check ``bitsPerValue`` after saving a fieldlist built from one copied field.

    A fieldlist is created from a copy of the first field of ``filename`` and
    saved with ``_kwargs``. When ``expected_value`` is None, the
    ``bitsPerValue`` of the field before saving is used as the reference.
    """
    source_fl = from_source("file", filename)

    single_fl = source_fl.from_fields([source_fl[0].copy()])
    assert single_fl[0].shape == shape

    # None means "compare against the bitsPerValue the field had before saving"
    reference_bits = single_fl[0].metadata("bitsPerValue") if expected_value is None else expected_value

    with temp_file() as tmp:
        single_fl.save(tmp, **_kwargs)
        written_fl = from_source("file", tmp)
        assert written_fl.metadata("bitsPerValue") == [reference_bits] * len(single_fl)


if __name__ == "__main__":
from earthkit.data.testing import main

Expand Down
1 change: 1 addition & 0 deletions tests/core/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def test_grib_metadata_override_invalid():
assert "EncodingError" in e.typename


@pytest.mark.skipif(True, reason="headers_only_clone has to be fixed")
def test_grib_metadata_override_headers_only_true():
ds = from_source("file", earthkit_examples_file("test.grib"))
ref_size = ds[0].metadata("totalLength")
Expand Down
Binary file added tests/data/O32_global.grib1
Binary file not shown.
Binary file added tests/data/O32_global.grib2
Binary file not shown.

0 comments on commit e03e1d1

Please sign in to comment.