Skip to content

Commit

Permalink
Add Cook23 to the shop (#1724)
Browse files Browse the repository at this point in the history
* Add Cook23 to the shop

* Add whats new
  • Loading branch information
fmaussion authored Aug 16, 2024
1 parent fb3e42b commit 9f6b028
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 4 deletions.
9 changes: 6 additions & 3 deletions docs/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ Enhancements
Based on :pull:`1403` and :pull:`1532` (unmerged).
By `Jan Malles <https://github.com/jmalles>`_ and
`Fabien Maussion <https://github.com/fmaussion>`_
- Added the option to use the 2D mask of a glacier for the calibration
  of the mass balance. One can now calibrate the mass balance from 2D data, which
  adds consistency when also the run itself is fully distributed. Changes are
found in :pull:`1709`.
By `Alex Fischer <https://github.com/afisc>`_
- Added Cook et al. (2023) Alps thickness data to the shop (:pull:`1724`).
  By `Fabien Maussion <https://github.com/fmaussion>`_


Bug fixes
~~~~~~~~~
Expand Down
176 changes: 176 additions & 0 deletions oggm/shop/cook23.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import logging
import os
import warnings

import numpy as np
import pandas as pd
import xarray as xr
import shapely.geometry as shpg

try:
import salem
except ImportError:
pass

try:
import geopandas as gpd
except ImportError:
pass

from oggm import utils, cfg
from oggm.exceptions import InvalidWorkflowError

# Module logger
log = logging.getLogger(__name__)

# Root URL where the Cook et al. 2023 thickness files are hosted
# (tests monkeypatch this to point to a test-files mirror)
default_base_url = 'https://cluster.klima.uni-bremen.de/~oggm/ice_thickness/cook23/'

# Lazily-populated module-level cache for the lookup shapefile,
# filled on first call to _get_lookup_thickness()
_lookup_thickness = None


def _get_lookup_thickness():
    """Return the Cook23 lookup shapefile as a GeoDataFrame.

    The file is downloaded and parsed only once per process; subsequent
    calls return the module-level cached object.
    """
    global _lookup_thickness
    if _lookup_thickness is not None:
        return _lookup_thickness
    zip_url = default_base_url + 'cook23_thickness_lookup_shp_20240815.zip'
    local_path = utils.file_downloader(zip_url)
    _lookup_thickness = gpd.read_file('zip://' + local_path)
    return _lookup_thickness


@utils.entity_task(log, writes=['gridded_data'])
def cook23_to_gdir(gdir, vars=None):
    """Add the Cook 23 thickness data (and others if wanted)
    to this glacier directory.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    vars : list of str
        the list of variables to add to gdir. Must be available in the
        netcdf files. Defaults to ``['thk', 'divflux']``.
    """

    # Avoid the mutable-default-argument pitfall; effective default unchanged
    if vars is None:
        vars = ['thk', 'divflux']

    # Find out which file(s) we need from the lookup shapefile
    gdf = _get_lookup_thickness()
    cp = shpg.Point(gdir.cenlon, gdir.cenlat)
    sel = gdf.loc[gdf.contains(cp)]
    if len(sel) == 0:
        raise InvalidWorkflowError(f'There seems to be no Cook file for this '
                                   f'glacier: {gdir.rgi_id}')

    if len(sel) > 1:
        # We have multiple candidate files for this glacier - use the full
        # outlines to find the one tile that properly contains it
        other = gdir.read_shapefile('outlines').to_crs(gdf.crs)
        # NOTE(review): contains_properly is called with a GeoDataFrame, so
        # geopandas will align on index rather than test each tile against
        # the outline geometry - confirm this selects the intended tile
        sel = gdf.loc[gdf.contains_properly(other)]
        if len(sel) != 1:
            raise InvalidWorkflowError(f'Weird double file situation for '
                                       f'glacier: {gdir.rgi_id}')

    sel = sel.iloc[0]
    url = default_base_url + sel['thickness']
    input_file = utils.file_downloader(url)

    # Read and reproject
    with xr.open_dataset(input_file) as ds:
        for var in vars:
            if var not in ds:
                raise InvalidWorkflowError(f'Variable {var} not found in the '
                                           f'Cook file for glacier {gdir.rgi_id}')
            # The source files carry no CRS metadata; the projection is set
            # here by hand - assumed UTM zone 32N (EPSG:32632) for all
            # Cook23 Alps tiles (needed by salem for the reprojection below)
            ds[var].attrs['pyproj_srs'] = 'EPSG:32632'
        ds.attrs['pyproj_srs'] = 'EPSG:32632'
        # Load into memory so the file handle can be closed
        ds = ds[vars].load()

    # Reproject onto the local glacier grid and write to gridded_data
    with utils.ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc:

        for var in vars:

            # Reproject (linear interpolation onto gdir.grid)
            data = gdir.grid.map_gridded_data(ds[var].data,
                                              ds.salem.grid,
                                              interp='linear')

            # Write - reuse the netCDF variable if the task ran before
            vn = 'cook23_' + var
            if vn in nc.variables:
                v = nc.variables[vn]
            else:
                v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True)

            if var == 'thk':
                v.units = 'm'
                v.long_name = 'Ice thickness from Cook et al. 2023'
            elif var == 'divflux':
                v.units = 'm yr-1'
                v.long_name = 'Divergence of ice flux from Cook et al. 2023'

            v.data_source = url
            v[:] = data


@utils.entity_task(log)
def cook23_statistics(gdir):
    """Gather statistics about the Cook23 data interpolated to this glacier.
    """

    # Defaults that are always available, even when gridded data is missing
    d = {
        'rgi_id': gdir.rgi_id,
        'rgi_region': gdir.rgi_region,
        'rgi_subregion': gdir.rgi_subregion,
        'rgi_area_km2': gdir.rgi_area_km2,
        'cook23_vol_km3': 0,
        'cook23_area_km2': 0,
        'cook23_perc_cov': 0,
    }

    try:
        with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
            # Thickness outside the glacier mask is set to NaN
            thick = ds['cook23_thk'].where(ds['glacier_mask'], np.nan).load()
        with warnings.catch_warnings():
            # For operational runs we ignore the warnings
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            cell_area = gdir.grid.dx ** 2
            d['cook23_vol_km3'] = float(thick.sum() * cell_area * 1e-9)
            d['cook23_area_km2'] = float((~thick.isnull()).sum() * cell_area * 1e-6)
            d['cook23_perc_cov'] = float(d['cook23_area_km2'] / gdir.rgi_area_km2)
    except (FileNotFoundError, AttributeError, KeyError):
        # Best-effort: keep the zero defaults when the data is not there
        pass

    return d


@utils.global_task(log)
def compile_cook23_statistics(gdirs, filesuffix='', path=True):
    """Gather as much statistics as possible about a list of glaciers.

    It can be used to do result diagnostics and other stuffs. If the data
    necessary for a statistic is not available (e.g.: flowlines length) it
    will simply be ignored.

    Parameters
    ----------
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the glacier directories to process
    filesuffix : str
        add suffix to output file
    path : str, bool
        Set to "True" in order to store the info in the working directory
        Set to a path to store the file to your chosen location
    """
    # Imported here to avoid a circular import at module load time
    from oggm.workflow import execute_entity_task

    records = execute_entity_task(cook23_statistics, gdirs)
    out = pd.DataFrame(records).set_index('rgi_id')

    if path is True:
        fname = 'cook23_statistics' + filesuffix + '.csv'
        out.to_csv(os.path.join(cfg.PATHS['working_dir'], fname))
    elif path:
        out.to_csv(path)

    return out
40 changes: 39 additions & 1 deletion oggm/tests/test_shop.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import pandas as pd
from oggm import utils
from oggm.utils import get_demo_file
from oggm.shop import its_live, rgitopo, bedtopo, millan22, hugonnet_maps, glathida
from oggm.shop import its_live, rgitopo, bedtopo, millan22, cook23, hugonnet_maps, glathida
from oggm.core import gis, centerlines, massbalance
from oggm import cfg, tasks, workflow

Expand Down Expand Up @@ -166,6 +166,44 @@ def test_thickvel_to_glacier(self, class_case_dir, monkeypatch):
assert df['millan_vol_km3'] > 174


class Test_cook23:

    # Network-downloading shop tests are marked slow (see the other shop
    # test classes in this file); the marker was commented out, most likely
    # a debugging leftover
    @pytest.mark.slow
    def test_cook23_to_glacier(self, class_case_dir, monkeypatch):
        """End-to-end check of cook23_to_gdir on Hintereisferner."""

        # Init
        cfg.initialize()
        cfg.PATHS['working_dir'] = class_case_dir
        cfg.PARAMS['use_intersects'] = False
        cfg.PATHS['dem_file'] = get_demo_file('hef_srtm.tif')
        cfg.PARAMS['border'] = 10

        entity = gpd.read_file(get_demo_file('Hintereisferner_RGI5.shp')).iloc[0]
        gdir = oggm.GlacierDirectory(entity)
        tasks.define_glacier_region(gdir)
        tasks.glacier_masks(gdir)

        # use our files
        base_url = 'https://cluster.klima.uni-bremen.de/~oggm/test_files/cook23/'
        monkeypatch.setattr(cook23, 'default_base_url', base_url)

        cook23.cook23_to_gdir(gdir)

        with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
            mask = ds.glacier_mask.data.astype(bool)
            thick = ds.cook23_thk.where(mask).data

        # Simply some coverage and sanity checks
        assert np.isfinite(thick).sum() / mask.sum() > 0.98
        assert np.nanmax(thick) > 120
        assert np.nansum(thick) * gdir.grid.dx**2 * 1e-9 > 0.4

        # Stats
        df = cook23.compile_cook23_statistics([gdir]).iloc[0]
        assert df['cook23_perc_cov'] > 0.98
        assert df['cook23_vol_km3'] > 0.4


class Test_HugonnetMaps:

@pytest.mark.slow
Expand Down

0 comments on commit 9f6b028

Please sign in to comment.