diff --git a/metanorm/normalizers/geospaas/downscaled_ecmwf.py b/metanorm/normalizers/geospaas/downscaled_ecmwf.py
new file mode 100644
index 0000000..4865e20
--- /dev/null
+++ b/metanorm/normalizers/geospaas/downscaled_ecmwf.py
@@ -0,0 +1,80 @@
+"""Normalizer for the downscaled ECMWF seasonal forecasts"""
+
+import re
+from datetime import datetime, timezone
+
+import dateutil.parser
+from dateutil.relativedelta import relativedelta
+
+import metanorm.utils as utils
+
+from .base import GeoSPaaSMetadataNormalizer
+from ...errors import MetadataNormalizationError
+
+
+class DownscaledECMWFMetadataNormalizer(GeoSPaaSMetadataNormalizer):
+    """Generate the properties of a GeoSPaaS Dataset from a downscaled
+    ECMWF seasonal forecast netcdf file
+    """
+
+    def check(self, raw_metadata):
+        """Check that an entry ID can be extracted from the raw
+        metadata, i.e. that the URL matches the expected pattern
+        """
+        try:
+            entry_id = self.get_entry_id(raw_metadata)
+        except MetadataNormalizationError:
+            return False
+        return bool(entry_id)  # True if the entry_id is not empty
+
+    def get_entry_title(self, raw_metadata):
+        """Return the fixed title used for these datasets"""
+        return 'Downscaled ECMWF seasonal forecast'
+
+    @utils.raises((AttributeError, KeyError))
+    def get_entry_id(self, raw_metadata):
+        """Extract the entry ID (the file name without its '.nc'
+        extension) from the 'url' raw attribute
+        """
+        # the dot is escaped so that only a literal '.nc' extension
+        # is accepted, not any character followed by 'nc'
+        return re.match(
+            r'^.*[/\\](Seasonal_[a-zA-Z]{3}[0-9]{2}_[a-zA-Z]+_n[0-9]+)\.nc$',
+            raw_metadata['url']
+        ).group(1)
+
+    def get_summary(self, raw_metadata):
+        """Return the fixed summary used for these datasets; the raw
+        metadata is not read
+        """
+        return "Downscaled version of ECMWF's seasonal forecasts"
+
+    @utils.raises((KeyError, dateutil.parser.ParserError))
+    def get_time_coverage_start(self, raw_metadata):
+        """Return the first day of the month of the 'date' raw
+        attribute. UTC is used if the date has no timezone
+        """
+        creation_date = dateutil.parser.parse(raw_metadata['date'])
+        return datetime(creation_date.year, creation_date.month, 1,
+                        tzinfo=creation_date.tzinfo or timezone.utc)
+
+    @utils.raises((KeyError, dateutil.parser.ParserError))
+    def get_time_coverage_end(self, raw_metadata):
+        """Return the start of the time coverage plus 6 months"""
+        return self.get_time_coverage_start(raw_metadata) + relativedelta(months=6)
+
+    def get_platform(self, raw_metadata):
+        """Return the 'OPERATIONAL MODELS' GCMD platform"""
+        return utils.get_gcmd_platform('OPERATIONAL MODELS')
+
+    def get_instrument(self, raw_metadata):
+        """Return the 'Computer' GCMD instrument"""
+        return utils.get_gcmd_instrument('Computer')
+
+    def get_location_geometry(self, raw_metadata):
+        """Return an empty string: no geometry is extracted"""
+        return ''
+
+    def get_provider(self, raw_metadata):
+        """Return the 'NERSC' GCMD provider"""
+        return utils.get_gcmd_provider(['NERSC'])
diff --git a/tests/normalizers/test_downscaled_ecmwf.py b/tests/normalizers/test_downscaled_ecmwf.py
new file mode 100644
index 0000000..2b60a4a
--- /dev/null
+++ b/tests/normalizers/test_downscaled_ecmwf.py
@@ -0,0 +1,112 @@
+"""Tests for the downscaled ECMWF normalizer"""
+
+import unittest
+import unittest.mock as mock
+from datetime import datetime, timezone
+
+import metanorm.normalizers as normalizers
+from metanorm.errors import MetadataNormalizationError
+
+
+class DownscaledECMWFMetadataNormalizerTests(unittest.TestCase):
+    """Tests for DownscaledECMWFMetadataNormalizer"""
+
+    def setUp(self):
+        self.normalizer = normalizers.DownscaledECMWFMetadataNormalizer()
+
+    def test_check(self):
+        """Test the checking condition"""
+        self.assertTrue(self.normalizer.check({'url': '/foo/bar/Seasonal_Nov23_SAT_n15.nc'}))
+        self.assertTrue(self.normalizer.check({'url': '/foo/bar/Seasonal_Nov23_SDA_n15.nc'}))
+
+        self.assertFalse(self.normalizer.check({}))
+        self.assertFalse(self.normalizer.check({'url': ''}))
+        self.assertFalse(self.normalizer.check({'url': '/foo/bar/baz.nc'}))
+        # the extension must be a literal '.nc'
+        self.assertFalse(self.normalizer.check({'url': '/foo/bar/Seasonal_Nov23_SAT_n15_nc'}))
+
+    def test_get_entry_title(self):
+        """Test getting the title"""
+        self.assertEqual(self.normalizer.get_entry_title({}), 'Downscaled ECMWF seasonal forecast')
+
+    def test_get_entry_id(self):
+        """Test getting the ID"""
+        self.assertEqual(
+            self.normalizer.get_entry_id({
+                'url': '/foo/bar/Seasonal_Nov23_SDA_n15.nc'
+            }),
+            'Seasonal_Nov23_SDA_n15')
+        self.assertEqual(
+            self.normalizer.get_entry_id({
+                'url': '/foo/bar/Seasonal_Nov23_SAT_n15.nc'
+            }),
+            'Seasonal_Nov23_SAT_n15')
+
+    def test_entry_id_error(self):
+        """A MetadataNormalizationError should be raised if the url
+        attribute is missing or the ID is not found
+        """
+        with self.assertRaises(MetadataNormalizationError):
+            self.normalizer.get_entry_id({})
+        with self.assertRaises(MetadataNormalizationError):
+            self.normalizer.get_entry_id({'url': 'foo'})
+
+    def test_summary(self):
+        """Test getting the summary"""
+        self.assertEqual(
+            self.normalizer.get_summary({}),
+            "Downscaled version of ECMWF's seasonal forecasts")
+
+    def test_get_time_coverage_start(self):
+        """Test getting the start of the time coverage"""
+        self.assertEqual(
+            self.normalizer.get_time_coverage_start({'date': '2023-11-14 13:21:08'}),
+            datetime(year=2023, month=11, day=1, tzinfo=timezone.utc))
+
+    def test_missing_time_coverage_start(self):
+        """A MetadataNormalizationError must be raised by
+        get_time_coverage_start() when the date raw attribute is missing
+        """
+        with self.assertRaises(MetadataNormalizationError):
+            self.normalizer.get_time_coverage_start({})
+
+    def test_get_time_coverage_end(self):
+        """Test getting the end of the time coverage"""
+        self.assertEqual(
+            self.normalizer.get_time_coverage_end({'date': '2023-11-14 13:21:08'}),
+            datetime(year=2024, month=5, day=1, tzinfo=timezone.utc))
+
+    def test_missing_time_coverage_end(self):
+        """A MetadataNormalizationError must be raised by
+        get_time_coverage_end() when the date raw attribute is missing
+        """
+        with self.assertRaises(MetadataNormalizationError):
+            self.normalizer.get_time_coverage_end({})
+
+    def test_gcmd_platform(self):
+        """Test getting the platform"""
+        with mock.patch('metanorm.utils.get_gcmd_platform') as mock_get_gcmd_method:
+            self.assertEqual(
+                self.normalizer.get_platform({}),
+                mock_get_gcmd_method.return_value)
+            mock_get_gcmd_method.assert_called_with('OPERATIONAL MODELS')
+
+    def test_gcmd_instrument(self):
+        """Test getting the instrument"""
+        with mock.patch('metanorm.utils.get_gcmd_instrument') as mock_get_gcmd_method:
+            self.assertEqual(
+                self.normalizer.get_instrument({}),
+                mock_get_gcmd_method.return_value)
+            mock_get_gcmd_method.assert_called_with('Computer')
+
+    def test_gcmd_provider(self):
+        """Test getting the provider"""
+        with mock.patch('metanorm.utils.get_gcmd_provider') as mock_get_gcmd_method:
+            self.assertEqual(
+                self.normalizer.get_provider({}),
+                mock_get_gcmd_method.return_value)
+            mock_get_gcmd_method.assert_called_with(['NERSC'])
+
+    def test_get_location_geometry(self):
+        """get_location_geometry() should return an empty string"""
+        self.assertEqual(self.normalizer.get_location_geometry({}), '')