Skip to content

Commit

Permalink
Merge pull request #40 from ESGF/fix-synda-status-waiting
Browse files Browse the repository at this point in the history
fix(import_synda): map synda status names to esgpull status names, waiting->queued, running->started
  • Loading branch information
svenrdz authored May 14, 2024
2 parents e54c4cb + 46f3348 commit 648f411
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 2 deletions.
4 changes: 4 additions & 0 deletions esgpull/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class FileStatus(Enum):
def retryable(cls) -> list[FileStatus]:
return [cls.Error, cls.Cancelled]

@classmethod
def contains(cls, s: str) -> bool:
    """Return True when *s* equals the value of one of the enum's members.

    The comparison is made against member values, not member names.
    """
    return any(member.value == s for member in cls)


class FileDict(TypedDict):
file_id: str
Expand Down
20 changes: 18 additions & 2 deletions esgpull/models/synda_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@
from esgpull.models.file import FileStatus
from esgpull.models.query import File

# Maps synda status names to the corresponding FileStatus member.
# SyndaFile.get_status consults it when the lowercased synda status is not
# itself a FileStatus value.
SyndaStatusMap = {
    "running": FileStatus.Started,
    "waiting": FileStatus.Queued,
}


class SyndaBase(MappedAsDataclass, DeclarativeBase):
    """Base class for the synda mapped classes in this module (e.g. SyndaFile).

    It adds no attributes of its own; it only combines MappedAsDataclass and
    DeclarativeBase so that subclasses share them.
    """

    # A single placeholder is enough: the original body carried both `...`
    # and `pass`, which are redundant no-op statements.
    pass


class SyndaFile(SyndaBase):
Expand Down Expand Up @@ -44,6 +49,17 @@ class SyndaFile(SyndaBase):
timestamp: Mapped[str]
file_id: Mapped[int] = mapped_column(init=False, primary_key=True)

def get_status(self) -> FileStatus:
    """Translate this row's synda status into a FileStatus.

    The status is lowercased first. If it matches a FileStatus value it is
    converted directly; otherwise it is looked up in SyndaStatusMap.

    Raises:
        ValueError: if the lowercased status is neither a FileStatus value
            nor a key of SyndaStatusMap. The lowercased status is the
            exception argument.
    """
    name = self.status.lower()
    # Names shared by synda and FileStatus need no translation.
    if FileStatus.contains(name):
        return FileStatus(name)
    # Synda-specific names go through the explicit mapping.
    if name in SyndaStatusMap:
        return SyndaStatusMap[name]
    raise ValueError(name)

def to_file(self) -> File:
file_id = self.file_functional_id
dataset_id = file_id.removesuffix(self.filename).strip(".")
Expand All @@ -63,7 +79,7 @@ def to_file(self) -> File:
checksum=self.checksum,
checksum_type=self.checksum_type.upper(),
size=self.size,
status=FileStatus(self.status),
status=self.get_status(),
)
result.compute_sha()
return result
125 changes: 125 additions & 0 deletions tests/test_synda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pytest

from esgpull.models import SyndaFile
from esgpull.models.file import FileStatus

# Sample SyndaFile keyword arguments, keyed by the FileStatus that
# SyndaFile.get_status() is expected to return for that record.
# The "waiting" and "running" records use synda status names that differ from
# the FileStatus member they are keyed under (Queued and Started).
synda_files_by_status = {
    FileStatus.Done: {
        "url": "http://esgf3.dkrz.de/thredds/fileServer/cmip6/CMIP/IPSL/IPSL-CM6A-LR/1pctCO2/r1i1p1f1/Oyr/bfe/gn/v20190305/bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "file_functional_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.1pctCO2.r1i1p1f1.Oyr.bfe.gn.v20190305.bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "filename": "bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "local_path": "CMIP6/CMIP/IPSL/IPSL-CM6A-LR/1pctCO2/r1i1p1f1/Oyr/bfe/gn/v20190305/bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "data_node": "esgf3.dkrz.de",
        "checksum": "13137cf98fdbcf9a06ec61b148fce054d7e48d4194ac787d008035f64351bc63",
        "checksum_type": "sha256",
        "duration": 74.001225,
        "size": 1866454847,
        "rate": 25221945.271851916,
        "start_date": "2022-05-24 15:10:12.570356",
        "end_date": "2022-05-24 15:11:26.571581",
        "crea_date": "2022-05-12 18:00:41.054310",
        "status": "done",
        "error_msg": "",
        "sdget_status": "0",
        "sdget_error_msg": None,
        "priority": 998,
        "tracking_id": "hdl:21.14100/a8515dde-be82-4e06-a5f4-b206c9c0e828",
        "model": None,
        "project": "CMIP6",
        "variable": "bfe",
        "last_access_date": None,
        "dataset_id": 1,
        "insertion_group_id": 1,
        "timestamp": "2019-03-14T14:55:34Z",
    },
    FileStatus.Error: {
        "url": "http://esgf1.dkrz.de/thredds/fileServer/cordex_l02/cordex/output/EUR-11/GERICS/ICHEC-EC-EARTH/rcp45/r12i1p1/GERICS-REMO2015/v1/3hr/psl/v20220404/psl_EUR-11_ICHEC-EC-EARTH_rcp45_r12i1p1_GERICS-REMO2015_v1_3hr_202701010100-202712312200.nc",
        "file_functional_id": "cordex.output.EUR-11.GERICS.ICHEC-EC-EARTH.rcp45.r12i1p1.REMO2015.v1.3hr.psl.v20220404.psl_EUR-11_ICHEC-EC-EARTH_rcp45_r12i1p1_GERICS-REMO2015_v1_3hr_202701010100-202712312200.nc",
        "filename": "psl_EUR-11_ICHEC-EC-EARTH_rcp45_r12i1p1_GERICS-REMO2015_v1_3hr_202701010100-202712312200.nc",
        "local_path": "cordex/output/EUR-11/GERICS/ICHEC-EC-EARTH/rcp45/r12i1p1/REMO2015/v1/3hr/psl/v20220404/psl_EUR-11_ICHEC-EC-EARTH_rcp45_r12i1p1_GERICS-REMO2015_v1_3hr_202701010100-202712312200.nc",
        "data_node": "esgf1.dkrz.de",
        "checksum": "e302adf110b37444bfe4f3ab362537d2ff6c58e4cbccad671cff8097c083b790",
        "checksum_type": "sha256",
        "duration": None,
        "size": 955958456,
        "rate": None,
        "start_date": "2022-10-13 16:36:56.653705",
        "end_date": "2022-10-13 16:38:57.368596",
        "crea_date": "2022-07-21 11:05:18.948145",
        "status": "error",
        "error_msg": None,
        "sdget_status": "-1",
        "sdget_error_msg": "The operation has exceeded the given deadline. Perhaps : 1 / The data node is unavailable or 2 / You should increase the value of the following parameter : [download]async_http_timeout in the sdt.conf file",
        "priority": 999,
        "tracking_id": "hdl:21.14103/cd63c5d9-8b6a-44cc-9521-dd7426282a45",
        "model": None,
        "project": "CORDEX",
        "variable": "psl",
        "last_access_date": None,
        "dataset_id": 5,
        "insertion_group_id": 13,
        "timestamp": "2022-02-17T19:30:28Z",
    },
    FileStatus.Queued: {
        "url": "http://esgf3.dkrz.de/thredds/fileServer/cmip6/CMIP/IPSL/IPSL-CM6A-LR/1pctCO2/r1i1p1f1/Oyr/bfe/gn/v20190305/bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "file_functional_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.1pctCO2.r1i1p1f1.Oyr.bfe.gn.v20190305.bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "filename": "bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "local_path": "CMIP6/CMIP/IPSL/IPSL-CM6A-LR/1pctCO2/r1i1p1f1/Oyr/bfe/gn/v20190305/bfe_Oyr_IPSL-CM6A-LR_1pctCO2_r1i1p1f1_gn_1850-1999.nc",
        "data_node": "esgf3.dkrz.de",
        "checksum": "13137cf98fdbcf9a06ec61b148fce054d7e48d4194ac787d008035f64351bc63",
        "checksum_type": "sha256",
        "duration": 74.0012250000000051,
        "size": 1866454847,
        "rate": 25221945.2718519158,
        "start_date": "2022-05-24 15:10:12.570356",
        "end_date": "2022-05-24 15:11:26.571581",
        "crea_date": "2022-05-12 18:00:41.054310",
        "status": "waiting",
        "error_msg": "",
        "sdget_status": "0",
        "sdget_error_msg": None,
        "priority": 998,
        "tracking_id": "hdl:21.14100/a8515dde-be82-4e06-a5f4-b206c9c0e828",
        "model": None,
        "project": "CMIP6",
        "variable": "bfe",
        "last_access_date": None,
        "dataset_id": 1,
        "insertion_group_id": 1,
        "timestamp": "2019-03-14T14:55:34Z",
    },
    FileStatus.Started: {
        "url": "http://vesg.ipsl.upmc.fr/thredds/fileServer/cmip6/CMIP/IPSL/IPSL-CM6A-LR/historical/r10i1p1f1/Amon/ua/gr/v20180803/ua_Amon_IPSL-CM6A-LR_historical_r10i1p1f1_gr_185001-201412.nc",
        "file_functional_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.historical.r10i1p1f1.Amon.ua.gr.v20180803.ua_Amon_IPSL-CM6A-LR_historical_r10i1p1f1_gr_185001-201412.nc",
        "filename": "ua_Amon_IPSL-CM6A-LR_historical_r10i1p1f1_gr_185001-201412.nc",
        "local_path": "CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r10i1p1f1/Amon/ua/gr/v20180803/ua_Amon_IPSL-CM6A-LR_historical_r10i1p1f1_gr_185001-201412.nc",
        "data_node": "vesg.ipsl.upmc.fr",
        "checksum": "659637ef4581e070729deeed04605f0da2abadd7e63245107f63878b2aaa2f8c",
        "checksum_type": "sha256",
        "duration": 27.8784020000000012,
        "size": 2221707038,
        "rate": 79692768.5453420132,
        "start_date": "2022-05-24 15:10:12.782149",
        "end_date": "2022-05-24 15:10:40.660551",
        "crea_date": "2022-05-19 11:35:06.755159",
        "status": "running",
        "error_msg": "",
        "sdget_status": "0",
        "sdget_error_msg": None,
        "priority": 998,
        "tracking_id": "hdl:21.14100/9d9ce601-bbc2-48da-868d-0128692d458f",
        "model": None,
        "project": "CMIP6",
        "variable": "ua",
        "last_access_date": None,
        "dataset_id": 2,
        "insertion_group_id": 2,
        "timestamp": "2018-09-03T15:44:33Z",
    },
}


@pytest.mark.parametrize("status, data", synda_files_by_status.items())
def test_synda_file_convert(status: FileStatus, data: dict):
    """Check that a SyndaFile built from *data* reports *status*.

    Each parametrized case is one (expected FileStatus, constructor kwargs)
    pair taken from synda_files_by_status.
    """
    converted = SyndaFile(**data).get_status()
    assert converted == status

0 comments on commit 648f411

Please sign in to comment.