diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index 470a0e7..54fdadf 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -7,6 +7,7 @@ on:
     - cron: '00 4 * * *'  # daily at 4AM
 
 jobs:
+
   build:
     runs-on: ubuntu-latest
 
diff --git a/pyscicat/client.py b/pyscicat/client.py
index 3056850..d90aea4 100644
--- a/pyscicat/client.py
+++ b/pyscicat/client.py
@@ -273,6 +273,75 @@ def upload_derived_dataset(self, dataset: Dataset) -> str:
         logger.info(f"new dataset created {new_pid}")
         return new_pid
 
+    def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str:
+        """Upsert a raw dataset
+
+        Parameters
+        ----------
+        dataset : Dataset
+            Dataset to upsert
+
+        filter_fields
+            Filters to locate where to upsert dataset
+
+        Returns
+        -------
+        str
+            pid (or unique identifier) of the dataset
+
+        Raises
+        ------
+        ScicatCommError
+            Raises if a non-20x message is returned
+        """
+        query_results = self.get_datasets(filter_fields)
+        if not query_results:
+            logger.info("Dataset does not exist yet, it will be inserted")
+        filter_fields = json.dumps(filter_fields)
+        raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}'
+        resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True))
+        if not resp.ok:
+            err = resp.json()["error"]
+            raise ScicatCommError(f"Error upserting raw dataset {err}")
+        new_pid = resp.json().get("pid")
+        logger.info(f"dataset upserted {new_pid}")
+        return new_pid
+
+    def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str:
+        """Upsert a derived dataset
+
+        Parameters
+        ----------
+        dataset : Dataset
+            Dataset to upsert
+
+        filter_fields
+            Filters to locate where to upsert dataset
+
+        Returns
+        -------
+        str
+            pid (or unique identifier) of the dataset
+
+        Raises
+        ------
+        ScicatCommError
+            Raises if a non-20x message is returned
+        """
+
+        query_results = self.get_datasets(filter_fields)
+        if not query_results:
+            logger.info("Dataset does not exist yet, it will be inserted")
+        filter_fields = json.dumps(filter_fields)
+        dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}'
+        resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True))
+        if not resp.ok:
+            err = resp.json()["error"]
+            raise ScicatCommError(f"Error upserting derived dataset {err}")
+        new_pid = resp.json().get("pid")
+        logger.info(f"dataset upserted {new_pid}")
+        return new_pid
+
     def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"):
         """Upload a Datablock
 
diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py
index 39ba2ca..b9201d1 100644
--- a/pyscicat/tests/test_client.py
+++ b/pyscicat/tests/test_client.py
@@ -15,6 +15,7 @@
     Datablock,
     DataFile,
     Dataset,
+    RawDataset,
     Ownable,
 )
 
@@ -28,6 +29,26 @@ def add_mock_requests(mock_request):
     )
     mock_request.post(local_url + "Samples", json={"sampleId": "dataset_id"})
     mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"})
+    mock_request.get(
+        local_url
+        + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D",
+        json={"response": "random"},
+    )
+    mock_request.get(
+        local_url
+        + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22wowza%22%7D%7D",
+        json={"response": "random"},
+    )
+    mock_request.post(
+        local_url
+        + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D",
+        json={"pid": "42"},
+    )
+    mock_request.post(
+        local_url
+        + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22wowza%22%7D%7D",
+        json={"pid": "54"},
+    )
     mock_request.post(
         local_url + "RawDatasets/42/origdatablocks",
         json={"response": "random"},
@@ -80,6 +101,33 @@ def test_scicate_ingest():
     )
     dataset_id = scicat.upload_raw_dataset(dataset)
 
+    # new dataset
+    dataset = RawDataset(
+        path="/foo/bar",
+        size=42,
+        owner="slartibartfast",
+        contactEmail="slartibartfast@magrathea.org",
+        creationLocation="magrathea",
+        creationTime=str(datetime.now()),
+        type="raw",
+        instrumentId="earth",
+        proposalId="deepthought",
+        dataFormat="planet",
+        principalInvestigator="A. Mouse",
+        sourceFolder="/foo/bar",
+        scientificMetadata={"a": "newfield"},
+        sampleId="gargleblaster",
+        **ownable.dict()
+    )
+
+    # Update existing record
+    dataset_id = scicat.upsert_raw_dataset(dataset, {"sampleId": "gargleblaster"})
+    assert dataset_id == "42"
+
+    # Upsert non-existing record
+    dataset_id_2 = scicat.upsert_raw_dataset(dataset, {"sampleId": "wowza"})
+    assert dataset_id_2 == "54"
+
     # Datablock with DataFiles
     data_file = DataFile(path="/foo/bar", size=42)
     data_block = Datablock(
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 42d5247..7c4c01d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,7 +7,6 @@ pytest
 sphinx
 twine
 black
-nbstripout
 requests_mock
 # These are dependencies of various sphinx extensions for documentation.
 ipython