diff --git a/src/scicat_communication.py b/src/scicat_communication.py
index ab75ab2..d9216c0 100644
--- a/src/scicat_communication.py
+++ b/src/scicat_communication.py
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
+import logging
+from urllib.parse import urljoin
+
 import requests
 from scicat_configuration import SciCatOptions
 
@@ -15,3 +18,108 @@ def retrieve_value_from_scicat(
         url, headers={"token": config.token}, timeout=config.timeout
     ).json()
     return response[field_name]
+
+
+class ScicatDatasetAPIError(Exception):
+    """Raised when a dataset creation request fails."""
+
+
+def _post_to_scicat(*, url: str, posting_obj: dict, headers: dict, timeout: int):
+    """
+    Send ``posting_obj`` as the JSON body of a POST request to ``url``
+    and return the ``requests`` response object unprocessed.
+    """
+    return requests.request(
+        method="POST",
+        url=url,
+        json=posting_obj,
+        headers=headers,
+        timeout=timeout,
+        stream=False,
+        verify=True,
+    )
+
+
+def _backend_error(response: requests.Response):
+    """
+    Return the ``error`` entry of a failed response, or the raw body text
+    when the body is not a JSON object (e.g. an HTML page from a proxy).
+    """
+    try:
+        return response.json().get("error", {})
+    except (ValueError, AttributeError):
+        return response.text
+
+
+def create_scicat_dataset(
+    *, dataset: dict, config: SciCatOptions, logger: logging.Logger
+) -> dict:
+    """
+    Execute a POST request to scicat to create a dataset
+
+    Returns the decoded JSON body of the response.
+    Raises ``ScicatDatasetAPIError`` if the response status is not ok.
+    """
+    logger.info("Sending POST request to create new dataset")
+    # NOTE: urljoin replaces the last path segment of a host lacking a trailing "/".
+    response = _post_to_scicat(
+        url=urljoin(config.host, "datasets"),
+        posting_obj=dataset,
+        headers={"token": config.token, **config.headers},
+        timeout=config.timeout,
+    )
+    # Check the status before decoding: error bodies are not always JSON.
+    if not response.ok:
+        logger.error(
+            "Failed to create new dataset. \nError message from scicat backend: \n%s",
+            _backend_error(response),
+        )
+        raise ScicatDatasetAPIError(f"Error creating new dataset: \n{dataset}")
+
+    result: dict = response.json()
+    logger.info(
+        "Dataset created successfully. Dataset pid: %s",
+        result.get("pid"),
+    )
+    return result
+
+
+class ScicatOrigDatablockAPIError(Exception):
+    """Raised when an origdatablock creation request fails."""
+
+
+def create_scicat_origdatablock(
+    *, origdatablock: dict, config: SciCatOptions, logger: logging.Logger
+) -> dict:
+    """
+    Execute a POST request to scicat to create a new origdatablock
+
+    Returns the decoded JSON body of the response.
+    Raises ``ScicatOrigDatablockAPIError`` if the response status is not ok.
+    """
+    logger.info("Sending POST request to create new origdatablock")
+    # NOTE: urljoin replaces the last path segment of a host lacking a trailing "/".
+    response = _post_to_scicat(
+        url=urljoin(config.host, "origdatablocks"),
+        posting_obj=origdatablock,
+        headers={"token": config.token, **config.headers},
+        timeout=config.timeout,
+    )
+    # Check the status before decoding: error bodies are not always JSON.
+    if not response.ok:
+        logger.error(
+            "Failed to create new origdatablock. "
+            "Error message from scicat backend: \n%s",
+            _backend_error(response),
+        )
+        raise ScicatOrigDatablockAPIError(
+            f"Error creating new origdatablock: \n{origdatablock}"
+        )
+
+    result: dict = response.json()
+    # Use .get: a missing "_id" must not raise after the POST already succeeded.
+    logger.info(
+        "Origdatablock created successfully. Origdatablock pid: %s",
+        result.get("_id"),
+    )
+    return result
diff --git a/src/scicat_offline_ingestor.py b/src/scicat_offline_ingestor.py
index 67e9a3a..87e6b29 100644
--- a/src/scicat_offline_ingestor.py
+++ b/src/scicat_offline_ingestor.py
@@ -3,13 +3,11 @@
 # import scippnexus as snx
 import copy
 import json
-import logging
 import os
 import pathlib
-from urllib.parse import urljoin
 
 import h5py
-import requests
+from scicat_communication import create_scicat_dataset, create_scicat_origdatablock
 from scicat_configuration import (
     build_offline_ingestor_arg_parser,
     build_scicat_offline_ingestor_config,
@@ -26,36 +24,6 @@ from system_helpers import exit, offline_ingestor_exit_at_exceptions
 
 
 
-def _create_scicat_dataset(dataset: dict, config, logger: logging.Logger) -> dict:
-    """
-    Execute a POST request to scicat to create a dataset
-    """
-    logger.info("_create_scicat_dataset: Sending POST request to create new dataset")
-    response = requests.request(
-        method="POST",
-        url=urljoin(config.scicat.host, "datasets"),
-        json=dataset,
-        headers=config.scicat.headers,
-        timeout=config.scicat.timeout,
-        stream=False,
-        verify=True,
-    )
-
-    result = response.json()
-    if not response.ok:
-        err = result.get("error", {})
-        logger.error(
-            "_create_scicat_dataset: Failed to create new dataset. Error %s", err
-        )
-        raise Exception(f"Error creating new dataset: {err}")
-
-    logger.info(
-        "_create_scicat_dataset: Dataset created successfully. Dataset pid: %s",
-        result['pid'],
-    )
-    return result
-
-
 def _prepare_scicat_origdatablock(scicat_dataset, datafilelist, config, logger):
     """
     Create local copy of the orig datablock to send to scicat
@@ -79,43 +47,6 @@ def _prepare_scicat_origdatablock(scicat_dataset, datafilelist, config, logger):
     return origdatablock
 
 
-def _create_scicat_origdatablock(
-    origdatablock: dict, config, logger: logging.Logger
-) -> dict:
-    """
-    Execute a POST request to scicat to create a new origdatablock
-    """
-    logger.info(
-        "_create_scicat_origdatablock: Sending POST request to create new origdatablock"
-    )
-    response = requests.request(
-        method="POST",
-        url=urljoin(config.scicat.host, "origdatablocks"),
-        json=origdatablock,
-        headers=config.scicat.headers,
-        timeout=config.scicat.timeout,
-        stream=False,
-        verify=True,
-    )
-
-    result = response.json()
-    if not response.ok:
-        err = result.get("error", {})
-        logger.error(
-            "_create_scicat_origdatablock: Failed to create new origdatablock."
-            "Error %s",
-            err,
-        )
-        raise Exception(f"Error creating new origdatablock: {err}")
-
-    logger.info(
-        "_create_scicat_origdatablock: Origdatablock created successfully. "
-        "Origdatablock pid: %s",
-        result['_id'],
-    )
-    return result
-
-
 def _define_dataset_source_folder(datafilelist) -> pathlib.Path:
     """
     Return the dataset source folder, which is the common path
@@ -208,7 +139,9 @@ def main() -> None:
             )
         )
         # create dataset in scicat
-        scicat_dataset = _create_scicat_dataset(local_dataset, config, logger)
+        scicat_dataset = create_scicat_dataset(
+            dataset=local_dataset, config=config.scicat, logger=logger
+        )
 
         dataset_source_folder = _define_dataset_source_folder(data_file_list)
 
@@ -222,8 +155,8 @@ def main() -> None:
         )
 
         # create origdatablock in scicat
-        scicat_origdatablock = _create_scicat_origdatablock(
-            local_origdatablock, config, logger
+        scicat_origdatablock = create_scicat_origdatablock(
+            origdatablock=local_origdatablock, config=config.scicat, logger=logger
         )
 
         # check one more time if we successfully created the entries in scicat