From 3ea78a24e15327ddba3b040749383b6dc98995f7 Mon Sep 17 00:00:00 2001 From: Stuart McAlpine Date: Tue, 22 Oct 2024 15:53:34 +0200 Subject: [PATCH] Change the way the auto generated relative paths are done for single files --- src/dataregistry/registrar/dataset.py | 2 +- src/dataregistry/registrar/registrar_util.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/dataregistry/registrar/dataset.py b/src/dataregistry/registrar/dataset.py index daa433fd..e7fffb25 100644 --- a/src/dataregistry/registrar/dataset.py +++ b/src/dataregistry/registrar/dataset.py @@ -432,7 +432,7 @@ def register( # If `relative_path` not passed, automatically generate it if kwargs_dict["relative_path"] is None: kwargs_dict["relative_path"] = _relpath_from_name( - name, kwargs_dict["version_string"] + name, kwargs_dict["version_string"], kwargs_dict["old_location"] ) # Make sure the relative_path in the `root_dir` is avaliable diff --git a/src/dataregistry/registrar/registrar_util.py b/src/dataregistry/registrar/registrar_util.py index e5336016..77f37b36 100644 --- a/src/dataregistry/registrar/registrar_util.py +++ b/src/dataregistry/registrar/registrar_util.py @@ -327,7 +327,7 @@ def _compute_checksum(file_path): raise Exception(e) -def _relpath_from_name(name, version): +def _relpath_from_name(name, version, old_location): """ Construct a relative path from the name and version of a dataset. We use this when the `relative_path` is not explicitly defined. @@ -337,12 +337,20 @@ def _relpath_from_name(name, version): into this top level folder. This is to prevent clashes with user specified `relative_path`'s. + The auto-generated `relative_path` will be a directory that contains the + name and version, which is where the ingested data (from `old_location`) + will eventually reside. If the data being ingested is a single file, the + `relative_path` will be the full path to the file within the registry, not + just the directory that contains the file. 
+ Parameters ---------- name : str Dataset name version : str Dataset version + old_location : str or None + Path the data is being ingested from; its basename is appended when it is an existing single file Returns ------- @@ -350,4 +358,9 @@ def _relpath_from_name(name, version): Automatically generated `relative_path` """ - return os.path.join(".gen_paths", f"{name}_{version}") + # For single files, scrape the filename and add it to the `relative_path` + if (old_location is not None) and os.path.isfile(old_location): + return os.path.join(".gen_paths", f"{name}_{version}", os.path.basename(old_location)) + else: + # Otherwise (directory, or no existing file at `old_location`), only the autogenerated directory name is needed + return os.path.join(".gen_paths", f"{name}_{version}")