Skip to content

Commit

Permalink
Add .gen_paths to relative path for auto generated relative paths
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmcalpine committed Oct 18, 2024
1 parent 9d2bb02 commit f3ecb28
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 4 deletions.
13 changes: 13 additions & 0 deletions src/dataregistry/registrar/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
_read_configuration_file,
get_directory_info,
_relpath_from_name,
get_first_directory,
)
from .dataset_util import set_dataset_status, get_dataset_status

Expand Down Expand Up @@ -83,6 +84,18 @@ def _validate_register_inputs(
"External datasets require either a url or contact_email"
)

# Make sure the user passed `relative_path` is legal
# Only needed for `register` function, `replace` has no `relative_path`
# argument as this cannot be changed from the original `register`
if "relative_path" in kwargs_dict.keys():
if kwargs_dict["relative_path"] is not None:
first_dir = get_first_directory(kwargs_dict["relative_path"])

if first_dir is not None:
if first_dir == ".gen_paths":
raise ValueError("Can't start relative path with .gen_paths, "
"this is reserved for auto-generated `relative_paths")

# Assign the `owner_type`
if kwargs_dict["owner_type"] is None:
if self._owner_type is not None:
Expand Down
34 changes: 33 additions & 1 deletion src/dataregistry/registrar/registrar_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ def _relpath_from_name(name, version):
Construct a relative path from the name and version of a dataset.
We use this when the `relative_path` is not explicitly defined.
Every automatically generated `relative_path` is prefixed with
`.gen_paths/`, meaning that all automatically generated `relative_paths` go
into this top level folder. This is to prevent clashes with user specified
`relative_path`'s.
Parameters
----------
name : str
Expand All @@ -345,4 +350,31 @@ def _relpath_from_name(name, version):
Automatically generated `relative_path`
"""

return f"{name}_{version}"
return os.path.join(".gen_paths", f"{name}_{version}")

def get_first_directory(path):
    """
    Return the leading component of a path.

    The path is normalized with `os.path.normpath` (collapsing things such
    as a leading `./` or a trailing separator) and then split on `os.sep`.
    The first non-empty piece is returned, so a leading separator on an
    absolute path is ignored.

    Parameters
    ----------
    path : str
        Absolute or relative path string

    Returns
    -------
    - : str
        First non-empty component of the normalized path. A path without
        any separators (e.g. `"test"`) is returned as is.
        Returns None if the normalized path has no non-empty components
        (e.g. the root path `/` on POSIX)
    """

    normalized = os.path.normpath(path)

    # Empty components come from leading separators (absolute paths), so
    # skip over them and stop at the first real name.
    for component in normalized.split(os.sep):
        if component:
            return component

    # Nothing but separators in the path
    return None
4 changes: 2 additions & 2 deletions tests/end_to_end_tests/test_register_dataset_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_register_dataset_defaults(dummy_file):
assert results["owner"][0] == os.getenv("USER")
assert results["owner_type"][0] == "user"
assert results["description"][0] == None
assert results["relative_path"][0] == f"{_NAME}_0.0.1"
assert results["relative_path"][0] == f".gen_paths/{_NAME}_0.0.1"
assert results["data_org"][0] == "dummy"
assert results["execution_id"][0] >= 0
assert results["dataset_id"][0] >= 0
Expand Down Expand Up @@ -192,7 +192,7 @@ def test_dataset_bumping(dummy_file, v_type, ans):
# Check the result
assert results["dataset.name"][0] == _NAME
assert results["dataset.version_string"][0] == ans
assert results["dataset.relative_path"][0] == f"{_NAME}_{ans}"
assert results["dataset.relative_path"][0] == f".gen_paths/{_NAME}_{ans}"


@pytest.mark.parametrize("owner_type", ["user", "group", "project"])
Expand Down
22 changes: 22 additions & 0 deletions tests/end_to_end_tests/test_register_dataset_real_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,28 @@ def test_registering_bad_relative_path(dummy_file, link):
relative_path=f"test/register/bad/relpath/{link}",
)

@pytest.mark.parametrize("link", ["file1.txt", "directory1"])
def test_registering_bad_relative_path_2(dummy_file, link):
    """
    Make sure we cannot register a dataset to a relative path that starts
    with the `.gen_paths` directory.

    Runs once for a file (`file1.txt`) and once for a directory
    (`directory1`) as the data being registered.
    """

    # `dummy_file` unpacks into a source directory and a registry root
    # directory (presumably a pytest fixture defined elsewhere)
    src_dir, root_dir = dummy_file
    registry = DataRegistry(root_dir=str(root_dir), schema=DEFAULT_SCHEMA_WORKING)

    source_location = str(src_dir / link)

    # The registration attempt itself must be rejected with a ValueError
    with pytest.raises(ValueError, match="Can't start relative path with .gen_paths"):
        _insert_dataset_entry(
            registry,
            f"DESC:datasets:test_registering_bad_relative_path_3_{link}",
            "0.0.1",
            old_location=source_location,
            location_type="dataregistry",
            relative_path=f".gen_paths/test/register/bad/relpath/{link}",
        )

@pytest.mark.parametrize("link", ["file1.txt", "directory1"])
def test_registering_deleted_relative_path(dummy_file, link):
Expand Down
20 changes: 19 additions & 1 deletion tests/unit_tests/test_registrar_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
_read_configuration_file,
get_directory_info,
_relpath_from_name,
get_first_directory,
)


Expand Down Expand Up @@ -159,7 +160,7 @@ def test_read_file(tmpdir, nchars, max_config_length, ans):
@pytest.mark.parametrize(
"name,version_string,ans",
[
("mydataset", "1.1.1", "mydataset_1.1.1"),
("mydataset", "1.1.1", ".gen_paths/mydataset_1.1.1"),
],
)
def test_relpath_from_name(name, version_string, ans):
Expand All @@ -185,3 +186,20 @@ def test_name_from_relpath(rel_path,ans):
"""Make sure names are extracted from paths correctly"""

assert _name_from_relpath(rel_path) == ans

@pytest.mark.parametrize(
    "path,ans",
    [
        ("/testing/test", "testing"),
        ("./testing/test", "testing"),
        ("/testing/test/", "testing"),
        ("test", "test"),
        (".gen_paths/test", ".gen_paths"),
        ("test/testing/tested", "test"),
        ("/test/testing/tested", "test"),
    ],
)
def test_get_first_directory(path, ans):
    """
    Make sure the first directory is pulled out correctly.

    Covers absolute and relative paths, a leading `./`, a trailing
    separator, and a bare name with no separators.
    """

    first_dir = get_first_directory(path)
    assert first_dir == ans

0 comments on commit f3ecb28

Please sign in to comment.