Skip to content

Commit

Permalink
Change the .gen_paths check on the
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmcalpine committed Oct 22, 2024
1 parent f3ecb28 commit bb5ac1f
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 53 deletions.
26 changes: 18 additions & 8 deletions src/dataregistry/registrar/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@
_read_configuration_file,
get_directory_info,
_relpath_from_name,
get_first_directory,
)
from .dataset_util import set_dataset_status, get_dataset_status

_ILLEGAL_NAME_CHAR = ["$", "*", "&", "/", "?", "\\", " "]

_ILLEGAL_RELPATH_CHAR = ["$", "*", "&", "?", "\\", " "]

class DatasetTable(BaseTable):
def __init__(self, db_connection, root_dir, owner, owner_type, execution_table):
Expand Down Expand Up @@ -88,13 +87,24 @@ def _validate_register_inputs(
# Only needed for `register` function, `replace` has no `relative_path`
# argument as this cannot be changed from the original `register`
if "relative_path" in kwargs_dict.keys():
if kwargs_dict["relative_path"] is not None:
first_dir = get_first_directory(kwargs_dict["relative_path"])

if first_dir is not None:
if first_dir == ".gen_paths":
raise ValueError("Can't start relative path with .gen_paths, "
"this is reserved for auto-generated `relative_paths")
if kwargs_dict["relative_path"] is not None:
if type(kwargs_dict["relative_path"]) != str:
raise ValueError("Relative path is not a valid string")

# Relative path cannot start with a "/"
if kwargs_dict["relative_path"][0] == "/":
raise ValueError("Relative path cannot start with a '/'")

# Make sure `relative_path` is legal (i.e., no illegal characters)
for i_char in _ILLEGAL_RELPATH_CHAR:
if i_char in kwargs_dict["relative_path"]:
raise ValueError(f"Cannot have character {i_char} in relative path")

# The '.gen_paths' directory is reserved for autogenerated paths
if kwargs_dict["relative_path"].startswith(".gen_paths"):
raise ValueError("Can't start relative path with '.gen_paths', "
"this is reserved for auto-generated `relative_paths")

# Assign the `owner_type`
if kwargs_dict["owner_type"] is None:
Expand Down
27 changes: 0 additions & 27 deletions src/dataregistry/registrar/registrar_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,30 +351,3 @@ def _relpath_from_name(name, version):
"""

return os.path.join(".gen_paths", f"{name}_{version}")

def get_first_directory(path):
"""
Get back the root folder of a path
Parameters
----------
path : str
Absolute or relative path string
Returns
-------
- : str
Root (or 1st) folder name
Returns None if path had no directory separators
"""

# Normalize the path and split it into components
parts = os.path.normpath(path).split(os.sep)

# Filter out any empty strings (in case of absolute paths starting with '/')
parts = [part for part in parts if part]

# Return the first part, which should be the first directory
if parts:
return parts[0]
return None
14 changes: 13 additions & 1 deletion tests/end_to_end_tests/test_register_dataset_real_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def test_registering_bad_relative_path_2(dummy_file, link):

data_path = str(tmp_src_dir / link)

with pytest.raises(ValueError, match="Can't start relative path with .gen_paths"):
# Test going into the '.gen_paths' folder (not allowed)
with pytest.raises(ValueError, match="Can't start relative path with '.gen_paths'"):
d_id = _insert_dataset_entry(
datareg,
f"DESC:datasets:test_registering_bad_relative_path_3_{link}",
Expand All @@ -169,6 +170,17 @@ def test_registering_bad_relative_path_2(dummy_file, link):
relative_path=f".gen_paths/test/register/bad/relpath/{link}",
)

# Test registering a file or directory as '.gen_paths' (not allowed)
with pytest.raises(ValueError, match="Can't start relative path with '.gen_paths'"):
d_id = _insert_dataset_entry(
datareg,
f"DESC:datasets:test_registering_bad_relative_path_4_{link}",
"0.0.1",
old_location=data_path,
location_type="dataregistry",
relative_path=f".gen_paths",
)

@pytest.mark.parametrize("link", ["file1.txt", "directory1"])
def test_registering_deleted_relative_path(dummy_file, link):
"""
Expand Down
17 changes: 0 additions & 17 deletions tests/unit_tests/test_registrar_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,20 +186,3 @@ def test_name_from_relpath(rel_path,ans):
"""Make sure names are extracted from paths correctly"""

assert _name_from_relpath(rel_path) == ans

@pytest.mark.parametrize(
"path,ans",
[
("/testing/test", "testing"),
("./testing/test", "testing"),
("/testing/test/", "testing"),
("test", "test"),
(".gen_paths/test", ".gen_paths"),
("test/testing/tested", "test"),
("/test/testing/tested", "test"),
],
)
def test_get_first_directory(path,ans):
"""Make sure the first directory is pulled out correct"""

assert get_first_directory(path) == ans

0 comments on commit bb5ac1f

Please sign in to comment.