Skip to content

Commit

Permalink
Remove version suffix support
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmcalpine committed Sep 22, 2024
1 parent d90add5 commit 2308c35
Show file tree
Hide file tree
Showing 11 changed files with 22 additions and 126 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- There is now a `reg_admin` account which is the only account to create the
initial schemas. The schema creation script has been updated to give the
correct `reg_writer` and `reg_reader` privileges.
- Remove `version_suffix`

## Version 0.6.4

Expand Down
11 changes: 1 addition & 10 deletions src/dataregistry/db_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,19 +273,10 @@ def _insert_provenance(
from git import InvalidGitRepositoryError

version_fields = __version__.split(".")
patch = version_fields[2]
suffix = None
if "-" in patch:
subfields = patch.split("-")
patch = subfields[0]
suffix = "-".join(subfields[1:])

values = dict()
values["code_version_major"] = version_fields[0]
values["code_version_minor"] = version_fields[1]
values["code_version_patch"] = patch
if suffix:
values["code_version_suffix"] = suffix
values["code_version_patch"] = version_fields[2]
values["db_version_major"] = db_version_major
values["db_version_minor"] = db_version_minor
values["db_version_patch"] = db_version_patch
Expand Down
29 changes: 6 additions & 23 deletions src/dataregistry/registrar/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,6 @@ def _validate_register_inputs(
if kwargs_dict["owner_type"] == "production":
if kwargs_dict["is_overwritable"]:
raise ValueError("Cannot overwrite production entries")
if kwargs_dict["version_suffix"] is not None:
raise ValueError("Production entries can't have version suffix")
if (not self._table_metadata.is_production_schema) and (
not kwargs_dict["test_production"]
):
Expand Down Expand Up @@ -216,10 +214,6 @@ def _register_row(self, name, version, kwargs_dict):
kwargs_dict[
"execution_name"
] = f"for_dataset_{name}-{kwargs_dict['version_string']}"
if kwargs_dict["version_suffix"]:
kwargs_dict[
"execution_name"
] = f"{kwargs_dict['execution_name']}-{kwargs_dict['version_suffix']}"
if kwargs_dict["execution_description"] is None:
kwargs_dict[
"execution_description"
Expand Down Expand Up @@ -314,7 +308,6 @@ def register(
self,
name,
version,
version_suffix=None,
creation_date=None,
description=None,
execution_id=None,
Expand Down Expand Up @@ -357,7 +350,6 @@ def register(
----------
name** : str
version** : str
version_suffix** : str, optional
creation_date** : datetime, optional
description** : str, optional
execution_id** : int, optional
Expand Down Expand Up @@ -417,13 +409,12 @@ def register(
# If `relative_path` not passed, automatically generate it
if kwargs_dict["relative_path"] is None:
kwargs_dict["relative_path"] = _relpath_from_name(
name, kwargs_dict["version_string"], kwargs_dict["version_suffix"]
name, kwargs_dict["version_string"]
)

# Make sure the relative_path in the `root_dir` is avaliable
if kwargs_dict["location_type"] in ["dataregistry", "dummy"]:
previous_datasets = self._find_previous(
None,
None,
None,
kwargs_dict["owner"],
Expand Down Expand Up @@ -461,15 +452,14 @@ def register(
previous_datasets = self._find_previous(
name,
kwargs_dict["version_string"],
kwargs_dict["version_suffix"],
kwargs_dict["owner"],
kwargs_dict["owner_type"],
)

if len(previous_datasets) > 0:
raise ValueError(
"There is already a dataset with combination name,"
"version_string, version_suffix, owner, owner_type"
"version_string, owner, owner_type"
)

# Register the new row in the dataset table
Expand All @@ -482,7 +472,6 @@ def replace(
self,
name,
version,
version_suffix=None,
creation_date=None,
description=None,
execution_id=None,
Expand Down Expand Up @@ -512,7 +501,7 @@ def replace(
Replace a dataset in the registry.
This is so a user can keep the same
name/version/version_suffix/ower/owner_type combination as a previous
name/version/ower/owner_type combination as a previous
dataset. Note the original dataset must have `is_overwritable=True` to
allow the replace to work.
Expand Down Expand Up @@ -549,15 +538,11 @@ def replace(
previous_datasets = self._find_previous(
name,
kwargs_dict["version_string"],
kwargs_dict["version_suffix"],
kwargs_dict["owner"],
kwargs_dict["owner_type"],
)

full_name = (
f"name: {name} v: {kwargs_dict['version_string']} "
f"v-suff: {kwargs_dict['version_suffix']}"
)
full_name = f"name: {name} v: {kwargs_dict['version_string']}"

if len(previous_datasets) == 0:
raise ValueError(f"Dataset {full_name} does not exist")
Expand Down Expand Up @@ -712,14 +697,13 @@ def _find_previous(
self,
name,
version_string,
version_suffix,
owner,
owner_type,
relative_path=None,
):
"""
Find all dataset entries with the same `name`, `version`,
`version_suffix`, `owner` and `owner_type`.
`owner` and `owner_type`.
If `relative_path` is not None, instead search for all dataset entries
with the same `owner`, `owner_type` and `relative_path` combination.
Expand All @@ -729,7 +713,7 @@ def _find_previous(
Parameters
----------
name/version/version_suffix/owner/owner_type : str
name/version/owner/owner_type : str
Returns
-------
Expand Down Expand Up @@ -762,7 +746,6 @@ def _find_previous(
stmt = stmt.where(
dataset_table.c.name == name,
dataset_table.c.version_string == version_string,
dataset_table.c.version_suffix == version_suffix,
dataset_table.c.owner == owner,
dataset_table.c.owner_type == owner_type,
)
Expand Down
42 changes: 9 additions & 33 deletions src/dataregistry/registrar/registrar_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,41 +19,31 @@
_nonneg_int_re = "0|[1-9][0-9]*"


def _parse_version_string(version, with_suffix=False):
def _parse_version_string(version):
"""
Parase a version string into its components.
Parameters
----------
version : str
Version string
with_suffix : bool
False means version string *must not* include suffix
True means it *may* have a suffix
Returns
-------
d : dict
Dict with keys "major", "minor", "patch" and optionally "suffix"
Dict with keys "major", "minor", "patch"
"""

cmp = version.split(VERSION_SEPARATOR)
if not with_suffix:
if len(cmp) != 3:
raise ValueError("Version string must have 3 components")
else:
if len(cmp) < 3 or len(cmp) > 4:
raise ValueError("Version string must have 3 or 4 components")
for c in cmp[0:3]:
if len(cmp) != 3:
raise ValueError("Version string must have 3 components")
for c in cmp:
if not re.fullmatch(_nonneg_int_re, c):
raise ValueError(f"Version component {c} is not non-negative int")
d = {"major": cmp[0]}
d["minor"] = cmp[1]
d["patch"] = cmp[2]

if len(cmp) > 3:
d["suffix"] = cmp[3]

return d


Expand Down Expand Up @@ -152,7 +142,7 @@ def _bump_version(name, v_string, dataset_table, engine):

# Find the previous dataset based on the name and version
stmt = select(
dataset_table.c["version_major", "version_minor", "version_patch", "version_suffix"]
dataset_table.c["version_major", "version_minor", "version_patch"]
).where(dataset_table.c.name == name)
stmt = (
stmt.order_by(dataset_table.c.version_major.desc())
Expand All @@ -168,14 +158,6 @@ def _bump_version(name, v_string, dataset_table, engine):
old_minor = 0
old_patch = 0
else:
# We don't bump datasets with a version suffix
if r.version_suffix is not None:
raise ValueError(
"Cannot bump dataset automatically as it "
f"has a version suffix ({r.version_suffix}). "
"Select the version/suffix manually instead."
)

old_major = int(r.version_major)
old_minor = int(r.version_minor)
old_patch = int(r.version_patch)
Expand Down Expand Up @@ -345,7 +327,7 @@ def _compute_checksum(file_path):

raise Exception(e)

def _relpath_from_name(name, version, version_suffix):
def _relpath_from_name(name, version):
"""
Construct a relative path from the name and version of a dataset.
We use this when the `relative_path` is not explicitly defined.
Expand All @@ -356,17 +338,11 @@ def _relpath_from_name(name, version, version_suffix):
Dataset name
version : str
Dataset version
version_suffix : str
Dataset version suffix
Returns
-------
relative_path : str
Automatically generated `relative_path`
"""

if version_suffix is not None:
relative_path = f"{name}_{version}_{version_suffix}"
else:
relative_path = f"{name}_{version}"

return relative_path
return f"{name}_{version}"
13 changes: 3 additions & 10 deletions src/dataregistry/schema/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,6 @@ tables:
type: "Integer"
description: "Patch version of code when this schema was created"
nullable: False
code_version_suffix:
type: "String"
description: "Version suffix of code when this schema was created"
creator_uid:
type: "StringShort"
description: "UID of person who registered the entry"
Expand Down Expand Up @@ -330,7 +327,7 @@ tables:

unique_constraints:
dataset_unique:
unique_list: ["name", "version_string", "version_suffix", "owner", "owner_type", "replace_iteration"]
unique_list: ["name", "version_string", "owner", "owner_type", "replace_iteration"]

column_definitions:
dataset_id:
Expand All @@ -339,11 +336,11 @@ tables:
description: "Unique identifier for this dataset"
name:
type: "String"
description: "Any convenient, evocative name for the human. Note the combination of name, version and version_suffix must be unique."
description: "Any convenient, evocative name for the human. Note the combination of name and version must be unique."
nullable: False
relative_path:
type: "String"
description: "Relative path storing the data, relative to `<root_dir>`. If None, generated from the `name`, `version_string` and `version_suffix`"
description: "Relative path storing the data, relative to `<root_dir>`. If None, generated from the `name` and `version_string`"
nullable: False
cli_optional: True
version_major:
Expand All @@ -358,10 +355,6 @@ tables:
type: "Integer"
description: "Patch version in semantic string (i.e., x.x.X)"
nullable: False
version_suffix:
type: "String"
description: "Optional version suffix to place at the end of the version string. Cannot be used for production datasets."
cli_optional: True
version_string:
type: "String"
description: "Version string"
Expand Down
2 changes: 1 addition & 1 deletion src/dataregistry_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def get_parser():
"name",
help=(
"Any convenient, evocative name for the human. Note the "
"combination of name, version and version_suffix must be unique."
"combination of name and version must be unique."
),
type=str,
)
Expand Down
1 change: 0 additions & 1 deletion src/dataregistry_cli/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def register_dataset(args):
new_id = datareg.Registrar.dataset.register(
args.name,
args.version,
version_suffix=args.version_suffix,
creation_date=args.creation_date,
access_api=args.access_api,
execution_id=args.execution_id,
Expand Down
4 changes: 0 additions & 4 deletions tests/end_to_end_tests/database_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ def _insert_dataset_entry(
owner=None,
description=None,
execution_id=None,
version_suffix=None,
old_location=None,
is_overwritable=False,
which_datareg=None,
Expand Down Expand Up @@ -210,7 +209,6 @@ def _insert_dataset_entry(
dataset_id, execution_id = datareg.Registrar.dataset.register(
name,
version,
version_suffix=version_suffix,
creation_date=None,
description=description,
old_location=old_location,
Expand Down Expand Up @@ -247,7 +245,6 @@ def _replace_dataset_entry(
owner=None,
description=None,
execution_id=None,
version_suffix=None,
old_location=None,
is_overwritable=False,
which_datareg=None,
Expand Down Expand Up @@ -282,7 +279,6 @@ def _replace_dataset_entry(
dataset_id, execution_id = datareg.Registrar.dataset.replace(
name,
version,
version_suffix=version_suffix,
creation_date=None,
description=description,
old_location=old_location,
Expand Down
2 changes: 1 addition & 1 deletion tests/end_to_end_tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_dataset_entry_with_execution(dummy_file):
cmd = "register dataset my_cli_dataset3 1.2.3 --location_type dummy"
cmd += " --description 'This is my dataset description'"
cmd += " --access_api 'Awesome API' --owner DESC --owner_type group"
cmd += " --version_suffix test --creation_date '2020-01-01'"
cmd += " --creation_date '2020-01-01'"
cmd += " --input_datasets 1 2 --execution_name 'I have given the execution a name'"
cmd += " --is_overwritable"
cmd += f" --schema {DEFAULT_SCHEMA_WORKING} --root_dir {str(tmp_root_dir)}"
Expand Down
13 changes: 0 additions & 13 deletions tests/end_to_end_tests/test_production_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,3 @@ def test_production_schema_bad_register(dummy_file):
owner_type="production",
is_overwritable=True,
)

# Try to have a version suffix
with pytest.raises(
ValueError, match="Production entries can't have version suffix"
):
d_id = _insert_dataset_entry(
datareg,
"DESC:datasets:bad_production_dataset_3",
"0.0.1",
owner="production",
owner_type="production",
version_suffix="prod",
)
Loading

0 comments on commit 2308c35

Please sign in to comment.