From 123fb4679e498cbdf1a4e45213d55fd6e44c2a59 Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Thu, 5 Dec 2024 19:49:53 -0500 Subject: [PATCH 1/8] Improve add version (continued from #130) (#131) --- docs/demo_build_v1_0_spec_examples.md | 32 +--- docs/demo_build_v1_1_spec_examples.md | 30 +--- docs/demo_using_bagit_bags.md | 33 ++-- docs/validation_status.md | 4 +- ocfl/inventory.py | 4 +- ocfl/new_version.py | 50 ++++-- ocfl/object.py | 211 +++++++------------------- tests/test_inventory.py | 12 +- tests/test_object.py | 162 ++------------------ 9 files changed, 141 insertions(+), 397 deletions(-) diff --git a/docs/demo_build_v1_0_spec_examples.md b/docs/demo_build_v1_0_spec_examples.md index 4a614fd..4825132 100644 --- a/docs/demo_build_v1_0_spec_examples.md +++ b/docs/demo_build_v1_0_spec_examples.md @@ -52,34 +52,8 @@ This is inventory should match the example with 3 versions in +INFO:root:Updated OCFL object info:bb123cd4567 by adding v2 +### Updated object info:bb123cd4567 to v2 ``` @@ -103,8 +103,8 @@ INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/t INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/testdata/bags/uaa_v3/bagit.txt INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/testdata/bags/uaa_v3/bag-info.txt INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/testdata/bags/uaa_v3/manifest-sha512.txt -INFO:root:Updated OCFL object info:bb123cd4567 in tmp/obj by adding v3 -### +INFO:root:Updated OCFL object info:bb123cd4567 by adding v3 +### Updated object info:bb123cd4567 to v3 ``` @@ -149,8 +149,8 @@ INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/t INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/testdata/bags/uaa_v4/bagit.txt INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/testdata/bags/uaa_v4/bag-info.txt INFO:bagit:Verifying checksum for file /home/runner/work/ocfl-py/ocfl-py/tests/testdata/bags/uaa_v4/manifest-sha512.txt -INFO:root:Updated OCFL object info:bb123cd4567 in tmp/obj by adding v4 -### +INFO:root:Updated OCFL object info:bb123cd4567 by adding v4 +### Updated object info:bb123cd4567 to v4 ``` @@ -164,12 +164,13 @@ Taking the newly created OCFL object `/tmp/obj` we can `--extract` the `v4` cont INFO:root:Extracted v4 into tmp/extracted_v4 INFO:bagit:Creating bag for directory tmp/extracted_v4 INFO:bagit:Creating data directory -INFO:bagit:Moving my_content to tmp/extracted_v4/tmpt_h62tn4/my_content -INFO:bagit:Moving tmp/extracted_v4/tmpt_h62tn4 to data +INFO:bagit:Moving my_content to tmp/extracted_v4/tmp6gveuavx/my_content +INFO:bagit:Moving tmp/extracted_v4/tmp6gveuavx to data INFO:bagit:Using 1 processes to generate manifests: sha512 INFO:bagit:Generating manifest lines for file data/my_content/dracula.txt INFO:bagit:Generating manifest lines for file data/my_content/dunwich.txt INFO:bagit:Generating manifest lines for file data/my_content/poe-nevermore.txt +INFO:bagit:Generating manifest lines for file data/my_content/another_directory/a_third_copy_of_dracula.txt INFO:bagit:Creating bagit.txt INFO:bagit:Creating bag-info.txt INFO:bagit:Creating tmp/extracted_v4/tagmanifest-sha512.txt @@ -186,24 +187,14 @@ We note that the OCFL object had only one `content` file in `v4` but the extract diff -r tmp/extracted_v4/bag-info.txt tests/testdata/bags/uaa_v4/bag-info.txt 1,2c1 < Bag-Software-Agent: bagit.py v1.8.1 -< Bagging-Date: 2024-12-05 +< Bagging-Date: 2024-12-06 --- > Bagging-Date: 2020-01-04 -7c6 -< Payload-Oxum: 1032810.3 ---- -> Payload-Oxum: 1915970.4 -Only in tests/testdata/bags/uaa_v4/data/my_content: another_directory -diff -r tmp/extracted_v4/manifest-sha512.txt tests/testdata/bags/uaa_v4/manifest-sha512.txt -3a4 -> ffc150e7944b5cf5ddb899b2f48efffbd490f97632fc258434aefc4afb92aef2e3441ddcceae11404e5805e1b6c804083c9398c28f061c9ba42dd4bac53d5a2e data/my_content/another_directory/a_third_copy_of_dracula.txt diff -r tmp/extracted_v4/tagmanifest-sha512.txt tests/testdata/bags/uaa_v4/tagmanifest-sha512.txt -2,3c2,3 -< ba0f77d695226f32ef316aa1c0000a13d8f78b6948a5415a3e11a95f73153ecf351c91211a5e7b20ab6b785ec472ec5fcc6e8e6fb5a3e718ea86d7719cd3c690 bag-info.txt -< e4cb52067909ba58668f21c0da72c73c586a3f87d64b471187cb6234ed605d17974e4c7833f6ae7e639ab1126502534b3bed4cfeee8cba485cfc764b48225e0f manifest-sha512.txt +2c2 +< 7e23b308ac51b064e7471d7b8e5ba1f758891631ad8c8fb57799a39018d7d77e893a8236a608a8087117000c55efde9529cb76cdb63bacc5642b38ab459b30d5 bag-info.txt --- > 10624e6d45462def7af66d1a0d977606c7b073b01809c1d42258cfab5c34a275480943cbe78044416aee1f23822cc3762f92247b8f39b5c6ddc5ae32a8f94ce5 bag-info.txt -> 5c2e2b9cacc93cb315d57f09fac6d199c3378313b6cf918bb0a70e1839c4e4c0c2e5a7f9ae869cf7755e09a196a835be1af7c510d3d5faa5d0c0b3f6be9f816a manifest-sha512.txt ``` (last command exited with return code 1) diff --git a/docs/validation_status.md b/docs/validation_status.md index 5b51e15..9263efe 100644 --- a/docs/validation_status.md +++ b/docs/validation_status.md @@ -53,7 +53,7 @@ The following tables show the implementation status of all errors and warnings i | [E030](https://ocfl.io/1.1/spec#E030) | 'SHA-256 algorithm defined by [FIPS-180-4] and must be encoded using hex (base16) encoding [RFC4648].' | _Not implemented_ | | [E031](https://ocfl.io/1.1/spec#E031) | 'SHA-512 algorithm defined by [FIPS-180-4] and must be encoded using hex (base16) encoding [RFC4648].' | _Not implemented_ | | [E032](https://ocfl.io/1.1/spec#E032) | '[blake2b-512] must be encoded using hex (base16) encoding [RFC4648].' | _Not implemented_ | -| [E033](https://ocfl.io/1.1/spec#E033) | 'An OCFL Object Inventory MUST follow the [JSON] structure described in this section and must be named inventory.json.' | OCFL Object %s inventory is not valid JSON (%s) \[[ocfl/object.py#L738](https://github.com/zimeon/ocfl-py/blob/main/ocfl/object.py#L738) [ocfl/validator.py#L201](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L201)\] | +| [E033](https://ocfl.io/1.1/spec#E033) | 'An OCFL Object Inventory MUST follow the [JSON] structure described in this section and must be named inventory.json.' | OCFL Object %s inventory is not valid JSON (%s) \[[ocfl/object.py#L643](https://github.com/zimeon/ocfl-py/blob/main/ocfl/object.py#L643) [ocfl/validator.py#L201](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L201)\] | | [E034](https://ocfl.io/1.1/spec#E034) | 'An OCFL Object Inventory must follow the [JSON] structure described in this section and MUST be named inventory.json.' | _Not implemented_ | | [E035](https://ocfl.io/1.1/spec#E035) | 'The forward slash (/) path separator must be used in content paths in the manifest and fixity blocks within the inventory.' | _Not implemented_ | | [E036](https://ocfl.io/1.1/spec#E036) | 'An OCFL Object Inventory must include the following keys: [id, type, digestAlgorithm, head]' | _See multiple cases identified with suffixes below_ | @@ -206,4 +206,4 @@ The following tables show the implementation status of all errors and warnings i | [W016](https://ocfl.io/1.1/spec#W016) | 'In the Storage Root, extension sub-directories SHOULD be named according to a registered extension name.' | _Not implemented_ | | W901 | **Not in specification** | OCFL Storage Root includes unregistered extension directory '%s' \[[ocfl/storage_root.py#L275](https://github.com/zimeon/ocfl-py/blob/main/ocfl/storage_root.py#L275)\] | -_Generated by `extract_codes.py` at 2024-12-05 18:22:36.654306_ \ No newline at end of file +_Generated by `extract_codes.py` at 2024-12-06 00:46:27.116525_ \ No newline at end of file diff --git a/ocfl/inventory.py b/ocfl/inventory.py index 58ff2fc..10a074a 100644 --- a/ocfl/inventory.py +++ b/ocfl/inventory.py @@ -410,7 +410,7 @@ def add_version(self, vdir=None, metadata=None, state=None, self.head = vdir return self.version(vdir) - def add_file(self, *, digest, content_path): + def add_file_to_manifest(self, *, digest, content_path): """Add file to the manifest. Arguments: @@ -733,7 +733,7 @@ def add_file(self, *, digest, logical_path, content_path=None, dedupe=True): content_path = make_unused_filepath(filepath=suggested, used=self.inv.content_paths) # Have location now, add to manifest - self.inv.add_file(digest=digest, content_path=content_path) + self.inv.add_file_to_manifest(digest=digest, content_path=content_path) else: # File or files with same digest exist content_path = None diff --git a/ocfl/new_version.py b/ocfl/new_version.py index 924cb46..24788c7 100644 --- a/ocfl/new_version.py +++ b/ocfl/new_version.py @@ -7,11 +7,12 @@ import copy import hashlib import logging +import os.path from urllib.parse import quote as urlquote import fs.path -from .constants import DEFAULT_DIGEST_ALGORITHM, DEFAULT_CONTENT_DIRECTORY +from .constants import DEFAULT_DIGEST_ALGORITHM, DEFAULT_CONTENT_DIRECTORY, DEFAULT_SPEC_VERSION from .digest import file_digest from .inventory import Inventory, InventoryException from .object_utils import make_unused_filepath @@ -38,7 +39,6 @@ def __init__(self, *, srcdir="."): """ # Configuration self.inventory = None - self.objdir = None self.srcdir = srcdir self.src_fs = None self.content_path_normalization = None @@ -52,13 +52,17 @@ def __init__(self, *, srcdir="."): @classmethod def first_version(cls, *, srcdir=".", + identifier, + spec_version=DEFAULT_SPEC_VERSION, digest_algorithm=None, content_directory=None, metadata=None, + fixity=None, content_path_normalization="uri"): """Start the first version for this object. Arguments: + identifier (str): identifier of the object to be created digest_algorithm (str or None): content_directort (str or None): content_path_normalization (str): the path normalization strategy @@ -68,6 +72,19 @@ def first_version(cls, *, """ self = cls(srcdir=srcdir) inventory = Inventory() + self.inventory = inventory + inventory.id = identifier + inventory.spec_version = spec_version + inventory.digest_algorithm = digest_algorithm + inventory.init_manifest_and_versions() + # Add contentDirectory if not "content" + if self.content_directory != DEFAULT_CONTENT_DIRECTORY: + inventory.content_directory = self.content_directory + # Add fixity section if requested + if fixity is not None and len(fixity) > 0: + for fixity_type in fixity: + inventory.add_fixity_type(fixity_type) + # inventory.add_version(metadata=metadata) # also sets head "v1" if digest_algorithm is None: digest_algorithm = DEFAULT_DIGEST_ALGORITHM @@ -75,15 +92,14 @@ def first_version(cls, *, if (content_directory is not None and content_directory != DEFAULT_CONTENT_DIRECTORY): inventory.content_directory = content_directory - self.inventory = inventory self.content_path_normalization = content_path_normalization return self @classmethod def next_version(cls, *, inventory, - objdir=None, srcdir=".", + spec_version=DEFAULT_SPEC_VERSION, metadata=None, content_path_normalization="uri", forward_delta=True, @@ -130,12 +146,17 @@ def next_version(cls, *, """ self = cls(srcdir=srcdir) self.inventory = inventory - self.objdir = objdir self.content_path_normalization = content_path_normalization self.forward_delta = forward_delta self.dedupe = dedupe self.old_digest_algorithm = old_digest_algorithm state = {} + if spec_version != inventory.spec_version: + # Check we are upgrading + if spec_version < inventory.spec_version: + raise NewVersionException("Must not create new version with lower spec version (%s) than last version (%s)" + % (spec_version, inventory.spec_version)) + inventory.spec_version = spec_version if carry_content_forward: state = copy.deepcopy(self.inventory.current_version.state) self.inventory.add_version(state=state, metadata=metadata) @@ -183,7 +204,7 @@ def _map_filepath(self, filepath): vfilepath = make_unused_filepath(vfilepath, used) return vfilepath - def add(self, src_path, logical_path, content_path=None): + def add(self, src_path, logical_path, content_path=None, src_path_has_prefix=False): # pylint: disable=unused-argument """Add a file to the new version. Arguments: @@ -203,9 +224,11 @@ def add(self, src_path, logical_path, content_path=None): NewVersionException: if the specifies content path is not allowed """ inventory = self.inventory + prefix = inventory.head + "/" + self.content_directory + "/" if content_path is None: + # src_path = src_path if src_path_has_prefix else prefix + src_path content_path = self._map_filepath(src_path) - elif not content_path.startswith(inventory.head + "/" + self.content_directory + "/"): + elif not content_path.startswith(prefix): raise NewVersionException("Bad content path %s, must start with version directory and content directory path elements" % (content_path)) elif content_path in inventory.content_paths: @@ -217,7 +240,10 @@ def add(self, src_path, logical_path, content_path=None): raise NewVersionException("Logical path %s already exists in new version %s" % (logical_path, inventory.head)) # Work out digest, add to state digest = file_digest(src_path, inventory.digest_algorithm, pyfs=self.src_fs) - inventory.current_version.state[digest] = [logical_path] + if digest in inventory.current_version.state_add_if_not_present(): + inventory.current_version.state[digest].append(logical_path) + else: + inventory.current_version.state[digest] = [logical_path] # Work out whether we already have this content in the current # and or previous versions, as a basis to work out whether we want # to add a content file @@ -236,7 +262,7 @@ def add(self, src_path, logical_path, content_path=None): or (in_current_version and not self.dedupe)): # Yes, we copy this file in... self.files_to_copy[src_path] = content_path - inventory.add_file(digest=digest, content_path=content_path) + inventory.add_file_to_manifest(digest=digest, content_path=content_path) def delete(self, logical_path): """Delete a logical path from this new version. @@ -280,3 +306,9 @@ def rename(self, old_logical_path, new_logical_path): inventory.current_version.state[digest] = [new_logical_path] except InventoryException: raise NewVersionException("Cannot rename logical path %s that does not exist in new version %s" % (old_logical_path, inventory.head)) + + def add_from_srcdir(self): + """Add all content from srcdir.""" + for src_path in sorted(self.src_fs.walk.files()): + src_path = os.path.relpath(src_path, "/") + self.add(src_path, src_path, src_path_has_prefix=False) diff --git a/ocfl/object.py b/ocfl/object.py index fdb2ec4..b447437 100755 --- a/ocfl/object.py +++ b/ocfl/object.py @@ -7,12 +7,10 @@ ``s3://`` filesystems. """ import copy -import hashlib import json import os.path import re import logging -from urllib.parse import quote as urlquote import fs import fs.path @@ -23,8 +21,7 @@ from .inventory import Inventory from .inventory_validator import InventoryValidator from .new_version import NewVersion -from .object_utils import make_unused_filepath, parse_version_directory, \ - ObjectException +from .object_utils import parse_version_directory, ObjectException from .pyfs import pyfs_openfs from .namaste import Namaste from .validator import Validator, ValidatorAbortException @@ -150,45 +147,6 @@ def copy_into_object(self, src_fs, srcfile, filepath, create_dirs=False): self.obj_fs.makedirs(dstpath) fs.copy.copy_file(src_fs, srcfile, self.obj_fs, filepath) - def map_filepath(self, filepath, vdir, used): - """Map source filepath to a content path within the object. - - FIXME - Remove this method in favor or NewVersion._map_filepath - - The purpose of the mapping might be normalization, sanitization, - content distribution, or something else. The mapping is set by the - content_path_normalization attribute where None indicates no mapping, the - source file name and path are preserved. - - Arguments: - filepath: the source filepath (possibly including directories) that - will be mapped into the object content path. - vdir: the current version directory name. - used: distionary used to check whether a given vfilepath has - been used already. - - Returns vfilepath, the version filepath for this content that starts - with `vdir/content_directory/`. - """ - if self.content_path_normalization == "uri": - filepath = urlquote(filepath) - # also encode any leading period to unhide files - if filepath[0] == ".": - filepath = "%2E" + filepath[1:] - elif self.content_path_normalization == "md5": - # Truncated MD5 hash of the _filepath_ as an illustration of diff - # paths for the specification. Not sure whether there should be any - # real application of this - filepath = hashlib.md5(filepath.encode("utf-8")).hexdigest()[0:16] - elif self.content_path_normalization is not None: - raise Exception("Unknown filepath normalization '%s' requested" % (self.content_path_normalization)) - vfilepath = fs.path.join(vdir, self.content_directory, filepath) # path relative to root, inc v#/content - # Check we don"t already have this vfilepath from many to one - # normalization, add suffix to distinguish if necessary - if vfilepath in used: - vfilepath = make_unused_filepath(vfilepath, used) - return vfilepath - def start_inventory(self): """Create inventory start with metadata from self. @@ -212,78 +170,7 @@ def start_inventory(self): self.fixity = None return inventory - def _add_version_to_inventory(self, *, - inventory, src_fs, src_dir, vdir, - metadata=None): - """Add to inventory data for new version based on files in srcdir. - - Changes the inventory data for this object but does change anything on - storage (inventory, inventory digest files, or content files). - - Arguments: - inventory: an Invenory object with data up to version (vdir-1) - which must include blocks for the manifest and versions. It - must also include a fixity block for every algorithm in - self.fixity - src_fs: pyfs filesystem where the new version files exist - src_dir: the version directory in src_fs that files are being - added from - vdir: the version directory name of the version being added - metadata: a VersionMetadata object with any metadata for this - version - - Returns: - dict: manifest_to_srcfile, a dict mapping from paths in manifest to - the path of the source file in src_fs that should be include in - the content for this new version. - """ - state = {} # state for this new version - manifest = inventory.manifest - digests_in_version = {} - manifest_to_srcfile = {} - # Go through all files to find new files in manifest and state for this version - for filepath in sorted(src_fs.walk.files(src_dir)): - sfilepath = os.path.relpath(filepath, src_dir) - vfilepath = self.map_filepath(sfilepath, vdir, used=manifest_to_srcfile) - digest = file_digest(filepath, self.digest_algorithm, pyfs=src_fs) - # Always add file to state - if digest not in state: - state[digest] = [] - state[digest].append(sfilepath) - if self.forward_delta and digest in manifest: - # We already have this content in a previous version and we are using - # forward deltas so do not need to copy in this one - pass - else: - # This is a new digest so an addition in this version and - # we save the information for later includes - if digest not in digests_in_version: - digests_in_version[digest] = [vfilepath] - elif not self.dedupe: - digests_in_version[digest].append(vfilepath) - manifest_to_srcfile[vfilepath] = filepath - - # Add any new digests in this version to the manifest - for digest, paths in digests_in_version.items(): - if digest not in manifest: - manifest[digest] = paths - else: - for p in paths: - manifest[digest].append(p) - # Add extra fixity entries if required - if self.fixity is not None: - for fixity_type in self.fixity: - for digest, vfilepaths in digests_in_version.items(): - for vfilepath in vfilepaths: - fixity_digest = file_digest(manifest_to_srcfile[vfilepath], fixity_type, pyfs=src_fs) - inventory.add_fixity_data(digest_algorithm=fixity_type, - digest=fixity_digest, - filepath=vfilepath) - # Add this new version to inventory (also updates head) - inventory.add_version(vdir=vdir, metadata=metadata, state=state) - return manifest_to_srcfile - - def build_inventory(self, src_fs, versions_metadata=None): + def version_dirs_and_metadata(self, src_fs, versions_metadata=None): """Generate an OCFL inventory from a set of source files. Arguments: @@ -300,7 +187,6 @@ def build_inventory(self, src_fs, versions_metadata=None): """ if versions_metadata is None: versions_metadata = {} - inventory = self.start_inventory() # Find the versions versions = {} for vdir in src_fs.listdir("/"): @@ -312,18 +198,19 @@ def build_inventory(self, src_fs, versions_metadata=None): except ObjectException: # Ignore directories that are not valid version dirs pass + # Do we have a valid sequence of versions? + n = 1 + for vn in sorted(versions.keys()): + if vn != n: + raise ObjectException("Bad version sequence (%s)" % (str(sorted(versions.keys())))) + n += 1 # Go through versions in order building versions array, deduping # files to include if selected for vn in sorted(versions.keys()): vdir = versions[vn] # Do we have metadata for this version? Else empty. metadata = versions_metadata.get(vn, VersionMetadata()) - manifest_to_srcfile = self._add_version_to_inventory(inventory=inventory, - src_fs=src_fs, - src_dir=vdir, - vdir=vdir, - metadata=metadata) - yield (vdir, inventory, manifest_to_srcfile) + yield (vdir, metadata) def object_declaration_object(self): """NAMASTE object declaration Namaste object.""" @@ -397,13 +284,33 @@ def build(self, srcdir, versions_metadata=None, objdir=None): src_fs = pyfs_openfs(srcdir) inventory = None # Create each version of the object - for (vdir, inventory, manifest_to_srcfile) in self.build_inventory(src_fs, versions_metadata): + for (vdir, metadata) in self.version_dirs_and_metadata(src_fs, versions_metadata): + if vdir == "v1": + nv = NewVersion.first_version(srcdir=os.path.join(srcdir, vdir), + identifier=self.id, + spec_version=self.spec_version, + digest_algorithm=self.digest_algorithm, + content_directory=self.content_directory, + metadata=metadata, + fixity=self.fixity, + content_path_normalization=self.content_path_normalization) + else: + nv = NewVersion.next_version(inventory=inventory, + srcdir=os.path.join(srcdir, vdir), + metadata=metadata, + content_path_normalization=self.content_path_normalization, + forward_delta=self.forward_delta, + dedupe=self.dedupe, + carry_content_forward=False) + # Add content, everything in srcdir + nv.add_from_srcdir() + inventory = nv.inventory num_versions += 1 if objdir is not None: self.write_inventory_and_sidecar(inventory, vdir) # Copy files into this version - for (path, srcfile) in manifest_to_srcfile.items(): - self.copy_into_object(src_fs, srcfile, path, create_dirs=True) + for (srcpath, objpath) in nv.files_to_copy.items(): + self.copy_into_object(nv.src_fs, srcpath, objpath, create_dirs=True) # Finally populate the object root if objdir is not None: # Write object declaration, inventory and sidecar @@ -429,26 +336,29 @@ def create(self, srcdir, metadata=None, objdir=None): """ if self.id is None: raise ObjectException("Identifier is not set!") - src_fs = pyfs_openfs(srcdir) - if objdir is not None: - self.open_obj_fs(objdir, create=True) - inventory = self.start_inventory() - vdir = "v1" - manifest_to_srcfile = self._add_version_to_inventory(inventory=inventory, - src_fs=src_fs, - src_dir="", vdir=vdir, - metadata=metadata) + nv = NewVersion.first_version(srcdir=srcdir, + identifier=self.id, + spec_version=self.spec_version, + digest_algorithm=self.digest_algorithm, + content_directory=self.content_directory, + metadata=metadata, + fixity=self.fixity, + content_path_normalization=self.content_path_normalization) + # Add content, everything in srcdir + nv.add_from_srcdir() + inventory = nv.inventory + # If objdir is not set then don't write anything, just return inventory if objdir is None: return inventory # Write out v1 object - self.write_inventory_and_sidecar(inventory, vdir) + self.open_obj_fs(objdir, create=True) + self.write_inventory_and_sidecar(inventory, "v1") # Write object root with object declaration, inventory and sidecar self.write_object_declaration() self.write_inventory_and_sidecar(inventory) # Write version files - for path in inventory.content_paths: - srcfile = manifest_to_srcfile[path] - self.copy_into_object(src_fs, srcfile, path, create_dirs=True) + for (srcpath, objpath) in nv.files_to_copy.items(): + self.copy_into_object(nv.src_fs, srcpath, objpath, create_dirs=True) logging.info("Created OCFL object %s in %s", self.id, objdir) return inventory @@ -472,21 +382,17 @@ def add_version_with_content(self, objdir="", srcdir=None, metadata=None): between versions. """ print("### " + str(metadata)) - new_version = self.start_new_version(objdir=objdir, - srcdir=srcdir, - digest_algorithm=self.digest_algorithm, - fixity=self.fixity, - metadata=metadata, - carry_content_forward=False) - # Add and remove any contents by comparing srcdir with existing state and manifest + nv = self.start_new_version(objdir=objdir, + srcdir=srcdir, + digest_algorithm=self.digest_algorithm, + fixity=self.fixity, + metadata=metadata, + carry_content_forward=False) + # Add files if srcdir is set if srcdir is not None: - src_fs = pyfs_openfs(srcdir) - for src_path in sorted(src_fs.walk.files()): - src_path = os.path.relpath(src_path, "/") - obj_path = self.map_filepath(src_path, new_version.inventory.head, used={}) - new_version.add(src_path, src_path, obj_path) + nv.add_from_srcdir() # Write the new version - return self.write_new_version(new_version) + return self.write_new_version(nv) def start_new_version(self, *, objdir=None, @@ -582,7 +488,6 @@ def start_new_version(self, *, inventory.version(vdir).state = state inventory.manifest = manifest return NewVersion.next_version(inventory=inventory, - objdir=objdir, srcdir=srcdir, metadata=metadata, content_path_normalization=self.content_path_normalization, @@ -614,7 +519,7 @@ def write_new_version(self, new_version): # Write inventory in both root and head version self.write_inventory_and_sidecar(inventory, inventory.head) self.write_inventory_and_sidecar(inventory) - logging.info("Updated OCFL object %s in %s by adding %s", inventory.id, new_version.objdir, inventory.head) + logging.info("Updated OCFL object %s by adding %s", inventory.id, inventory.head) return inventory def tree(self, objdir): diff --git a/tests/test_inventory.py b/tests/test_inventory.py index 5259bf8..bb7b2b8 100644 --- a/tests/test_inventory.py +++ b/tests/test_inventory.py @@ -117,18 +117,18 @@ def test_add_version(self): self.assertEqual(v.message, "and again") self.assertEqual(inv.head, "v0002") - def test_add_file(self): - """Test add_file method.""" + def test_add_file_to_manifest(self): + """Test add_file_to_manifest method.""" inv = Inventory() - inv.add_file(digest="abc123", content_path="file1") + inv.add_file_to_manifest(digest="abc123", content_path="file1") self.assertEqual(inv.manifest["abc123"], ["file1"]) - inv.add_file(digest="abc123", content_path="file2") + inv.add_file_to_manifest(digest="abc123", content_path="file2") self.assertEqual(inv.manifest["abc123"], ["file1", "file2"]) - inv.add_file(digest="def456", content_path="file3") + inv.add_file_to_manifest(digest="def456", content_path="file3") self.assertEqual(inv.manifest["abc123"], ["file1", "file2"]) self.assertEqual(inv.manifest["def456"], ["file3"]) # Errors - self.assertRaises(InventoryException, inv.add_file, + self.assertRaises(InventoryException, inv.add_file_to_manifest, digest="anything", content_path="file1") def test_getter_properties_minimal_example(self): diff --git a/tests/test_object.py b/tests/test_object.py index fde6510..19651f0 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -57,138 +57,22 @@ def test04_start_inventory(self): self.assertEqual(inventory.content_directory, "stuff") self.assertEqual(inventory.digest_algorithm, "sha512") - def test05__add_version_to_inventory(self): - """Test _add_version_to_inventory method.""" - self.maxDiff = None - oo = Object(digest_algorithm="md5") - inventory = Inventory({'manifest': {}, 'versions': {}}) - with open('fixtures/1.0/content/spec-ex-full/v1_inventory.json', 'r', encoding="utf-8") as fh: - v_inventory = json.load(fh) - metadata = VersionMetadata(inventory=v_inventory, version='v1') - src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full') - oo._add_version_to_inventory(inventory=inventory, src_fs=src_fs, - src_dir='v1', vdir='v1', metadata=metadata) - self.assertEqual(inventory.head, 'v1') - self.assertEqual(inventory.manifest, - {'184f84e28cbe75e050e9c25ea7f2e939': ['v1/content/foo/bar.xml'], - 'c289c8ccd4bab6e385f5afdd89b5bda2': ['v1/content/image.tiff'], - 'd41d8cd98f00b204e9800998ecf8427e': ['v1/content/empty.txt']}) - self.assertEqual(inventory.versiondata("v1"), - {'created': '2018-01-01T01:01:01Z', - 'message': 'Initial import', - 'state': { - '184f84e28cbe75e050e9c25ea7f2e939': ['foo/bar.xml'], - 'c289c8ccd4bab6e385f5afdd89b5bda2': ['image.tiff'], - 'd41d8cd98f00b204e9800998ecf8427e': ['empty.txt']}, - 'user': {'address': 'alice@example.com', 'name': 'Alice'}}) - self.assertNotIn('fixity', inventory.data) - # Now add second version to check forward delta - with open('fixtures/1.0/content/spec-ex-full/v2_inventory.json', 'r', encoding="utf-8") as fh: - v_inventory = json.load(fh) - metadata = VersionMetadata(inventory=v_inventory, version='v2') - src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full/v2') - oo._add_version_to_inventory(inventory=inventory, src_fs=src_fs, - src_dir='', vdir='v2', metadata=metadata) - self.assertEqual(inventory.head, 'v2') - self.assertEqual(inventory.manifest, - {'184f84e28cbe75e050e9c25ea7f2e939': ['v1/content/foo/bar.xml'], - '2673a7b11a70bc7ff960ad8127b4adeb': ['v2/content/foo/bar.xml'], - 'c289c8ccd4bab6e385f5afdd89b5bda2': ['v1/content/image.tiff'], - 'd41d8cd98f00b204e9800998ecf8427e': ['v1/content/empty.txt']}) - self.assertEqual(inventory.versiondata('v2'), - {'created': '2018-02-02T02:02:02Z', - 'message': 'Fix bar.xml, remove image.tiff, add empty2.txt', - 'state': { - '2673a7b11a70bc7ff960ad8127b4adeb': ['foo/bar.xml'], - 'd41d8cd98f00b204e9800998ecf8427e': ['empty.txt', 'empty2.txt']}, - 'user': {'address': 'bob@example.com', 'name': 'Bob'}}) - # Now with fixity - oo = Object(digest_algorithm="md5", fixity=['sha1']) - inventory = Inventory({'manifest': {}, 'versions': {}, 'fixity': {'sha1': {}}}) - md1 = VersionMetadata() - with open('fixtures/1.0/content/spec-ex-full/v1_inventory.json', 'r', encoding="utf-8") as fh: - v_inventory = json.load(fh) - md1 = VersionMetadata(inventory=v_inventory, version='v1') - src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full/v1') - manifest_to_srcfile = oo._add_version_to_inventory(inventory=inventory, - src_fs=src_fs, - src_dir='', - vdir='v1', - metadata=md1) - self.assertEqual(manifest_to_srcfile, { - 'v1/content/image.tiff': 'image.tiff', - 'v1/content/empty.txt': 'empty.txt', - 'v1/content/foo/bar.xml': 'foo/bar.xml' - }) - self.assertEqual(len(inventory.fixity['sha1']), 3) - # Test dedupe=False and forward_delta=False settings - oo = Object(dedupe=False, forward_delta=False, fixity=['md5']) - inventory = Inventory({'manifest': {}, 'versions': {}, 'fixity': {'md5': {}}}) - md1 = VersionMetadata(inventory={ - "id": "http://example.org/dedupe_content", - "versions": { - "v1": { - "created": "2020-07-15T17:40:00", - "message": "Initial import", - "user": { - "address": "mailto:alice@example.com", - "name": "Alice" - } - } - }}, version='v1') - src_fs = fs.open_fs('extra_fixtures/content/dedupe_content') - manifest_to_srcfile = oo._add_version_to_inventory(inventory=inventory, - src_fs=src_fs, - src_dir='v1', - vdir='v1', - metadata=md1) - # Because of dedupe=False we will have multiple copies of empty files - self.assertEqual(manifest_to_srcfile, { - 'v1/content/empty1.txt': 'v1/empty1.txt', - 'v1/content/empty2.txt': 'v1/empty2.txt', - 'v1/content/empty3.txt': 'v1/empty3.txt'}) - self.assertEqual(inventory.fixity['md5'], {"d41d8cd98f00b204e9800998ecf8427e": [ - "v1/content/empty1.txt", "v1/content/empty2.txt", "v1/content/empty3.txt"]}) - # Add a second version which will test for forward_delta=False - md2 = VersionMetadata(inventory={ - "id": "http://example.org/dedupe_content", - "versions": { - "v2": { - "created": "2020-07-15T17:54:00", - "message": "Update", - "user": { - "address": "mailto:bob@example.com", - "name": "Bob" - } - } - }}, version='v2') - manifest_to_srcfile = oo._add_version_to_inventory(inventory=inventory, - src_fs=src_fs, - src_dir='v2', - vdir='v2', - metadata=md2) - # Because of forward_delta=False we will have an additional copy of the empty file - self.assertEqual(manifest_to_srcfile, { - 'v2/content/empty4.txt': 'v2/empty4.txt'}) - self.assertEqual(inventory.fixity['md5'], {"d41d8cd98f00b204e9800998ecf8427e": [ - "v1/content/empty1.txt", "v1/content/empty2.txt", - "v1/content/empty3.txt", "v2/content/empty4.txt"]}) - - def test06_build_inventory(self): - """Test build_inventory.""" + def test06_version_dirs_and_metadata(self): + """Test version_dirs_and_metadata.""" oo = Object(digest_algorithm="md5", spec_version='1.0') src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full') - inventory = None - for (dummy_vdir, inventory, dummy_manifest_to_srcfile) in oo.build_inventory(src_fs): - pass - self.assertEqual(inventory.data['type'], 'https://ocfl.io/1.0/spec/#inventory') - self.assertEqual(inventory.head, 'v3') - self.assertEqual(inventory.manifest, - {'184f84e28cbe75e050e9c25ea7f2e939': ['v1/content/foo/bar.xml'], - '2673a7b11a70bc7ff960ad8127b4adeb': ['v2/content/foo/bar.xml'], - 'c289c8ccd4bab6e385f5afdd89b5bda2': ['v1/content/image.tiff'], - 'd41d8cd98f00b204e9800998ecf8427e': ['v1/content/empty.txt']}) - self.assertEqual(len(inventory.version_numbers), 3) + vdirs = [] + metadatas = [] + for (vdir, metadata) in oo.version_dirs_and_metadata(src_fs): + vdirs.append(vdir) + metadatas.append(metadata) + self.assertEqual(vdirs, ["v1", "v2", "v3"]) + self.assertEqual(metadatas[0].created, None) # FIXME - Was the intention to read from the json files alongside version directories? + self.assertEqual(metadatas[0].message, None) + self.assertEqual(metadatas[0].name, None) + self.assertEqual(metadatas[0].address, None) + self.assertEqual(metadatas[1].created, None) + self.assertEqual(metadatas[2].created, None) def test07_write_object_declaration(self): """Test write_object_declaration.""" @@ -259,6 +143,7 @@ def test10_create(self): 'inventory.json', 'inventory.json.sha512', 'v1'])) self.assertEqual(inv.head, "v1") + self.assertEqual(len(inv.manifest), 3) self.assertEqual(len(inv.version("v1").state), 3) def test11_add_version_with_content(self): @@ -374,23 +259,6 @@ def test_parse_inventory(self): oo.open_obj_fs('fixtures/1.0/bad-objects/E036_no_id') self.assertRaises(ObjectException, oo.parse_inventory) - def test_map_filepath(self): - """Test map_filepath method.""" - oo = Object() - # default is uri - self.assertEqual(oo.map_filepath('a', 'v1', {}), 'v1/content/a') - self.assertEqual(oo.map_filepath('.a?', 'v1', {}), 'v1/content/%2Ea%3F') - self.assertEqual(oo.map_filepath('a', 'v1', {'v1/content/a': True}), 'v1/content/a__2') - # md5 - oo = Object() - oo.content_path_normalization = 'md5' - self.assertEqual(oo.map_filepath('a', 'v1', {}), 'v1/content/0cc175b9c0f1b6a8') - self.assertEqual(oo.map_filepath('a', 'v1', {'v1/content/0cc175b9c0f1b6a8': True}), 'v1/content/0cc175b9c0f1b6a8__2') - # error case - oo = Object() - oo.content_path_normalization = '???' - self.assertRaises(Exception, oo.map_filepath, 'a', 'v1', {}) - def test_extract(self): """Test extract method.""" tempdir = tempfile.mkdtemp(prefix='test_extract') From 244011e87e2250681bdd03755f369cb664843725 Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Mon, 9 Dec 2024 16:45:31 -0500 Subject: [PATCH 2/8] Distinguish titles --- docs/fixtures.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/fixtures.rst b/docs/fixtures.rst index 4890e93..6c6549a 100644 --- a/docs/fixtures.rst +++ b/docs/fixtures.rst @@ -8,21 +8,21 @@ Official `OCFL Fixtures * Specification examples: - * Section `5.1 Minimal OCFL Object `_ is good + * Section `5.1 Minimal OCFL v1.1 Object `_ is good - * Section `5.2 Versioned OCFL Object `_ is valid and has fixture object `1.1/good-objects/spec-ex-full `_ + * Section `5.2 Versioned OCFL v1.1 Object `_ is valid and has fixture object `1.1/good-objects/spec-ex-full `_ - * Section `5.3 Different Logical and Content Paths in an OCFL Object `_ is valid but should generate a warning because the ``message`` property is missing. + * Section `5.3 Different Logical and Content Paths in an OCFL v1.1 Object `_ is valid but should generate a warning because the ``message`` property is missing. * OCFL v1.0 * Specification examples: - * Section `5.1 Minimal OCFL Object `_ is valid but should generate a warning because the ``address`` is not a URI + * Section `5.1 Minimal OCFL v1.0 Object `_ is valid but should generate a warning because the ``address`` is not a URI - * Section `5.2 Versioned OCFL Object `_ is valid and has fixture object `1.0/good-objects/spec-ex-full `_ + * Section `5.2 Versioned OCFL v1.0 Object `_ is valid and has fixture object `1.0/good-objects/spec-ex-full `_ - * Section `5.3 Different Logical and Content Paths in an OCFL Object `_ is valid but should generate warnings because the ``message`` and the ``user`` ``name`` and ``address`` properties are missing. + * Section `5.3 Different Logical and Content Paths in an OCFL v1.0 Object `_ is valid but should generate warnings because the ``message`` and the ``user`` ``name`` and ``address`` properties are missing. Additions from `ocfl-py `_ are in the ``extra_fixtures`` directory From 6dba56d20a1fb00c844eb5b7219d3ab29e1c3620 Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Tue, 10 Dec 2024 17:11:11 -0500 Subject: [PATCH 3/8] Doc improvements --- docs/conformance.rst | 16 +++++++++++----- docs/fixtures.rst | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/conformance.rst b/docs/conformance.rst index 4a19cdd..cc0f111 100644 --- a/docs/conformance.rst +++ b/docs/conformance.rst @@ -1,17 +1,23 @@ Conformance =========== -``ocfl-py`` is designed to follow closely the OCFL specifications. +``ocfl-py`` is designed to follow closely the OCFL specifications, which +includes reporting errors in response to violations of MUST and MUST NOT +requirements in the specification, and warnings in response to not following +SHOULD or SHOULD NOT requirements in the specification (see the `Conformance +`_ section). The following is list of error and warning codes implemented but the ``ocfl-py`` -package. The complete list `OCFL Validation Codes -`_ is provided as part of the -OCFL specifications. +package: .. toctree:: :maxdepth: 1 validation_status -(The validation status table is generated by the `ocfl-validate.py` script +The complete list `OCFL Validation Codes +`_ is provided as part of the +OCFL specifications. + +(The validation status table is generated by the `extract_codes.py` script run against the ``ocfl`` module.) diff --git a/docs/fixtures.rst b/docs/fixtures.rst index 6c6549a..fed834b 100644 --- a/docs/fixtures.rst +++ b/docs/fixtures.rst @@ -24,5 +24,5 @@ Official `OCFL Fixtures * Section `5.3 Different Logical and Content Paths in an OCFL v1.0 Object `_ is valid but should generate warnings because the ``message`` and the ``user`` ``name`` and ``address`` properties are missing. -Additions from `ocfl-py -`_ are in the ``extra_fixtures`` directory +Additions from ``ocfl-py`` are in the `extra_fixtures +`_ directory. From 4ce68db86a3ea8ae264c366dc9f1096b554239a9 Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Wed, 11 Dec 2024 22:44:28 -0500 Subject: [PATCH 4/8] Docs and add metadata accessors --- ocfl/new_version.py | 118 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 4 deletions(-) diff --git a/ocfl/new_version.py b/ocfl/new_version.py index 24788c7..0d2928b 100644 --- a/ocfl/new_version.py +++ b/ocfl/new_version.py @@ -31,7 +31,8 @@ def __init__(self, *, srcdir="."): Arguments: srcdir (str): source directory name for files that will be added - to this new version. May be a pyfs filesystem specificarion + to this new version. May be a pyfs filesystem specification. + Default is "." The default constructor is not expected to be used directly, see NewVersion.first_version(...) and NewVersion.next_version(..) for the @@ -62,13 +63,65 @@ def first_version(cls, *, """Start the first version for this object. Arguments: + srcdir (str): source directory name for files that will be added + to this new version. May be a pyfs filesystem specification identifier (str): identifier of the object to be created - digest_algorithm (str or None): - content_directort (str or None): + spec_version (str): the specification version that the new version + should be created in accord with. Defaults to + ocfl.constants.DEFAULT_SPEC_VERSION + digest_algorithm (str or None): the digest algorithm to use for + content addressing. If None (default) then will use the value + ocfl.constants.DEFAULT_DIGEST_ALGORITHM + content_directory (str or None): the content directory name. If + None (default) then will use the value "content" (as set in + ocfl.constants.DEFAULT_CONTENT_DIRECTORY) + metadata (ocfl.VersionMetadata or None): if an ocfl.VersionMetadata + object is provided then this is used to set the metadata of the + new version. The setters .created, .message, .user_address and + .user_name may alternatively be used + fixity (None or list of str): If a list then will be interpretted as + the set of fixity digest types to be added for all content files + in this version content_path_normalization (str): the path normalization strategy to use with content paths when files are added to this object (default "uri") + Example use: + + >>> import ocfl + >>> nv = ocfl.NewVersion.first_version(identifier="http://example.org/minimal") + >>> nv.add("fixtures/1.1/good-objects/spec-ex-minimal/v1/content/file.txt", "file.txt") + >>> nv.created = "2018-10-02T12:00:00Z" + >>> nv.message = "One file" + >>> nv.user_address = "mailto:alice@example.org" + >>> nv.user_name = "Alice" + >>> print(nv.inventory.as_json()) + { + "digestAlgorithm": "sha512", + "head": "v1", + "id": "http://example.org/minimal", + "manifest": { + "7545b8720a601235067473f2c87f43461f5c147fb622d51bfcdcda05e0773c96e9f922f4d88d371bb7f87793b655b9e1c3b8bbca35f2950c5c87eda955179f67": [ + "v1/content/fixtures/1.1/good-objects/spec-ex-minimal/v1/content/file.txt" + ] + }, + "type": "https://ocfl.io/1.1/spec/#inventory", + "versions": { + "v1": { + "created": "2018-10-02T12:00:00Z", + "message": "One file", + "state": { + "7545b8720a601235067473f2c87f43461f5c147fb622d51bfcdcda05e0773c96e9f922f4d88d371bb7f87793b655b9e1c3b8bbca35f2950c5c87eda955179f67": [ + "file.txt" + ] + }, + "user": { + "address": "mailto:alice@example.org", + "name": "Alice" + } + } + } + } """ self = cls(srcdir=srcdir) inventory = Inventory() @@ -115,16 +168,33 @@ def next_version(cls, *, Arguments: inventory (ocfl.Inventory): inventory that we will modify to build the new version. + srcdir (str): source directory name for files that will be added + to this new version. May be a pyfs filesystem specification metadata (ocfl.VersionMetadata or None): Either a VersionMetadata object to set the metadata for the new version, None to not set metadata content_path_normalization (str): the path normalization strategy to use with content paths when files are added to this object (default "uri") + forward_delta (bool): True (default) to use forward delta strategy + for files in the new version, meaning that only files not + present in a previous version will be added in the content + directory of this new version. If False the all files that are + part of this version's state will be added in the content + directory + dedupe (bool): True (defult) to deduplicate files within this + version, meaning that only one copy of a given file will be + included in the content directory even if there are multiple + copies in the new version state. If False then will store + multiple copies carry_content_forward (bool): True to carry forward the state from the last current version as a starting point. False to start with empty version state. - + old_digest_algorithm (str): Can be used to record the digest + algorithm of the previous version so that the root inventory + sidecar is cleaned up when writing the new inventory in the + object root. The value is not used within NewVerion code. + Default is None Example use: @@ -312,3 +382,43 @@ def add_from_srcdir(self): for src_path in sorted(self.src_fs.walk.files()): src_path = os.path.relpath(src_path, "/") self.add(src_path, src_path, src_path_has_prefix=False) + + @property + def created(self): + """Created string for this version.""" + return self.inventory.current_version.created + + @created.setter + def created(self, value): + """Set created string for this version.""" + self.inventory.current_version.created = value + + @property + def message(self): + """Message string for this version.""" + return self.inventory.current_version.message + + @message.setter + def message(self, value): + """Set message string for this version.""" + self.inventory.current_version.message = value + + @property + def user_address(self): + """User_address string for this version.""" + return self.inventory.current_version.user_address + + @user_address.setter + def user_address(self, value): + """Set user_address string for this version.""" + self.inventory.current_version.user_address = value + + @property + def user_name(self): + """user_name string for this version.""" + return self.inventory.current_version.user_name + + @user_name.setter + def user_name(self, value): + """Set user_name string for this version.""" + self.inventory.current_version.user_name = value From 2a4f6205ea9e67c01a00e4ae4af0795fe540f7ea Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Wed, 11 Dec 2024 22:59:50 -0500 Subject: [PATCH 5/8] Add test_add_fixity_data --- tests/test_inventory.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_inventory.py b/tests/test_inventory.py index bb7b2b8..6791282 100644 --- a/tests/test_inventory.py +++ b/tests/test_inventory.py @@ -131,6 +131,21 @@ def test_add_file_to_manifest(self): self.assertRaises(InventoryException, inv.add_file_to_manifest, digest="anything", content_path="file1") + def test_add_fixity_data(self): + """Test add_fixity_data method.""" + inv = Inventory() + self.assertEqual(inv.fixity, {}) + self.assertRaises(KeyError, inv.add_fixity_data, "fx1", "digest1", "path1") + inv.add_fixity_type("fx1") + self.assertEqual(inv.fixity, {"fx1": {}}) + inv.add_fixity_data("fx1", "digest1", "path1") + self.assertEqual(inv.fixity, {"fx1": {"digest1": ["path1"]}}) + inv.add_fixity_data("fx1", "digest1", "path2") + self.assertEqual(inv.fixity, {"fx1": {"digest1": ["path1", "path2"]}}) + inv.add_fixity_data("fx1", "digest2", "path3") + self.assertEqual(inv.fixity, {"fx1": {"digest1": ["path1", "path2"], + "digest2": ["path3"]}}) + def test_getter_properties_minimal_example(self): """Test read of fixture and extract properties.""" inv = Inventory(filepath="fixtures/1.1/good-objects/minimal_one_version_one_file/inventory.json") From 8c636a1c952702a9106db9b336e12611429c6376 Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Wed, 11 Dec 2024 23:08:01 -0500 Subject: [PATCH 6/8] Add test for type not string --- .../E038_type_not_str/0=ocfl_object_1.1 | 1 + .../bad-objects/E038_type_not_str/inventory.json | 15 +++++++++++++++ .../E038_type_not_str/inventory.json.sha512 | 1 + .../E038_type_not_str/v1/inventory.json | 15 +++++++++++++++ .../E038_type_not_str/v1/inventory.json.sha512 | 1 + ocfl/data/validation-errors.json | 6 ++++++ ocfl/inventory_validator.py | 2 +- tests/test_validator.py | 1 + 8 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 extra_fixtures/1.1/bad-objects/E038_type_not_str/0=ocfl_object_1.1 create mode 100644 extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json create mode 100644 extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json.sha512 create mode 100644 extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json create mode 100644 extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json.sha512 diff --git a/extra_fixtures/1.1/bad-objects/E038_type_not_str/0=ocfl_object_1.1 b/extra_fixtures/1.1/bad-objects/E038_type_not_str/0=ocfl_object_1.1 new file mode 100644 index 0000000..14705cb --- /dev/null +++ b/extra_fixtures/1.1/bad-objects/E038_type_not_str/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json b/extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json new file mode 100644 index 0000000..8540dd2 --- /dev/null +++ b/extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json @@ -0,0 +1,15 @@ +{ + "digestAlgorithm": "sha512", + "head": "v1", + "id": "http://example.org/E038_type_not_str", + "manifest": { }, + "type": [ "THIS IS AN ARRAY NOT JUST A STRING" ], + "versions": { + "v1": { + "created": "2019-01-01T02:03:04Z", + "message": "One version and no content", + "state": { }, + "user": { "address": "mailto:Person_A@example.org", "name": "Person A" } + } + } +} diff --git a/extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json.sha512 b/extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json.sha512 new file mode 100644 index 0000000..cdd9ddf --- /dev/null +++ b/extra_fixtures/1.1/bad-objects/E038_type_not_str/inventory.json.sha512 @@ -0,0 +1 @@ +6cdd5082fe06fa39d31627c05c09dcab4e32dc4d00caaf4c436129402b37daae1e938b3e0e3ee1b02dd09764fd3fd7e4ffa918657f4df57a7da879382df537f8 inventory.json diff --git a/extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json b/extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json new file mode 100644 index 0000000..8540dd2 --- /dev/null +++ b/extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json @@ -0,0 +1,15 @@ +{ + "digestAlgorithm": "sha512", + "head": "v1", + "id": "http://example.org/E038_type_not_str", + "manifest": { }, + "type": [ "THIS IS AN ARRAY NOT JUST A STRING" ], + "versions": { + "v1": { + "created": "2019-01-01T02:03:04Z", + "message": "One version and no content", + "state": { }, + "user": { "address": "mailto:Person_A@example.org", "name": "Person A" } + } + } +} diff --git a/extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json.sha512 b/extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json.sha512 new file mode 100644 index 0000000..cdd9ddf --- /dev/null +++ b/extra_fixtures/1.1/bad-objects/E038_type_not_str/v1/inventory.json.sha512 @@ -0,0 +1 @@ +6cdd5082fe06fa39d31627c05c09dcab4e32dc4d00caaf4c436129402b37daae1e938b3e0e3ee1b02dd09764fd3fd7e4ffa918657f4df57a7da879382df537f8 inventory.json diff --git a/ocfl/data/validation-errors.json b/ocfl/data/validation-errors.json index 37dcd81..dd31514 100644 --- a/ocfl/data/validation-errors.json +++ b/ocfl/data/validation-errors.json @@ -220,6 +220,12 @@ "en": "OCFL Object %s inventory `type` attribute has an unsupported specification version number (%s), will proceed as if using version %s" } }, + "E038d": { + "params": ["where"], + "description": { + "en": "OCFL Object %s inventory `type` attribute does not have a string value" + } + }, "E040": { "params": ["where", "got", "expected"], "description": { diff --git a/ocfl/inventory_validator.py b/ocfl/inventory_validator.py index 2abc6d4..e697acf 100644 --- a/ocfl/inventory_validator.py +++ b/ocfl/inventory_validator.py @@ -128,7 +128,7 @@ def validate(self, inventory, force_spec_version=None): if "type" not in inventory: self._error("E036b") elif not isinstance(inventory["type"], str): - self._error("E999") + self._error("E038d") elif (force_spec_version and inventory["type"] != "https://ocfl.io/" + force_spec_version + "/spec/#inventory"): self._error("E038a", expected="https://ocfl.io/" + force_spec_version + "/spec/#inventory", got=inventory["type"]) diff --git a/tests/test_validator.py b/tests/test_validator.py index f24f95e..0eefe8a 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -122,6 +122,7 @@ def test02_bad_v1_1(self): "E036_no_id": ["E036a"], "E036_no_head": ["E036d"], "E037_inconsistent_id": ["E037b"], + "E038_type_not_str": ["E038d"], "E040_head_not_most_recent": ["E040"], "E040_wrong_head_doesnt_exist": ["E040"], "E040_wrong_head_format": ["E040"], From 971e255fb474d2fe9ed8acb2ca36ffb5f9b3f66e Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Wed, 11 Dec 2024 23:13:14 -0500 Subject: [PATCH 7/8] Test status_str() and str --- tests/test_validation_logger.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_validation_logger.py b/tests/test_validation_logger.py index f5aa83a..bb74fe5 100644 --- a/tests/test_validation_logger.py +++ b/tests/test_validation_logger.py @@ -77,3 +77,14 @@ def test_warning(self): vl.warning("W333") self.assertEqual(vl.num_warnings, 1) self.assertIn("Unknown warning: W333 - params ({})", vl.messages[-1]) + + def test_status_str_and_str(self): + """Test status_str method and __str__.""" + vl = ValidationLogger() + self.assertEqual(vl.status_str(), "") + vl.error("E991") + vl.error("E992") + self.assertEqual(vl.status_str(), + "Unknown error: E991 - params ({})\nUnknown error: E992 - params ({})") + # str(vl) is just a call to vl.status_str() + self.assertEqual(vl.status_str(), str(vl)) From f5f255f4b2d2388cee22d6796289a8d681363ff3 Mon Sep 17 00:00:00 2001 From: Simeon Warner Date: Thu, 12 Dec 2024 09:22:45 -0500 Subject: [PATCH 8/8] Docs --- ocfl/inventory.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ocfl/inventory.py b/ocfl/inventory.py index 10a074a..b776869 100644 --- a/ocfl/inventory.py +++ b/ocfl/inventory.py @@ -1,5 +1,11 @@ """OCFL Inventory and Version. +The Inventory class provides storage for inventory data and mehtods to +conveniently access and manipulate it. The associated Version class provides +methods to access and manipulate information about a specific object version. +Neither of these classes interact with object content, see ocfl.NewVersion +and ocfl.Object. + The storage mechanism for the inventory data is the python dict() structure resulting from reading the inventory JSON file and suitable for writing an inventory JSON file. Here we provide convenient @@ -91,7 +97,11 @@ class Inventory(): # pylint: disable=too-many-public-methods attribute is a string that is not set in the underlying data, else the value if it is. In the cases that the normal return value would be an array or a dict, then and empty array or empty dict are returned - if not present in the underlying data. + if not present in the underlying data. In some cases, additional methods + with a suffix ``_add_if_not_present`` are provided so that assignements + will work for previously missing attributes. See, for example, the + ``manifest`` property and the corresponding + ``manifest_add_if_not_present()`` method. Attributes: data: dict that is the top level JSON object of the parsed JSON @@ -559,9 +569,9 @@ def normalize_digests(self, digest_algorithm=None): class Version(): """Version class to represent version information in an Inventory. - The class stores only pointers to the appropriate inventory and - version directory with the versions block. These are used to access - data in the inventory. + The class stores only pointers to the appropriate ocfl.Inventory object + and the version directory key within the versions block. These are used + to access and manipulate data for a specific version in the inventory. """ def __init__(self, inv, vdir):