From b9e2d1fa52f08887926eedf111747b396fe4b5fe Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 5 Jul 2019 13:01:11 -0400 Subject: [PATCH 01/10] insert logic for source and destination dirs for bag-in-place code --- bagit.py | 85 +++++++++++++++++++++++++++++++++++++------------------- test.py | 2 +- 2 files changed, 57 insertions(+), 30 deletions(-) diff --git a/bagit.py b/bagit.py index 18cb352..3de2883 100755 --- a/bagit.py +++ b/bagit.py @@ -142,7 +142,7 @@ def find_locale_dir(): def make_bag( - bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, encoding="utf-8" + bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, encoding="utf-8", dest_dir=None ): """ Convert a given directory into a bag. You can pass in arbitrary @@ -162,20 +162,27 @@ def make_bag( if checksums is None: checksums = DEFAULT_CHECKSUMS - bag_dir = os.path.abspath(bag_dir) + if dest_dir: + bag_name = os.path.dirname(bag_dir) + dest_dir = os.path.abspath(os.path.join(dest_dir, bag_name)) + else: + dest_dir = os.path.abspath(bag_dir) + + source_dir = os.path.abspath(bag_dir) + cwd = os.path.abspath(os.path.curdir) - if cwd.startswith(bag_dir) and cwd != bag_dir: + if cwd.startswith(source_dir) and cwd != source_dir: raise RuntimeError( _("Bagging a parent of the current directory is not supported") ) - LOGGER.info(_("Creating bag for directory %s"), bag_dir) - - if not os.path.isdir(bag_dir): - LOGGER.error(_("Bag directory %s does not exist"), bag_dir) - raise RuntimeError(_("Bag directory %s does not exist") % bag_dir) + LOGGER.info(_("Creating bag from directory %s"), source_dir) + if not os.path.isdir(source_dir): + LOGGER.error(_("Bag source directory %s does not exist"), bag_dir) + raise RuntimeError(_("Bag source directory %s does not exist") % bag_dir) + # FIXME: we should do the permissions checks before changing directories old_dir = os.path.abspath(os.path.curdir) @@ -185,7 +192,7 @@ def make_bag( # bag to a destination other than the source. It would be nice if we could avoid # walking the directory tree more than once even if most filesystems will cache it - unbaggable = _can_bag(bag_dir) + unbaggable = _can_bag(dest_dir) if unbaggable: LOGGER.error( @@ -194,7 +201,7 @@ def make_bag( ) raise BagError(_("Missing permissions to move all files and directories")) - unreadable_dirs, unreadable_files = _can_read(bag_dir) + unreadable_dirs, unreadable_files = _can_read(source_dir) if unreadable_dirs or unreadable_files: if unreadable_dirs: @@ -214,19 +221,22 @@ def make_bag( LOGGER.info(_("Creating data directory")) # FIXME: if we calculate full paths we won't need to deal with changing directories - os.chdir(bag_dir) + os.chdir(source_dir) cwd = os.getcwd() - temp_data = tempfile.mkdtemp(dir=cwd) - - for f in os.listdir("."): - if os.path.abspath(f) == temp_data: - continue - new_f = os.path.join(temp_data, f) - LOGGER.info( - _("Moving %(source)s to %(destination)s"), - {"source": f, "destination": new_f}, - ) - os.rename(f, new_f) + temp_data = tempfile.mkdtemp(dir=dest_dir) + # getcwd resolves symlinks, dest_dir used abspath, which doesn't + temp_data = os.path.realpath(temp_data) + + if source_dir == dest_dir: + for f in os.listdir("."): + if os.path.abspath(f) == temp_data: + continue + new_f = os.path.join(temp_data, f) + LOGGER.info( + _("Moving %(source)s to %(destination)s"), + {"source": f, "destination": new_f}, + ) + os.rename(f, new_f) LOGGER.info( _("Moving %(source)s to %(destination)s"), @@ -236,7 +246,8 @@ def make_bag( # permissions for the payload directory should match those of the # original directory - os.chmod("data", os.stat(cwd).st_mode) + + os.chmod(os.path.join(dest_dir, "data"), os.stat(cwd).st_mode) total_bytes, total_files = make_manifests( "data", processes, algorithms=checksums, encoding=encoding @@ -271,7 +282,7 @@ def make_bag( finally: os.chdir(old_dir) - return Bag(bag_dir) + return Bag(dest_dir) class Bag(object): @@ -1513,6 +1524,15 @@ def _make_parser(): ) % ", ".join(DEFAULT_CHECKSUMS), ) + parser.add_argument( + "--destination", + type=str, + dest="dest_dir", + default=None, + help=_( + "Create bag in destination directory rather than in place." + ), + ) for i in CHECKSUM_ALGOS: alg_name = re.sub(r"^([A-Z]+)(\d+)$", r"\1-\2", i.upper()) @@ -1601,13 +1621,20 @@ def main(): bag_info=args.bag_info, processes=args.processes, checksums=args.checksums, + dest_dir=args.destination ) except Exception as exc: - LOGGER.error( - _("Failed to create bag in %(bag_directory)s: %(error)s"), - {"bag_directory": bag_dir, "error": exc}, - exc_info=True, - ) + if args.dest_dir: + LOGGER.error(_("Failed to create bag in %(bag_directory)s: %(error)s"), + {'bag_directory': args.dest_dir, 'error': exc}, + exc_info=True, + ) + else: + LOGGER.error( + _("Failed to create bag in %(bag_directory)s: %(error)s"), + {"bag_directory": bag_dir, "error": exc}, + exc_info=True, + ) rc = 1 sys.exit(rc) diff --git a/test.py b/test.py index eab3d95..bedc54e 100644 --- a/test.py +++ b/test.py @@ -622,7 +622,7 @@ def test_make_bag_with_bogus_directory(self): bagit.make_bag(bogus_directory) self.assertEqual( - "Bag directory %s does not exist" % bogus_directory, + "Bag source directory %s does not exist" % bogus_directory, str(error_catcher.exception), ) From 838e1218d5e797dde0557476d3d5719ca7171445 Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 5 Jul 2019 14:25:58 -0400 Subject: [PATCH 02/10] add tests for 1 bag to dest, multi-bag to dest, existing dest --- test.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/test.py b/test.py index bedc54e..2033a26 100644 --- a/test.py +++ b/test.py @@ -4,6 +4,7 @@ import codecs import datetime +import filecmp import hashlib import logging import os @@ -96,6 +97,48 @@ def test_make_bag_md5_sha1_sha256_manifest(self): # check valid with three manifests self.assertTrue(self.validate(bag, fast=True)) + def test_make_bag_with_destination(self): + tmp_dir_out = tempfile.mkdtemp(prefix='bagit-test-') + dest_dir = j(tmp_dir_out, 'test-dest') + bag = bagit.make_bag( + self.tmpdir, dest_dir=dest_dir, checksums=['sha256', 'sha512'] + ) + subdir = os.path.basename(self.tmpdir) + self.assertTrue(os.path.isfile(j(dest_dir, subdir, 'manifest-sha256.txt'))) + self.assertTrue(os.path.isfile(j(dest_dir, subdir, 'manifest-sha512.txt'))) + self.assertTrue(self.validate(bag, fast=True)) + diff = filecmp.dircmp(self.tmpdir, os.path.join(dest_dir, subdir, 'data')) + self.assertTrue(len(diff.left_only+diff.right_only) == 0) + shutil.rmtree(tmp_dir_out) + + def test_make_bags_with_destinations(self): + dest = tempfile.mkdtemp(prefix='bagit-test-dest-') + src_par = tempfile.mkdtemp(prefix='bagit-test-src-') + srcs = tuple(os.path.join(src_par, '%04d' % i) for i in range(10)) + subdirs = tuple(os.path.relpath(src, src_par) for src in srcs) + for src in srcs: + shutil.copytree('test-data', src) + bag = bagit.make_bag(src, dest_dir=dest, checksum=['sha256']) + self.assertTrue(tuple(sorted(os.listdir(dest))) == subdirs) + for src, subdir in zip(srcs, subdirs): + diff = filecmp.dircmp(src, os.path.join(dest, subdir, 'data')) + self.assertTrue(len(diff.left_only+diff.right_only) == 0) + self.assertTrue(self.validate(bag)) + shutil.rmtree(src_par) + shutil.rmtree(dest) + + def test_make_bag_bad_destination(self): + tmp_dir_out = tempfile.mkdtemp(prefix='bagit-test-dest') + subdir = os.path.basename(self.tmpdir) + os.makedirs(os.path.join(tmp_dir_out, subdir)) + + self.assertRaises( + RuntimeError, bagit.make_bag, + self.tmpdir, dest_dir=tmp_dir_out, checksum=['sha256', 'sha512'] + ) + + shutil.rmtree(tmp_dir_out) + def test_validate_flipped_bit(self): bag = bagit.make_bag(self.tmpdir) readme = j(self.tmpdir, "data", "README") From eae9a0b36d6c2d184e5fdcd673a9233f9f56983a Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 5 Jul 2019 14:28:03 -0400 Subject: [PATCH 03/10] add code for bagging to destination --- bagit.py | 56 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/bagit.py b/bagit.py index 3de2883..eee93d3 100755 --- a/bagit.py +++ b/bagit.py @@ -12,6 +12,7 @@ import os import re import signal +import shutil import sys import tempfile import unicodedata @@ -163,8 +164,12 @@ def make_bag( checksums = DEFAULT_CHECKSUMS if dest_dir: - bag_name = os.path.dirname(bag_dir) + bag_name = os.path.basename(bag_dir) dest_dir = os.path.abspath(os.path.join(dest_dir, bag_name)) + if not os.path.isdir(dest_dir): + os.makedirs(dest_dir) + else: + raise RuntimeError(_("The following directory already exists:\n%s"), dest_dir) else: dest_dir = os.path.abspath(bag_dir) @@ -186,7 +191,7 @@ def make_bag( # FIXME: we should do the permissions checks before changing directories old_dir = os.path.abspath(os.path.curdir) - try: + try: # TODO: These two checks are currently redundant since an unreadable directory will also # often be unwritable, and this code will require review when we add the option to # bag to a destination other than the source. It would be nice if we could avoid @@ -237,21 +242,41 @@ def make_bag( {"source": f, "destination": new_f}, ) os.rename(f, new_f) + else: + for f in os.listdir("."): + new_f = os.path.join(temp_data, f) + LOGGER.info( + _("Copying %(source)s to %(destination)s"), + {"source": f, "destination": new_f}, + ) + if os.path.isdir(f): + shutil.copytree(f, new_f) + else: + shutil.copy(f, new_f) LOGGER.info( _("Moving %(source)s to %(destination)s"), {"source": temp_data, "destination": "data"}, ) - os.rename(temp_data, "data") + + os.rename(temp_data, os.path.join(dest_dir, "data")) # permissions for the payload directory should match those of the # original directory os.chmod(os.path.join(dest_dir, "data"), os.stat(cwd).st_mode) - total_bytes, total_files = make_manifests( - "data", processes, algorithms=checksums, encoding=encoding - ) + if source_dir == dest_dir: + total_bytes, total_files = make_manifests( + "data", processes, algorithms=checksums, encoding=encoding + ) + else: + total_bytes, total_files = make_manifests( + ".", processes, algorithms=checksums, encoding=encoding, dest_dir=dest_dir, rel_path="data" + ) + + os.chdir(dest_dir) + cwd = os.getcwd() LOGGER.info(_("Creating bagit.txt")) txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n""" @@ -275,7 +300,7 @@ def make_bag( _make_tag_file("bag-info.txt", bag_info) for c in checksums: - _make_tagmanifest_file(c, bag_dir, encoding="utf-8") + _make_tagmanifest_file(c, dest_dir, encoding="utf-8") except Exception: LOGGER.exception(_("An error occurred creating a bag in %s"), bag_dir) raise @@ -1248,13 +1273,18 @@ def _make_tag_file(bag_info_path, bag_info): f.write("%s: %s\n" % (h, txt)) -def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding="utf-8"): +def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding="utf-8", dest_dir=None, rel_path=None): LOGGER.info( _("Using %(process_count)d processes to generate manifests: %(algorithms)s"), {"process_count": processes, "algorithms": ", ".join(algorithms)}, ) - manifest_line_generator = partial(generate_manifest_lines, algorithms=algorithms) + if not dest_dir: + dest_dir = os.getcwd() + + data_dir = os.path.relpath(data_dir) + + manifest_line_generator = partial(generate_manifest_lines, algorithms=algorithms, rel_path=rel_path) if processes > 1: pool = multiprocessing.Pool(processes=processes) @@ -1277,8 +1307,9 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" for algorithm, values in manifest_data.items(): manifest_filename = "manifest-%s.txt" % algorithm + manifest_path = os.path.join(dest_dir, manifest_filename) - with open_text_file(manifest_filename, "w", encoding=encoding) as manifest: + with open_text_file(manifest_path, "w", encoding=encoding) as manifest: for digest, filename, byte_count in values: manifest.write("%s %s\n" % (digest, _encode_filename(filename))) num_files[algorithm] += 1 @@ -1397,7 +1428,7 @@ def _can_read(test_dir): return (tuple(unreadable_dirs), tuple(unreadable_files)) -def generate_manifest_lines(filename, algorithms=DEFAULT_CHECKSUMS): +def generate_manifest_lines(filename, algorithms=DEFAULT_CHECKSUMS, rel_path=None): LOGGER.info(_("Generating manifest lines for file %s"), filename) # For performance we'll read the file only once and pass it block @@ -1419,6 +1450,9 @@ def generate_manifest_lines(filename, algorithms=DEFAULT_CHECKSUMS): decoded_filename = _decode_filename(filename) + if rel_path: + decoded_filename = os.path.join(rel_path, decoded_filename) + # We'll generate a list of results in roughly manifest format but prefixed with the algorithm: results = [ (alg, hasher.hexdigest(), decoded_filename, total_bytes) From 7fd6e42025ac3dad6b50d68e3796204e81f222da Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 26 Jul 2019 12:51:08 -0400 Subject: [PATCH 04/10] use realpath instead of abspath everywhere --- bagit.py | 22 ++++++++++------------ test.py | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/bagit.py b/bagit.py index eee93d3..c9a97c1 100755 --- a/bagit.py +++ b/bagit.py @@ -20,7 +20,7 @@ from collections import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join +from os.path import abspath, isdir, isfile, join, realpath from pkg_resources import DistributionNotFound, get_distribution @@ -165,17 +165,17 @@ def make_bag( if dest_dir: bag_name = os.path.basename(bag_dir) - dest_dir = os.path.abspath(os.path.join(dest_dir, bag_name)) + dest_dir = realpath(os.path.join(dest_dir, bag_name)) if not os.path.isdir(dest_dir): os.makedirs(dest_dir) else: raise RuntimeError(_("The following directory already exists:\n%s"), dest_dir) else: - dest_dir = os.path.abspath(bag_dir) + dest_dir = realpath(bag_dir) - source_dir = os.path.abspath(bag_dir) + source_dir = realpath(bag_dir) - cwd = os.path.abspath(os.path.curdir) + cwd = realpath(os.path.curdir) if cwd.startswith(source_dir) and cwd != source_dir: raise RuntimeError( @@ -189,7 +189,7 @@ def make_bag( raise RuntimeError(_("Bag source directory %s does not exist") % bag_dir) # FIXME: we should do the permissions checks before changing directories - old_dir = os.path.abspath(os.path.curdir) + old_dir = realpath(os.path.curdir) try: # TODO: These two checks are currently redundant since an unreadable directory will also @@ -228,13 +228,11 @@ def make_bag( # FIXME: if we calculate full paths we won't need to deal with changing directories os.chdir(source_dir) cwd = os.getcwd() - temp_data = tempfile.mkdtemp(dir=dest_dir) - # getcwd resolves symlinks, dest_dir used abspath, which doesn't - temp_data = os.path.realpath(temp_data) + temp_data = realpath(tempfile.mkdtemp(dir=dest_dir)) if source_dir == dest_dir: for f in os.listdir("."): - if os.path.abspath(f) == temp_data: + if realpath(f) == temp_data: continue new_f = os.path.join(temp_data, f) LOGGER.info( @@ -338,7 +336,7 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) + self.path = realpath(path) if path: # if path ends in a path separator, strip it off if path[-1] == os.sep: @@ -542,7 +540,7 @@ def save(self, processes=1, manifests=False): ) # Change working directory to bag directory so helper functions work - old_dir = os.path.abspath(os.path.curdir) + old_dir = realpath(os.path.curdir) os.chdir(self.path) # Generate new manifest files diff --git a/test.py b/test.py index 2033a26..c537ad7 100644 --- a/test.py +++ b/test.py @@ -43,7 +43,7 @@ def setUp(self): self.starting_directory = ( os.getcwd() ) # FIXME: remove this after we stop changing directories in bagit.py - self.tmpdir = tempfile.mkdtemp() + self.tmpdir = os.path.realpath(tempfile.mkdtemp()) if os.path.isdir(self.tmpdir): shutil.rmtree(self.tmpdir) shutil.copytree("test-data", self.tmpdir) From d3a08248f55f1c16787c35d2e0b8e1c105cd8f87 Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 26 Jul 2019 13:18:35 -0400 Subject: [PATCH 05/10] fail on pre-existing bag directory only if it already contains files --- bagit.py | 4 ++-- test.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/bagit.py b/bagit.py index c9a97c1..b8188f9 100755 --- a/bagit.py +++ b/bagit.py @@ -168,8 +168,8 @@ def make_bag( dest_dir = realpath(os.path.join(dest_dir, bag_name)) if not os.path.isdir(dest_dir): os.makedirs(dest_dir) - else: - raise RuntimeError(_("The following directory already exists:\n%s"), dest_dir) + elif len(os.listdir(dest_dir)) > 0: + raise RuntimeError(_("The following directory already exists and contains files:\n%s"), dest_dir) else: dest_dir = realpath(bag_dir) diff --git a/test.py b/test.py index c537ad7..4d9b643 100644 --- a/test.py +++ b/test.py @@ -129,8 +129,11 @@ def test_make_bags_with_destinations(self): def test_make_bag_bad_destination(self): tmp_dir_out = tempfile.mkdtemp(prefix='bagit-test-dest') - subdir = os.path.basename(self.tmpdir) - os.makedirs(os.path.join(tmp_dir_out, subdir)) + pre_existing_bag_dir = j(tmp_dir_out, os.path.basename(self.tmpdir)) + os.makedirs(pre_existing_bag_dir) + pre_existing_file = j(pre_existing_bag_dir, '.DS_Store') + with open(pre_existing_file, 'w') as f: + f.write('ugh') self.assertRaises( RuntimeError, bagit.make_bag, From eb0f0935359796815437e9b40c9f5c725c52c23d Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 26 Jul 2019 14:05:16 -0400 Subject: [PATCH 06/10] add test for flipped bit at destination --- test.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/test.py b/test.py index 4d9b643..4314e31 100644 --- a/test.py +++ b/test.py @@ -98,16 +98,15 @@ def test_make_bag_md5_sha1_sha256_manifest(self): self.assertTrue(self.validate(bag, fast=True)) def test_make_bag_with_destination(self): - tmp_dir_out = tempfile.mkdtemp(prefix='bagit-test-') - dest_dir = j(tmp_dir_out, 'test-dest') + tmp_dir_out = tempfile.mkdtemp(prefix='bagit-test-dest') bag = bagit.make_bag( - self.tmpdir, dest_dir=dest_dir, checksums=['sha256', 'sha512'] + self.tmpdir, dest_dir=tmp_dir_out, checksums=['sha256', 'sha512'] ) subdir = os.path.basename(self.tmpdir) - self.assertTrue(os.path.isfile(j(dest_dir, subdir, 'manifest-sha256.txt'))) - self.assertTrue(os.path.isfile(j(dest_dir, subdir, 'manifest-sha512.txt'))) + self.assertTrue(os.path.isfile(j(tmp_dir_out, subdir, 'manifest-sha256.txt'))) + self.assertTrue(os.path.isfile(j(tmp_dir_out, subdir, 'manifest-sha512.txt'))) self.assertTrue(self.validate(bag, fast=True)) - diff = filecmp.dircmp(self.tmpdir, os.path.join(dest_dir, subdir, 'data')) + diff = filecmp.dircmp(self.tmpdir, os.path.join(tmp_dir_out, subdir, 'data')) self.assertTrue(len(diff.left_only+diff.right_only) == 0) shutil.rmtree(tmp_dir_out) @@ -155,6 +154,27 @@ def test_validate_flipped_bit(self): self.assertTrue(self.validate(bag, fast=True)) self.assertTrue(self.validate(bag, completeness_only=True)) + def test_validate_flipped_bit_at_destination(self): + tmp_dir_out = tempfile.mkdtemp(prefix='bagit-test-dest') + + bag = bagit.make_bag( + self.tmpdir, dest_dir=tmp_dir_out, checksums=['sha256', 'sha512'] + ) + readme = j(tmp_dir_out, os.path.basename(self.tmpdir), 'data', 'README') + txt = slurp_text_file(readme) + txt = "A" + txt[1:] + with open(readme, 'w') as r: + r.write(txt) + bag = bagit.Bag(bag.path) + self.assertRaises(bagit.BagValidationError, self.validate, bag) + + hasher = hashlib.new('sha256') + contents = slurp_text_file(j(self.tmpdir, 'README')).encode('utf-8') + hasher.update(contents) + self.assertTrue(hasher.hexdigest() == bag.entries['data/README']['sha256']) + + shutil.rmtree(tmp_dir_out) + def test_validate_fast(self): bag = bagit.make_bag(self.tmpdir) self.assertEqual(self.validate(bag, fast=True), True) From 2b8f3cbf155018ce279e13372b9a357929a22498 Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 17 Jan 2020 12:17:13 -0500 Subject: [PATCH 07/10] new copy process to use copytree for all files --- bagit.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/bagit.py b/bagit.py index b8188f9..3074dcc 100755 --- a/bagit.py +++ b/bagit.py @@ -10,6 +10,7 @@ import logging import multiprocessing import os +import random import re import signal import shutil @@ -228,9 +229,9 @@ def make_bag( # FIXME: if we calculate full paths we won't need to deal with changing directories os.chdir(source_dir) cwd = os.getcwd() - temp_data = realpath(tempfile.mkdtemp(dir=dest_dir)) if source_dir == dest_dir: + temp_data = realpath(tempfile.mkdtemp(dir=dest_dir)) for f in os.listdir("."): if realpath(f) == temp_data: continue @@ -241,16 +242,12 @@ def make_bag( ) os.rename(f, new_f) else: - for f in os.listdir("."): - new_f = os.path.join(temp_data, f) - LOGGER.info( - _("Copying %(source)s to %(destination)s"), - {"source": f, "destination": new_f}, - ) - if os.path.isdir(f): - shutil.copytree(f, new_f) - else: - shutil.copy(f, new_f) + temp_data_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz0123456789') for _ in range(6)) + temp_data = os.path.join(dest_dir, temp_data_name) + while os.path.isdir(temp_data): + temp_data = temp_data + random.choice('abcdefghijklmnopqrstuvwxyz0123456789') + + shutil.copytree(".", temp_data) LOGGER.info( _("Moving %(source)s to %(destination)s"), @@ -270,7 +267,8 @@ def make_bag( ) else: total_bytes, total_files = make_manifests( - ".", processes, algorithms=checksums, encoding=encoding, dest_dir=dest_dir, rel_path="data" + ".", processes, algorithms=checksums, encoding=encoding, + dest_dir=dest_dir, rel_path="data" ) os.chdir(dest_dir) From 75036b1f91d0f07da5f5699696c954564bca0674 Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 17 Jan 2020 12:47:27 -0500 Subject: [PATCH 08/10] update dest-dir check for consideration listdir performance --- bagit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bagit.py b/bagit.py index 3074dcc..cd561a4 100755 --- a/bagit.py +++ b/bagit.py @@ -169,7 +169,7 @@ def make_bag( dest_dir = realpath(os.path.join(dest_dir, bag_name)) if not os.path.isdir(dest_dir): os.makedirs(dest_dir) - elif len(os.listdir(dest_dir)) > 0: + elif os.stat(dest_dir).st_nlink > 0: raise RuntimeError(_("The following directory already exists and contains files:\n%s"), dest_dir) else: dest_dir = realpath(bag_dir) From 8ff5ad57de4e941b3ec14300c9a54eedebb5c07d Mon Sep 17 00:00:00 2001 From: nkrabben Date: Fri, 17 Jan 2020 13:06:41 -0500 Subject: [PATCH 09/10] add test for trying to bag parent dir --- test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test.py b/test.py index 4314e31..0a1fc12 100644 --- a/test.py +++ b/test.py @@ -141,6 +141,13 @@ def test_make_bag_bad_destination(self): shutil.rmtree(tmp_dir_out) + def test_make_bag_parentdir(self): + os.chdir(j(self.tmpdir, 'loc')) + self.assertRaises( + RuntimeError, bagit.make_bag, + self.tmpdir, checksum=['sha256', 'sha512'] + ) + def test_validate_flipped_bit(self): bag = bagit.make_bag(self.tmpdir) readme = j(self.tmpdir, "data", "README") From 61769cda626c8493b6a733885a407915f4ada63f Mon Sep 17 00:00:00 2001 From: Nick Krabbenhoeft Date: Thu, 30 Jan 2020 14:47:47 -0500 Subject: [PATCH 10/10] Fix bad arg reference --- bagit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bagit.py b/bagit.py index cd561a4..9ee2786 100755 --- a/bagit.py +++ b/bagit.py @@ -1651,7 +1651,7 @@ def main(): bag_info=args.bag_info, processes=args.processes, checksums=args.checksums, - dest_dir=args.destination + dest_dir=args.dest_dir ) except Exception as exc: if args.dest_dir: