Skip to content

Commit

Permalink
Rename fetch_files_to_be_fetched -> fetch, force option
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Dec 7, 2018
1 parent 76fb26e commit 5e05700
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
7 changes: 5 additions & 2 deletions bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,9 +588,12 @@ def files_to_be_fetched(self):
for _, _, filename in self.fetch_entries():
yield filename

def fetch_files_to_be_fetched(self):
def fetch(self, force=False):
"""
Fetches files from the fetch.txt
Arguments:
force (boolean): Fetch files even when they are present in the data directory
"""
proxy_handler = ProxyHandler() # will default to adhere to *_proxy env vars
opener = build_opener(proxy_handler)
Expand All @@ -599,7 +602,7 @@ def fetch_files_to_be_fetched(self):
if not fnmatch_any(url, self.fetch_url_whitelist):
raise BagError(_("Malformed URL in fetch.txt: %s, matches none of the whitelisted URL patterns %s") % (url, self.fetch_url_whitelist))
expected_size = -1 if expected_size == '-' else int(expected_size)
if filename in self.payload_files():
if filename in self.payload_files() and not force:
LOGGER.info(_("File already fetched: %s"), filename)
continue
req = Request(url)
Expand Down
25 changes: 24 additions & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,7 +1130,30 @@ def test_fetching_payload_file(self):
with self.assertRaises(bagit.BagError):
self.bag.validate()
# re-fetch it
self.bag.fetch_files_to_be_fetched()
self.bag.fetch()
# should be valid again
self.bag.validate()
self.assertEqual(len(self.bag.compare_fetch_with_fs()), 0, 'complete')

def test_force_fetching(self):
    """Check that fetch() only re-downloads an existing payload file when force=True."""
    payload_name = 'loc/2478433644_2839c5e8b8_o_d.jpg'
    oxum_failure = "^Payload-Oxum validation failed."

    def expect_oxum_failure():
        # Validation must raise a BagError whose message reports the Oxum mismatch.
        with self.assertRaisesRegexp(bagit.BagError, oxum_failure):
            self.bag.validate()

    # Register the payload file in fetch.txt (URL, size field, path below data/).
    with open(j(self.tmpdir, "fetch.txt"), "w") as fetch_file:
        fetch_line = "https://github.com/LibraryOfCongress/bagit-python/raw/master/test-data/%s %s data/%s" % (
            payload_name, 139367, payload_name)
        print(fetch_line, file=fetch_file)
    self.bag.save(manifests=True)

    # Truncate the payload file on disk so the bag no longer matches its manifests.
    with open(j(self.tmpdir, "data", payload_name), 'w') as truncated:
        truncated.write('')
    expect_oxum_failure()

    # A plain fetch skips files that already exist, so the bag stays invalid.
    self.bag.fetch()
    expect_oxum_failure()

    # A forced fetch replaces the truncated file and makes the bag valid again.
    self.bag.fetch(force=True)
    self.bag.validate()
    self.assertEqual(len(self.bag.compare_fetch_with_fs()), 0, 'complete')
Expand Down

0 comments on commit 5e05700

Please sign in to comment.