diff --git a/bagit.py b/bagit.py index 1194c61..4aa6186 100755 --- a/bagit.py +++ b/bagit.py @@ -588,9 +588,12 @@ def files_to_be_fetched(self): for _, _, filename in self.fetch_entries(): yield filename - def fetch_files_to_be_fetched(self): + def fetch(self, force=False): """ Fetches files from the fetch.txt + + Arguments: + force (boolean): Fetch files even when they are present in the data directory """ proxy_handler = ProxyHandler() # will default to adhere to *_proxy env vars opener = build_opener(proxy_handler) @@ -599,7 +602,7 @@ def fetch_files_to_be_fetched(self): if not fnmatch_any(url, self.fetch_url_whitelist): raise BagError(_("Malformed URL in fetch.txt: %s, matches none of the whitelisted URL patterns %s") % (url, self.fetch_url_whitelist)) expected_size = -1 if expected_size == '-' else int(expected_size) - if filename in self.payload_files(): + if filename in self.payload_files() and not force: LOGGER.info(_("File already fetched: %s"), filename) continue req = Request(url) diff --git a/test.py b/test.py index 268394d..8fcc7c8 100644 --- a/test.py +++ b/test.py @@ -1130,7 +1130,30 @@ def test_fetching_payload_file(self): with self.assertRaises(bagit.BagError): self.bag.validate() # re-fetch it - self.bag.fetch_files_to_be_fetched() + self.bag.fetch() + # should be valid again + self.bag.validate() + self.assertEqual(len(self.bag.compare_fetch_with_fs()), 0, 'complete') + + def test_force_fetching(self): + test_payload = 'loc/2478433644_2839c5e8b8_o_d.jpg' + with open(j(self.tmpdir, "fetch.txt"), "w") as fetch_txt: + print("https://github.com/LibraryOfCongress/bagit-python/raw/master/test-data/%s %s data/%s" % ( + test_payload, 139367, test_payload), file=fetch_txt) + self.bag.save(manifests=True) + # now replace one payload file with an empty string + with open(j(self.tmpdir, "data", test_payload), 'w') as payload: + payload.write('') + # should be invalid now + with self.assertRaisesRegexp(bagit.BagError, "^Payload-Oxum validation failed."): + 
self.bag.validate() + # non-forcefully downloading should not help + self.bag.fetch() + # should **still** be invalid now + with self.assertRaisesRegexp(bagit.BagError, "^Payload-Oxum validation failed."): + self.bag.validate() + # fetch with force + self.bag.fetch(force=True) # should be valid again self.bag.validate() self.assertEqual(len(self.bag.compare_fetch_with_fs()), 0, 'complete')