-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy paths3_copy_all.py
executable file
·78 lines (64 loc) · 2.43 KB
/
s3_copy_all.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
# Copy all items from one bucket to another, adding md5 tags as necessary.
# If md5 is not present, calculate from ETag, but if ETag is not md5, flag for later.
__author__ = 'daisie'
import hashlib
import json
import os
import re
import subprocess
import sys
def _run_aws(args):
    """Run the AWS CLI with the given argument list and return its stdout.

    The command is executed without a shell, so bucket names and keys that
    contain quotes, spaces, ``$`` or backticks are passed through verbatim
    instead of being interpreted.  stderr stays attached to the terminal and
    the exit status is not checked; callers treat empty output as "the CLI
    returned nothing".

    Args:
        args: sequence of command-line arguments that follow ``aws``.

    Returns:
        Everything the command wrote to stdout, as text.
    """
    proc = subprocess.Popen(
        ['aws'] + list(args),
        stdout=subprocess.PIPE,
        universal_newlines=True  # text (not bytes) on both Python 2 and 3
    )
    # communicate() drains the pipe and waits, so the command has finished
    # by the time we return.
    output, _ = proc.communicate()
    return output


def _list_bucket_keys(bucket):
    """Return every key in ``bucket``, fetching 1000 keys per CLI call.

    Prints a running total after each page.  Exits with status 1 after
    printing "empty bucket" when the CLI returns no listing at all.
    """
    keys = []
    token = None
    while True:
        args = ['s3api', 'list-objects-v2', '--bucket', bucket,
                '--page-size', '1000', '--max-items', '1000']
        if token:
            args += ['--starting-token', token]
        output = _run_aws(args)
        results = json.loads(output) if output.strip() else None
        if not results:
            print("empty bucket")
            sys.exit(1)
        # 'Contents' is absent when a page holds no objects.
        for entry in results.get('Contents', []):
            keys.append(entry['Key'])
        print("added %s keys" % len(keys))
        sys.stdout.flush()
        # The CLI only emits NextToken while more results remain.
        token = results.get('NextToken')
        if not token:
            return keys


def main():
    """Copy objects from one S3 bucket to another, tagging each with an md5.

    Command line: ``<source_bucket> <target_bucket> [keys_file]``.  When
    ``keys_file`` is given, it is read as one key per line; otherwise every
    key in the source bucket is listed through the AWS CLI.

    For each key:
      * keys already present in the target bucket are skipped;
      * keys starting with "." are skipped;
      * the ``md5`` metadata entry of the source object is reused if present,
        otherwise the object's ETag is used as the md5 value;
      * the object is copied with ``aws s3 cp --metadata md5=<value>``.

    Exits with status 1 when fewer than two bucket arguments are supplied.
    """
    if len(sys.argv) < 3:
        print("Source and target buckets are required as arguments")
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    bucket = sys.argv[1]
    target_bucket = sys.argv[2]

    if len(sys.argv) == 4:
        with open(sys.argv[3]) as f:
            keys = f.read().splitlines()
    else:
        keys = _list_bucket_keys(bucket)

    # Now that we have all the keys, check their md5 tags and copy.
    for count, key in enumerate(keys, 1):
        # head-object writes JSON to stdout only when the object exists.
        target_head = _run_aws(['s3api', 'head-object',
                                '--bucket', target_bucket, '--key', key])
        if target_head != "":
            print("%s: %s already exists in target" % (count, key))
            sys.stdout.flush()
            continue

        if key.startswith("."):
            continue

        print('%s: %s' % (count, key))
        source_head = _run_aws(['s3api', 'head-object',
                                '--bucket', bucket, '--key', key])
        if not source_head.strip():
            # Typically a key from the keys file that is not in the source.
            print("  could not read source metadata, skipping")
            sys.stdout.flush()
            continue
        metadata = json.loads(source_head)

        user_metadata = metadata.get('Metadata', {})
        if 'md5' not in user_metadata:
            # The CLI reports the ETag wrapped in literal double quotes;
            # drop them so the tag holds only the hash itself.
            md5 = metadata['ETag'].strip('"')
            if "-" in md5:
                # NOTE(review): an ETag containing "-" is reported here as
                # not being an md5, but it is still written as the md5 tag
                # below, exactly as before -- confirm that this is intended.
                print("  ETag is not an md5")
            else:
                print("  added md5 tag of %s" % (md5))
        else:
            md5 = user_metadata['md5']
            print("  md5 tag is %s" % (md5))

        _run_aws(['s3', 'cp',
                  's3://%s/%s' % (bucket, key),
                  's3://%s/%s' % (target_bucket, key),
                  '--metadata', 'md5=%s' % md5])
        sys.stdout.flush()
# Script entry point: run the copy only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()