-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathanonymize.py
24 lines (22 loc) · 1.06 KB
/
anonymize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import argparse
import csv
import hmac
def hash_uid(key, uid):
    """Return the keyed hash (hex string) that replaces a user id.

    Args:
        key: Secret key as bytes.
        uid: User id as text; it is UTF-8 encoded before hashing.

    Returns:
        The hexadecimal HMAC digest of ``uid`` under ``key``.
    """
    # ``digestmod`` is mandatory since Python 3.8; calling hmac.new() without
    # it raises TypeError.  MD5 was the implicit default of older versions,
    # so naming it explicitly keeps previously generated pseudonyms stable.
    return hmac.new(key, uid.encode('utf-8'), digestmod='md5').hexdigest()


def anonymize_rows(fin, fout, key):
    """Copy rows from ``fin`` to ``fout`` with the user id column hashed.

    Both streams use '|' as the delimiter and a backslash as the quote
    character.  A header row ``modelname|mid|uid`` is written first.  Only
    the first three columns of every input row are kept; anything after
    them (the user name, per the original comment) is dropped.

    Args:
        fin: Text stream (or iterable of lines) to read rows from.
        fout: Text stream to write the anonymized rows to.
        key: Secret key as bytes, passed to :func:`hash_uid`.

    Raises:
        IndexError: If a non-blank row has fewer than three columns.
    """
    reader = csv.reader(fin, delimiter='|', quoting=csv.QUOTE_MINIMAL,
                        quotechar='\\')
    writer = csv.writer(fout, delimiter='|', quoting=csv.QUOTE_MINIMAL,
                        quotechar='\\')
    writer.writerow(['modelname', 'mid', 'uid'])
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line (typically a
            # trailing newline); there is nothing to anonymize in it.
            continue
        # Throw away user name
        modelname, mid, uid = row[0], row[1], row[2]
        writer.writerow([modelname, mid, hash_uid(key, uid)])


def main():
    """Parse the command line and anonymize the given file."""
    parser = argparse.ArgumentParser(description='Anonymize user ids')
    parser.add_argument('urls', help='Output of crawl.py --type urls')
    parser.add_argument('anonymized', help='Anonymized output filename')
    parser.add_argument('key', help='Secret key for hashing of user ids')
    args = parser.parse_args()

    key = bytes(args.key, 'utf-8')
    # newline='' is what the csv module asks for: it lets the reader handle
    # embedded newlines itself and stops the writer from emitting '\r\r\n'
    # line endings on platforms that translate '\n'.
    with open(args.urls, 'r', newline='') as fin, \
            open(args.anonymized, 'w', newline='') as fout:
        anonymize_rows(fin, fout, key)


if __name__ == '__main__':
    main()