-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextractOccupations.py
64 lines (49 loc) · 1.83 KB
/
extractOccupations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from utils import loadSnacData
def getoccupations(constellations):
    """
    Count how often each occupation appears in a list of SNAC constellation JSONs

    @Param: constellations, a list of SNAC constellation JSONs in dict form
    @Returns: a dict of the form {(occu heading, occu SNAC ID): # occurrences}
    """
    occupations = {}
    for constellation in constellations:
        # A missing or empty "occupations" entry contributes nothing, so
        # fall back to an empty tuple and let the inner loop be a no-op.
        for occupation in constellation.get("occupations") or ():
            term = occupation["term"]
            # Key order is (heading, ID), as the docstring promises. The key
            # is consumed positionally when the table is written out, so the
            # order decides which column each value lands in.
            key = (term["term"], term["id"])
            occupations[key] = occupations.get(key, 0) + 1
    return occupations
def writeTable(dict, filename, headerRow):
    """
    Write a dict of form {(x, y): z} to a tab-separated file, one "x<TAB>y<TAB>z" row per entry

    @Param: dict, a mapping whose keys are (x, y) pairs and whose values are z
    @Param: filename, path of the file to create (or overwrite)
    @Param: headerRow, text written verbatim before the data rows; it is not
            given a trailing newline here, so it must already end with one
    @Returns: None
    """
    # Encoding is pinned to UTF-8 so non-ASCII text is written the same way
    # regardless of the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(headerRow)
        for entry, value in dict.items():
            # str() every field: str.join raises TypeError on non-string
            # items, e.g. when an ID or count is an int.
            fields = (str(entry[0]), str(entry[1]), str(value))
            f.write("\t".join(fields) + "\n")
def main():
    """Load the SNAC data, tally its occupations, and save the tally as a TSV.

    Progress messages are printed to stdout around each step.
    """
    output_path = "snacOccupations.tsv"
    header_row = "SNAC Heading\tSNAC ID\tCount\n"

    print("\n")
    snac_records = loadSnacData()

    # Step 1: build the occupation tally
    print("Extracting occupations from constellations...")
    tally = getoccupations(snac_records)
    print("occupations successfully extracted.\n")

    # Step 2: write the tally to disk
    print("Writing occupations to snacOccupations.tsv...")
    writeTable(tally, output_path, header_row)
    print("File successfully written.")
    print("\n")
# Run the extraction only when this file is executed as a script, so that
# merely importing the module does not load data or write the output file.
if __name__ == "__main__":
    main()