-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathget_affiliations.py
executable file
·128 lines (116 loc) · 5.92 KB
/
get_affiliations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
# Report affiliations for authors of packages in Dryad using Crossref API
# Either run with --list to get a fresh list of all possible DOIs
# or with --report [funder/affiliation/publisher] --doi [doi]
__author__ = 'daisieh'
import re
import os
import sys
import requests
import json
import optparse
from sql_utils import rows_from_query, get_field_id, var_from_query
def main():
    """Command-line entry point: list publication DOIs or report on one.

    With ``--list``, prints the text value of every
    ``dc.relation.isreferencedby`` metadata row belonging to an item whose
    owning collection is 2, in descending ``item_id`` order.

    Otherwise both ``--doi`` and ``--report`` are required: the matching
    metadata row is looked up and passed to ``process_pub_doi`` together
    with the requested report type.  If either option is missing, a usage
    message is printed and the process exits with status 1.
    """
    parser = optparse.OptionParser()
    parser.add_option("--doi", dest="doi", help="doi to get crossref report for")
    parser.add_option("--list", dest="list_mode", action="store_true", help="list all available dois")
    parser.add_option("--report", dest="report", help="funder/affiliation/publisher")
    (options, args) = parser.parse_args()

    pub_doi_field = get_field_id('dc.relation.isreferencedby')

    if options.list_mode:
        sql = 'select mdv.text_value, item.item_id from metadatavalue as mdv, item where item.item_id = mdv.item_id and mdv.metadata_field_id = %s and item.owning_collection = 2 order by item.item_id desc' % (pub_doi_field)
        # The first and last rows are discarded, as before (presumably a
        # header and a trailing footer -- TODO confirm against sql_utils).
        # Slicing, unlike pop(), does not raise on a short or empty result.
        for pub_doi_item in (rows_from_query(sql) or [])[1:-1]:
            print(pub_doi_item[0])
        return

    if options.doi is None or options.report is None:
        print("Either use `--list` or `--report [funder/affiliation/publisher] --doi [doi]`")
        # The bare name `exit` used previously was never called, so
        # execution fell through and queried for a DOI of None.
        sys.exit(1)

    # Double any single quote so the DOI cannot terminate the SQL literal.
    # NOTE(review): prefer a parameterized query if sql_utils offers one.
    safe_doi = options.doi.replace("'", "''")
    sql = "select mdv.text_value, item.item_id from metadatavalue as mdv, item where item.item_id = mdv.item_id and mdv.metadata_field_id = %s and item.owning_collection = 2 and mdv.text_value='%s'" % (pub_doi_field, safe_doi)
    pub_doi_list = (rows_from_query(sql) or [])[1:-1]
    if pub_doi_list:
        process_pub_doi(pub_doi_list[0], options.report)
    else:
        # Keep diagnostics off stdout so the tab-separated report stays clean.
        sys.stderr.write("no item found for %s\n" % options.doi)
def _metadata_text_value(item_id, field_id):
    """Return the ``text_value`` stored for one metadata field of one item."""
    sql = 'select text_value from metadatavalue where item_id = %s and metadata_field_id = %s' % (item_id, field_id)
    return var_from_query(sql, 'text_value')


def process_pub_doi(pub_doi_item, report_type):
    """Print a tab-separated Crossref report for one publication DOI.

    pub_doi_item -- indexable row; element 0 is the DOI text (only values
                    starting with ``doi:`` are processed), element 1 is the
                    item id.
    report_type  -- 'affiliation', 'funder' or 'publisher'; any other value
                    produces no report rows.

    Every output row starts with the item id, Dryad DOI, title, publication
    DOI, publication name and accession date, followed by the data specific
    to the report type.  Failures are reported on stdout and never raised.
    """
    m = re.search('^doi:(.+)', pub_doi_item[0])
    if m is None:
        return

    # group(0) is the whole match, so the 'doi:' prefix is kept both in the
    # request URL and in the report column (unchanged from the original).
    pub_doi = m.group(0)
    item_id = pub_doi_item[1]

    dryad_doi = _metadata_text_value(item_id, get_field_id('dc.identifier'))
    title = _metadata_text_value(item_id, get_field_id('dc.title'))
    pub_name = _metadata_text_value(item_id, get_field_id('prism.publicationName'))
    pub_date = _metadata_text_value(item_id, get_field_id('dc.date.accessioned'))

    try:
        # The previous URL literal ended in a garbled '%[email protected]'
        # sequence, which is not a valid % conversion and raised ValueError
        # on every call.  TODO: if that was a `mailto` contact parameter,
        # restore it with the real address.
        # A timeout keeps one stalled request from hanging the whole run.
        r = requests.get('https://api.crossref.org/works/%s' % pub_doi, timeout=60)
        if r.status_code == 200:
            pub_json = r.json()
            if report_type == 'affiliation':
                for author in find_authors(pub_json):
                    author = author.replace('\n', ' ')
                    print('%s\t%s\t%s\t%s\t%s\t%s\t%s' % (item_id, dryad_doi, title, pub_doi, pub_name, pub_date, author.encode('utf-8')))
            elif report_type == 'funder':
                for funder in find_funders(pub_json):
                    funder = funder.replace('\n', ' ')
                    print('%s\t%s\t%s\t%s\t%s\t%s\t\t\t\t%s' % (item_id, dryad_doi, title, pub_doi, pub_name, pub_date, funder.encode('utf-8')))
            elif report_type == 'publisher':
                # find_publisher may yield nothing for a record without a
                # publisher; fall back to an empty column instead of failing.
                publisher = (find_publisher(pub_json) or '').replace('\n', ' ')
                print('%s\t%s\t%s\t%s\t%s\t%s\t\t\t\t%s' % (item_id, dryad_doi, title, pub_doi, pub_name, pub_date, publisher.encode('utf-8')))
        else:
            print("%s\tno result for %s: %s" % (item_id, pub_doi, r.status_code))
    except Exception as e:
        # Best effort per DOI: report the failure and its cause, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except did not).
        print("error occurred while executing %s: %r" % (pub_doi, e))
    sys.stdout.flush()
def find_funders(pub_json):
    """Return one tab-separated string per funder under ``message.funder``.

    Each string holds the funder's DOI, name and comma-joined award list,
    in that order; a missing field becomes an empty column.  An empty list
    is returned when ``message`` or ``funder`` is absent.
    """
    rows = []
    if 'message' not in pub_json:
        return rows
    message = pub_json['message']
    if 'funder' not in message:
        return rows

    for entry in message['funder']:
        doi = entry['DOI'] if 'DOI' in entry else ''
        name = entry['name'] if 'name' in entry else ''
        awards = ','.join(entry['award']) if 'award' in entry else ''
        rows.append('\t'.join((doi, name, awards)))
    return rows
def find_authors(pub_json):
    """Return one tab-separated string per (author, affiliation) pair.

    Each string holds the author's family name, given name and the
    affiliation name, in that order; a missing field becomes an empty
    column.  Authors with no affiliations contribute no rows.  An empty
    list is returned when ``message`` or ``author`` is absent.
    """
    rows = []
    message = pub_json['message'] if 'message' in pub_json else {}
    for author in message.get('author') or []:
        # The names do not depend on the affiliation, so look them up once.
        family = author.get('family', '')
        given = author.get('given', '')
        # A contributor record may lack the 'affiliation' key altogether;
        # indexing it directly raised KeyError and failed the whole record.
        for affiliation in author.get('affiliation') or []:
            rows.append('\t'.join([family, given, affiliation.get('name', '')]))
    return rows
def find_publisher(pub_json):
    """Return the ``message.publisher`` value, or '' when it is absent.

    An empty string (rather than an implicit None) is returned for a
    missing publisher so that callers can apply string methods to the
    result unconditionally.
    """
    if 'message' in pub_json:
        message = pub_json['message']
        if 'publisher' in message:
            return message['publisher'] or ''
    return ''
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()