-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcheck_gender_in_cat.py
46 lines (41 loc) · 995 Bytes
/
check_gender_in_cat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pywikibot
from pywikibot import pagegenerators
from pywikibot.data import api
enwiki = pywikibot.Site('en', 'wikipedia')
enwiki_repo = enwiki.data_repository()
targetcat = 'Category:Violence against women in the United States'
cat = pywikibot.Category(enwiki, targetcat)
pages = pagegenerators.CategorizedPageGenerator(cat, recurse=False)

# Display labels for specific P21 target item IDs; any other target is shown
# by its raw item ID.
GENDER_LABELS = {
    'Q6581072': 'Female',
    'Q6581097': 'M',
}

# Emit one table row per page in the category: the page title and the value
# of its Wikidata P21 claim ('-' when there is none).
for page in pages:
    # Reset per page so a failed lookup can never reuse the item data that
    # was fetched for the previous page.
    item_dict = {}
    try:
        wd_item = pywikibot.ItemPage.fromPage(page)
        item_dict = wd_item.get()
    except Exception:
        # Best effort: a page without a usable Wikidata item still gets a
        # row, with '-' in the gender column. Catching Exception (not a bare
        # except) keeps Ctrl-C / SystemExit working.
        pass

    try:
        P21 = item_dict['claims']['P21']
    except (KeyError, TypeError):
        P21 = []

    # When several P21 claims exist, the last one wins.
    gender = None
    for clm in P21:
        gender = clm.getTarget()

    # A claim may carry no target at all (getTarget() returns None); treat
    # that like a missing claim instead of calling .title() on None.
    if gender is None:
        gender = '-'
    else:
        item_id = gender.title()
        gender = GENDER_LABELS.get(item_id, item_id)

    print('|-')
    print('| [[' + page.title() + ']] || ' + gender)