-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcommons_photoofday_deprecate.py
187 lines (176 loc) · 5.64 KB
/
commons_photoofday_deprecate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Remove uses of {{PhotoOfTheDay}} from Commons, to replace them with the Infobox
# Mike Peel 28-Dec-2021 v1 - start
# Import modules
import pywikibot
from pywikibot import pagegenerators
import re
# Open the Commons site, then ask it for the data repository it is linked to
commons = pywikibot.Site(code='commons', fam='commons')
wikidata = commons.data_repository()
# Helpers and entry point for migrating {{PhotoOfTheDay}} to {{Wikidata Infobox}}
def _unwrap_w_templates(text, limit=10):
    """Replace each ``{{w|...}}`` in *text* with the text it wraps.

    Returns ``(new_text, replacements)`` where ``replacements`` is a list of
    ``[original_markup, inner_text]`` pairs, in the order they were applied.
    Returns ``None`` if a ``{{w|`` has no closing ``}}`` or if *limit* passes
    were not enough to remove every occurrence.
    """
    replacements = []
    for _ in range(limit):
        startindex = text.find('{{w|')
        if startindex == -1:
            return text, replacements
        endindex = text.find('}}', startindex)
        if endindex == -1:
            # Unterminated markup can never be resolved, so give up straight away
            return None
        whole = text[startindex:endindex + 2]
        inner = text[startindex + 4:endindex]
        replacements.append([whole, inner])
        text = text.replace(whole, inner)
    return None


def _parse_photooftheday_params(params):
    """Pull the file name, caption and ``{{w|}}`` replacements out of *params*.

    *params* is the list of parameter strings of one template call.
    Returns ``(filename, caption, toreplace)``, or ``None`` when a caption
    contains ``{{w|...}}`` markup that could not be unwrapped.
    """
    filename = ''
    caption = ''
    toreplace = []
    for info in params:
        if 'file name=' in info:
            filename = info.replace('file name=', '')
        if 'file description=' in info or 'text=' in info:
            # Only the part before the first '<br />' is used as caption text
            first_part = info.split('<br />')[0]
            newtext = first_part.replace('file description=', '').replace('text=', '')
            unwrapped = _unwrap_w_templates(newtext)
            if unwrapped is None:
                return None
            newtext, pairs = unwrapped
            toreplace.extend(pairs)
            # A later parameter is placed in front of what was collected so far
            if caption == '':
                caption = newtext
            else:
                caption = newtext + ' ' + caption
    return filename, caption, toreplace


def _swap_photooftheday(text, add_infobox):
    """Cut the ``{{PhotoOfTheDay ...}}`` call out of *text*.

    When *add_infobox* is true, ``{{Wikidata Infobox}}`` is inserted where the
    template was. Nested ``{{...}}`` pairs inside the template are skipped so
    the cut ends at the template's own closing braces. Returns the new text,
    or ``None`` if the template is absent or never closed.
    """
    startindex = text.find('{{PhotoOfTheDay')
    if startindex == -1:
        return None
    depth = 0
    pos = startindex
    endindex = -1
    while pos < len(text):
        if text.startswith('{{', pos):
            depth += 1
            pos += 2
        elif text.startswith('}}', pos):
            depth -= 1
            pos += 2
            if depth == 0:
                # Absolute offset just past the closing braces
                endindex = pos
                break
        else:
            pos += 1
    if endindex == -1:
        return None
    replacement = '{{Wikidata Infobox}}' if add_infobox else ''
    return text[:startindex] + replacement + text[endindex:]


def migrate_photooftheday(target):
    """Migrate one Commons page from {{PhotoOfTheDay}} to {{Wikidata Infobox}}.

    The page is processed only if it is connected to a Wikidata item whose
    P31 claims include Q47150325 (the calendar day item). If that item has a
    P301 claim, the item it points to is used instead.

    The function is interactive. When the template names a file, the operator
    is asked 'Save?' before the image (P18, with the caption as a P2096
    qualifier) is added to Wikidata, and asked again before the page is saved:
    'y' saves, 'b' saves with a "bad image" edit summary, anything else leaves
    the page unsaved. When the template names no file, the Wikidata step is
    skipped and the page is saved without asking.

    Args:
        target: page object for the Commons page. It must provide ``title()``,
            ``text``, ``templatesWithParams()`` and ``save()``.

    Returns:
        1 if the template was found and the page-editing step was reached
        (even if the operator declined the final save), 0 if the page was
        skipped.
    """
    print('\n\n https://commons.wikimedia.org/wiki/'+target.title())
    # Only want to try this if we're already matched up with Wikidata
    try:
        wd_item = pywikibot.ItemPage.fromPage(target)
        item_dict = wd_item.get()
        qid = wd_item.title()
    except Exception:
        # Best effort: any lookup failure just means this page is skipped.
        # (Exception, not a bare except, so Ctrl-C still stops the run.)
        print('No Wikidata item')
        return 0

    # If we have a P301 value, switch to using that Wikidata item
    try:
        existing_id = item_dict['claims']['P301']
        print('P301 exists, following that.')
        for clm2 in existing_id:
            linked_item = clm2.getTarget()
            linked_dict = linked_item.get()
            linked_qid = linked_item.title()
            # Switch only once the linked item has loaded completely, so the
            # item, its data and its ID never describe different items
            wd_item, item_dict, qid = linked_item, linked_dict, linked_qid
            print(qid)
    except KeyError:
        # No downside if we didn't find the value
        pass
    except Exception as err:
        print('Could not follow P301, keeping the original item: ' + str(err))
    print('https://www.wikidata.org/wiki/'+qid)

    # Check that we're looking at a calendar day item
    try:
        instance_claims = item_dict['claims']['P31']
    except KeyError:
        print('No P31, skipping')
        return 0
    calday = False
    for clm in instance_claims:
        instance = clm.getTarget()
        # getTarget() may yield nothing usable; treat that as "no match"
        if instance is not None and instance.title() == 'Q47150325':
            calday = True
            break
    if not calday:
        print('Wikidata item is not a calendar day, skipping')
        return 0

    # See if we already have a photo (informational only, we carry on)
    # To implement - is it the same photo we want to add?
    if 'P18' in item_dict['claims']:
        print('Wikidata item already has a photo')

    # Find the template, and do the migration
    for template_info in target.templatesWithParams():
        if template_info[0].title() != 'Template:PhotoOfTheDay':
            continue

        parsed = _parse_photooftheday_params(template_info[1])
        if parsed is None:
            # The caption markup could not be cleaned up
            return 0
        filename, caption, toreplace = parsed

        print('File name: ' + filename)
        caption = caption.strip()
        print('Caption: ' + caption)
        if filename != '':
            test = input('Save?')
        else:
            test = 'n'
        if test == 'y':
            try:
                targetimage = pywikibot.FilePage(commons, 'File:'+filename)
                if targetimage.text == '':
                    print('Target image not found, skipping')
                    return 0
                newclaim = pywikibot.Claim(wikidata, 'P18')
                newclaim.setTarget(targetimage)
                wd_item.addClaim(newclaim, summary='Importing image from Commons {{PhotoOfTheDay}} template as part of migration to Wikidata')
                if caption != '':
                    qualifier = pywikibot.Claim(wikidata, 'P2096')
                    newqual = pywikibot.WbMonolingualText(caption, 'en')
                    qualifier.setTarget(newqual)
                    newclaim.addQualifier(qualifier, summary='Also importing a media legend from Commons')
            except Exception as err:
                print('Could not add the image to Wikidata, skipping: ' + str(err))
                return 0

        # Apply the same {{w|...}} unwrapping to the page text so the template
        # call matches what was parsed above
        newtext = target.text
        for case in toreplace:
            newtext = newtext.replace(case[0], case[1])

        # Prepare the replacement text for the category
        newtext = _swap_photooftheday(newtext, '{{Wikidata Infobox}}' not in target.text)
        if newtext is None:
            # Saving here would mangle the page, so leave it alone
            print('Could not locate {{PhotoOfTheDay}} in the page text, skipping')
            return 0

        # Do some extra tidying up
        newtext = newtext.replace('{{Interwiki from wikidata}}','')
        newtext = newtext.replace('\n\n\n','\n')
        newtext = newtext.replace('\n\n\n','\n')
        newtext = newtext.replace('\n\n{{Wikidata Infobox','\n{{Wikidata Infobox')
        print(newtext)
        if filename != '':
            test = input('Save?')
        else:
            # Nothing to import to Wikidata, so the page edit is not confirmed
            test = 'y'
        if test == 'y':
            target.text = newtext
            target.save('Migrating from {{PhotoOfTheDay}} to {{Wikidata Infobox}}, matching ID is ' + str(qid))
        elif test == 'b':
            target.text = newtext
            target.save("Migrating from {{PhotoOfTheDay}} to {{Wikidata Infobox}} - not migrating bad image for the day. Matching ID is " + str(qid))
        return 1

    # If we're here, then things didn't work.
    return 0
# Visit every page that transcludes the template and attempt the migration
template = pywikibot.Page(commons, 'Template:PhotoOfTheDay')
for target in template.embeddedin():
    # The return value is 1 when the migration step was reached, 0 when skipped
    test = migrate_photooftheday(target)
    # if test == 1:
    #     exit()