-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwd_utils.py
220 lines (195 loc) · 9.42 KB
/
wd_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#!/usr/bin/env python
from pywikibot.page import ItemPage
from __init__ import *
from wd_work import props, Get_claims, Claim_instance
class WD_utils:
    """Helpers for reading and creating Wikidata claims that link an article item with its subject item."""
    # Account name handed to every pwb.Site created below
    user = 'TextworkerBot'
    # Site cache keyed by '<language code><family>'; get_WPsite() adds further Wikipedias on demand
    sites = {'ruwikisource': pwb.Site('ru', 'wikisource', user=user),
             'ruwikipedia': pwb.Site('ru', 'wikipedia', user=user)}
    # Shortcuts to the two preloaded sites
    WS = sites['ruwikisource']
    WP = sites['ruwikipedia']
    # Data repository attached to the Wikisource site (presumably Wikidata - confirm)
    WD = WS.data_repository()
    # Matches strings made of 'Q' followed by digits only; not referenced in the visible part of the class
    re_is_item_id = re.compile(r'^Q\d+$')
def __init__(self, as_bot: bool = True, test_run: bool = False):
    """Remember the run flags and create the claim helper objects.

    :param as_bot: forwarded as ``bot=`` to ``addClaim`` when a claim is saved
    :param test_run: when true, ``add_main_subject`` and ``add_article_in_subjectitem``
        build their claims but return before saving them
    """
    self.as_bot, self.test_run = as_bot, test_run
    # Reader for existing claims and factory for new ones (both come from wd_work)
    self.get_claims = Get_claims()
    self.claim_instance = Claim_instance(self.WD)
# def get_topic_items(self, itemWD: ItemPage):
# return [i.target for i in self.get_claims.topics(itemWD) if isinstance(i.target, ItemPage)]
@staticmethod
def is_link(item_id: str, items: Union[list, tuple]):
for i in items:
if i.id == item_id:
return True
# def link_(self, item_id: str = None, title: str = None):
# items = self.get_items(itemWD, is_author_tpl)
# if item_id:
# for i in items:
# if i.id == item_id:
# return True
def is_id_in_item_describes(self, p, search_id: str, item: ItemPage) -> bool:
    """Check whether *item* already points back at the article *search_id*.

    Walks the claims returned by ``get_claims.described_by_source(item)``,
    keeps those whose target is the encyclopedia item ``p.enc_meta['wditem']``
    and looks among their qualifiers (``get_claims.get_qualifiers_dedicated_article``)
    for one whose target id equals *search_id*.

    :param p: page wrapper; only ``p.enc_meta['wditem']`` is read
    :param search_id: id expected as the qualifier target
    :param item: item whose claims are inspected
    :return: ``True`` when such a claim/qualifier pair exists, else ``False``
    """
    enc_item = p.enc_meta['wditem']
    if not enc_item:
        return False
    for claim in self.get_claims.described_by_source(item):
        # Skip claims without a target, as the topic check in this class
        # does, instead of failing on ``None.id``.
        if not claim.target or claim.target.id != enc_item.id:
            continue
        for qualifier in self.get_claims.get_qualifiers_dedicated_article(claim):
            if qualifier.target and qualifier.target.id == search_id:
                return True
    return False
def is_id_in_item_topics(self, item: ItemPage, search_id: str) -> bool:
    """Tell whether one of the topic claims of *item* targets *search_id*.

    :param item: item whose claims from ``get_claims.topics`` are inspected
    :param search_id: id to look for among the claim targets
    :return: ``True`` on a match, otherwise ``False`` (never ``None``)
    """
    return any(
        # Claims without a target cannot match and are skipped
        claim.target and claim.target.id == search_id
        for claim in self.get_claims.topics(item)
    )
def is_another_id_in_item_topics(self, item: ItemPage, search_id: str) -> bool:
    """Tell whether *item* has topic claims but none of them targets *search_id*.

    :param item: item whose topic claims are inspected
    :param search_id: id that is expected among the topic targets
    :return: ``True`` only when topics exist and *search_id* is not one of
        them; ``False`` otherwise (never ``None``)
    """
    if not self.get_claims.topics(item):
        # No topics at all, hence no "other" topic either
        return False
    return not self.is_id_in_item_topics(item, search_id)
def is_param_value_equal_item(self, p, m_wp_pagename: str, itemWD: ItemPage,
                              m_wp_page_item: ItemPage) -> bool:
    """Tell whether *itemWD* and *m_wp_page_item* already reference each other.

    Both directions must hold: ``is_id_in_item_describes`` finds ``itemWD.id``
    in *m_wp_page_item*, and ``is_id_in_item_topics`` finds
    ``m_wp_page_item.id`` among the topics of *itemWD*. On success a message
    is written with ``pwb.stdout``.

    :param p: page wrapper forwarded to ``is_id_in_item_describes``
    :param m_wp_pagename: page name, used only in the printed message
    :param itemWD: item checked for the topic link
    :param m_wp_page_item: item checked for the back reference
    :return: ``True`` when both links exist, otherwise ``False`` (never ``None``)
    """
    linked_both_ways = (
        self.is_id_in_item_describes(p, itemWD.id, m_wp_page_item)
        and self.is_id_in_item_topics(itemWD, m_wp_page_item.id)
    )
    if not linked_both_ways:
        return False
    pwb.stdout(
        f'значение параметра ("{m_wp_pagename}") совпадает с item (label {m_wp_page_item.labels.get("ru")})')
    return True
def is_item_of_disambig(self, item: ItemPage) -> bool:
    """Tell whether one of the type claims of *item* targets ``props.disambig``.

    ``props.disambig`` is presumably the id of the disambiguation-page item;
    it is defined in wd_work - confirm there.

    :param item: item whose claims from ``get_claims.item_type`` are inspected
    :return: ``True`` on a match, otherwise ``False`` (never ``None``)
    """
    return any(
        # Claims without a target cannot match and are skipped
        claim.target and claim.target.id == props.disambig
        for claim in self.get_claims.item_type(item)
    )
# def _join_items_article_and_subject(self, pname: str, subject_item_id: str, target_item: ItemPage):
# # создать ссылку на элемент темы
# wditem_subject = self.add_main_subject(target_id=subject_item_id)
#
# # создать "описывается в источниках" в элементе темы
# if wditem_subject:
# self.add_article_in_subjectitem(wditem_subject, pname, target_item)
def add_link_to_main_subject(self, p, m_wp_page_item: ItemPage, make_wd_links: bool, skip_existing_topics: bool):
    """Add the "main subject" link and its back reference where they are missing.

    :param p: page wrapper; ``p.itemWD`` is the article item
    :param m_wp_page_item: item of the subject the article should point to
    :param make_wd_links: nothing is done unless this is true
    :param skip_existing_topics: when true, stop (with a message) if the article
        item already has topics and none of them is *m_wp_page_item*
    """
    if not make_wd_links:
        return
    article_item = p.itemWD
    subject_id = m_wp_page_item.id
    if skip_existing_topics and self.is_another_id_in_item_topics(article_item, subject_id):
        pwb.stdout(
            'Item уже имеет темы, отличные от ручной ссылки. Возможно в ручной ссылке - дизамбиг')
        return
    # Article item -> subject item
    topic_present = self.is_id_in_item_topics(article_item, subject_id)
    if not topic_present:
        self.add_main_subject(article_item, target=m_wp_page_item)
    # Subject item -> article item
    back_reference_present = self.is_id_in_item_describes(p, article_item.id, m_wp_page_item)
    if not back_reference_present:
        self.add_article_in_subjectitem(p, m_wp_page_item, article_item)
def add_main_subject(self, itemWD: ItemPage, target_id: str = None, target: ItemPage = None):
    """Create a link from *itemWD* to the item of its subject.

    :param itemWD: item that receives the claim
    :param target_id: id of the subject item; takes precedence over *target*
    :param target: subject item, used when *target_id* is not given
    :return: always ``None``; nothing is saved when neither target is given
        or when ``self.test_run`` is set
    """
    subject_claim = self.claim_instance.claim_main_subject()
    if target_id:
        subject = pwb.ItemPage(self.WD, target_id)
    else:
        if not target:
            return
        subject = target
    subject_claim.setTarget(subject)
    if not self.test_run:
        itemWD.addClaim(subject_claim, bot=self.as_bot, summary='moved from ruwikisource')
        pwb.stdout(f'added main subject in item')
def add_article_in_subjectitem(self, p, subject_item: ItemPage, target_item: ItemPage):
    """Create the "described by source" claim in the subject item.

    The claim targets the encyclopedia item ``p.enc_meta['wditem']`` and gets
    a qualifier (from ``claim_instance.claim_dedicated_article``) that points
    at *target_item*.

    :param p: page wrapper; only ``p.enc_meta['wditem']`` is read
    :param subject_item: item that receives the claim
    :param target_item: article item stored as the qualifier target
    :return: always ``None``; nothing is saved when the encyclopedia item is
        missing or when ``self.test_run`` is set
    """
    encyclopedia_item = p.enc_meta['wditem']
    if not encyclopedia_item:
        # is_id_in_item_describes treats a missing encyclopedia item as
        # "nothing found", so this method is reached in that case; there is
        # no valid claim target then, so skip instead of calling setTarget(None).
        return
    described_by = self.claim_instance.claim_described_by_source()
    described_by.setTarget(encyclopedia_item)
    qualifier = self.claim_instance.claim_dedicated_article()
    qualifier.setTarget(target_item)
    described_by.addQualifier(qualifier)
    if self.test_run:
        return
    subject_item.addClaim(described_by, bot=self.as_bot, summary='moved from ruwikisource')
    pwb.stdout('added item of article in subject item')
# def get_item(self, site, item_id: str = None, title: str = None, page=None):
# item = None
# try:
# if item_id:
# item = pwb.ItemPage(site, item_id)
# else:
# if title:
# page = self.get_page(site, title=title)
# elif page:
# page = self.get_page(site, page=page)
# if page and page.exists():
# item = page.data_item()
# if item:
# item.get()
# except pwb.exceptions.NoPageError:
# item = None
# return item
def get_item_by_title(self, site: pwb.Site, title: str) -> Optional[pwb.ItemPage]:
    """Build the page *title* on *site* and return what ``get_item_by_page`` yields for it.

    :param site: site the page belongs to
    :param title: page title
    :return: the item found by ``get_item_by_page``, or ``None``
    """
    return self.get_item_by_page(pwb.Page(site, title))
def get_item_by_id(self, item_id: str) -> Optional[pwb.ItemPage]:
    """Return the item with id *item_id* from ``self.WD``, passed through ``load_item``.

    :param item_id: candidate item id
    :return: the result of ``load_item``; ``None`` when
        ``pwb.ItemPage.is_valid_id`` rejects *item_id*
    """
    if not pwb.ItemPage.is_valid_id(item_id):
        return None
    return self.load_item(pwb.ItemPage(self.WD, item_id))
def get_item_by_page(self, page: pwb.Page) -> Optional[pwb.ItemPage]:
    """Return the data item connected to *page*, passed through ``load_item``.

    :param page: wiki page; anything that is not a ``pwb.Page`` yields ``None``
    :return: the result of ``load_item``, or ``None`` when ``page.data_item()``
        raises ``NoPageError``
    """
    if not isinstance(page, pwb.Page):
        return None
    try:
        linked_item = page.data_item()
    except pwb.exceptions.NoPageError:
        return None
    return self.load_item(linked_item)
def load_item(self, item: Optional[pwb.ItemPage]) -> Optional[pwb.ItemPage]:
    """Follow redirects of an existing item and return the final item.

    :param item: item to resolve; may be ``None``
    :return: the item reached after following every redirect, or ``None``
        when *item* is not a ``pwb.ItemPage`` or does not exist
    """
    if not isinstance(item, pwb.ItemPage) or not item.exists():
        return None
    resolved = item
    while resolved.isRedirectPage():
        resolved = resolved.getRedirectTarget()
    return resolved
def _get_item(self, site: pwb.Site, pg: Union[str, pwb.Page]) -> Optional[pwb.ItemPage]:
    """Find the item for an item id, a page title or a page object.

    :param site: site used to build the item or the page
    :param pg: page title, item id ('Qxxx') or ``pwb.Page``
    :return: the item (fetched with ``get()`` when it exists), or ``None``
    """
    found = None
    page = pg
    if isinstance(page, str):
        text = page
        if pwb.ItemPage.is_valid_id(text):
            found = pwb.ItemPage(site, text)
        else:
            # Any other string is taken as a page title
            page = pwb.Page(site, text)
    if isinstance(page, pwb.Page):
        try:
            found = page.data_item()
        except pwb.exceptions.NoPageError:
            # data_item() failed for this page; keep ``found`` as it is
            pass
    if found and found.exists():
        found.get()
    return found
def get_WPsite(self, pagename_raw: str) -> Tuple[Optional[pwb.Site], Optional[str]]:
    """Resolve a raw page name to a Wikipedia site and a page title.

    The name is parsed as a ``pwb.Link`` relative to ``self.WP``. Names that
    contain 'be-tarask' or 'be-x-old' are parsed relative to that Wikipedia
    instead, with everything up to the '<code>:' prefix removed (presumably
    because these codes are not handled as ordinary prefixes - confirm).

    :param pagename_raw: page name, possibly carrying a language prefix
    :return: ``(site, title)``, or ``(None, None)`` when the link title cannot
        be parsed
    """
    pagename = None
    site = self.WP
    for code in ('be-tarask', 'be-x-old'):
        if code in pagename_raw:
            site = pwb.Site(code, 'wikipedia')
            pagename = pagename_raw.rpartition(code + ':')[-1]
            break
    link = pwb.Link(pagename or pagename_raw, source=site)
    lang = link.parse_site()[1]
    try:
        title = link.title
    except Exception:
        # Probably a non-standard language code in the page name. Stays a
        # broad best-effort catch (an earlier version listed only
        # SiteDefinitionError and InvalidTitle), but no longer swallows
        # KeyboardInterrupt / SystemExit as the bare ``except:`` did.
        return None, None
    cache_key = lang + 'wikipedia'
    # ``sites`` is a class attribute, so the cache is shared by all instances
    wp_site = self.sites.get(cache_key)
    if not wp_site:
        wp_site = self.sites[cache_key] = pwb.Site(lang, 'wikipedia')
    return wp_site, title
# def create_item_article(self):