From c8413cf5619677c9fdae817b69ce9e9f832b2c1d Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 29 Aug 2023 00:33:24 +0200 Subject: [PATCH 01/64] wikidata proof of concept --- openlibrary/core/models.py | 4 ++++ openlibrary/plugins/wikidata/code.py | 24 +++++++++++++++++++++ openlibrary/templates/type/author/view.html | 12 +++++++++++ 3 files changed, 40 insertions(+) create mode 100644 openlibrary/plugins/wikidata/code.py diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index b109ff4a629..9d5ff537413 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -38,6 +38,7 @@ from .waitinglist import WaitingLoan from ..accounts import OpenLibraryAccount from ..plugins.upstream.utils import get_coverstore_url, get_coverstore_public_url +from ..plugins.wikidata.code import get_wikidata_entity logger = logging.getLogger("openlibrary.core") @@ -756,6 +757,9 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" + def wikidata(self, QID: str = "Q44"): + return get_wikidata_entity(QID) + def __repr__(self): return "" % repr(self.key) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py new file mode 100644 index 00000000000..c43e6593433 --- /dev/null +++ b/openlibrary/plugins/wikidata/code.py @@ -0,0 +1,24 @@ +""" +First pass for incorporating wikidata into OpenLibrary author Pages. + +TODO: +- Cache responses from Wikidata (preferably in a table) + +""" +import requests +from dataclasses import dataclass + + +@dataclass +class WikiDataEntity: + descriptions: dict[str, str] + + def description(self, language: str = 'en') -> str | None: + return self.descriptions[language] + + +def get_wikidata_entity(QID: str) -> WikiDataEntity: + response = requests.get( + "https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/" + QID + ).json() + return WikiDataEntity(descriptions=response["descriptions"]) diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index d9588e80785..7fd3c455f7a 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -154,6 +154,18 @@

$:render_template("covers/change", page, ".bookCover img") + $if 'wikidata' in page.remote_ids: + $ wd_id = page.remote_ids["wikidata"] + $ wd_entity = page.wikidata(wd_id) +
+

Short Description: $wd_entity.description()

+ Powered by + Wikidata + + + +
+ $def render_subjects(label, subjects, prefix): $if subjects:
From a2c0f63fd2fd8b86de7b925c3184c291570f8b51 Mon Sep 17 00:00:00 2001 From: RayBB Date: Fri, 1 Sep 2023 02:03:38 +0200 Subject: [PATCH 02/64] first pass with postgres --- openlibrary/core/schema.sql | 6 ++++++ openlibrary/core/wikidata.py | 30 ++++++++++++++++++++++++++++ openlibrary/plugins/wikidata/code.py | 16 +++++++++++---- 3 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 openlibrary/core/wikidata.py diff --git a/openlibrary/core/schema.sql b/openlibrary/core/schema.sql index 8663cac577a..9f63082d59e 100644 --- a/openlibrary/core/schema.sql +++ b/openlibrary/core/schema.sql @@ -90,3 +90,9 @@ CREATE TABLE yearly_reading_goals ( updated timestamp without time zone default (current_timestamp at time zone 'utc'), primary key (username, year) ); + +CREATE TABLE wikidata ( + id text not null primary key, + data json, + updated timestamp without time zone default (current_timestamp at time zone 'utc') +) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py new file mode 100644 index 00000000000..07bb5dd30c5 --- /dev/null +++ b/openlibrary/core/wikidata.py @@ -0,0 +1,30 @@ +from openlibrary.core import db + + +class WikidataEntities(db.CommonExtras): + TABLENAME = "wikidata" + PRIMARY_KEY = "id" + + @classmethod + def get_by_id(cls, id) -> dict | None: + result = cls.get_by_ids([id]) + if len(result) > 0: + return result[0] + return None + + @classmethod + def get_by_ids(cls, ids: list[str]) -> list: # TODO typing??... + oldb = db.get_db() + query = 'select * from wikidata where id IN ($ids)' + return list(oldb.query(query, vars={'ids': ids})) + + @classmethod + def add(cls, id: str, data: dict) -> None: + oldb = db.get_db() + + wikidata_entities = cls.get_by_ids([id]) + if len(wikidata_entities) == 0: + return oldb.insert(cls.TABLENAME, id=id, data=data) + else: + where = "id=$id" + return oldb.update(cls.TABLENAME, where=where, id=id, data=data) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index c43e6593433..2d423eeecd6 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -8,6 +8,8 @@ import requests from dataclasses import dataclass +from openlibrary.core.wikidata import WikidataEntities + @dataclass class WikiDataEntity: @@ -18,7 +20,13 @@ def description(self, language: str = 'en') -> str | None: def get_wikidata_entity(QID: str) -> WikiDataEntity: - response = requests.get( - "https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/" + QID - ).json() - return WikiDataEntity(descriptions=response["descriptions"]) + entity = WikidataEntities.get_by_id(QID) + if not entity: + response = requests.get( + 'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' + ).json() + # TODO check for 200? + WikidataEntities.add(QID, response) + return WikiDataEntity(descriptions=response["descriptions"]) + else: + return WikiDataEntity(descriptions=entity["descriptions"]) From 8039cb1f579fd2baf94e7bbadea850b2705bc86c Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 01:27:14 +0000 Subject: [PATCH 03/64] cleanup naming and add comments --- openlibrary/core/models.py | 6 +++-- openlibrary/core/wikidata.py | 29 ++++++++++++++------- openlibrary/plugins/wikidata/code.py | 21 ++++++++------- openlibrary/templates/type/author/view.html | 7 +++-- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 9d5ff537413..26bdc48b861 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -757,8 +757,10 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self, QID: str = "Q44"): - return get_wikidata_entity(QID) + def wikidata(self): + if wd_id := self.remote_ids.get("wikidata"): + return get_wikidata_entity(wd_id) + return None def __repr__(self): return "" % repr(self.key) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 07bb5dd30c5..a2374e3871f 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -1,30 +1,41 @@ +""" +The purpose of this file is to interact with postgres in relation to Wikidata. +""" + +import json from openlibrary.core import db +class WikidataRow: + id: str + data: dict + updated: str + + class WikidataEntities(db.CommonExtras): TABLENAME = "wikidata" PRIMARY_KEY = "id" @classmethod - def get_by_id(cls, id) -> dict | None: - result = cls.get_by_ids([id]) - if len(result) > 0: + def get_by_id(cls, id) -> WikidataRow | None: + if len(result := cls.get_by_ids([id])) > 0: return result[0] return None @classmethod - def get_by_ids(cls, ids: list[str]) -> list: # TODO typing??... + def get_by_ids(cls, ids: list[str]) -> list[WikidataRow]: oldb = db.get_db() query = 'select * from wikidata where id IN ($ids)' return list(oldb.query(query, vars={'ids': ids})) @classmethod def add(cls, id: str, data: dict) -> None: + # TODO: when we upgrade to postgres 9.5+ we should use upsert here oldb = db.get_db() + json_data = json.dumps(data) - wikidata_entities = cls.get_by_ids([id]) - if len(wikidata_entities) == 0: - return oldb.insert(cls.TABLENAME, id=id, data=data) - else: + if cls.get_by_id(id) is None: where = "id=$id" - return oldb.update(cls.TABLENAME, where=where, id=id, data=data) + return oldb.update(cls.TABLENAME, where=where, id=id, data=json_data) + else: + return oldb.insert(cls.TABLENAME, id=id, data=json_data) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 2d423eeecd6..9c541edd22e 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -1,9 +1,8 @@ """ -First pass for incorporating wikidata into OpenLibrary author Pages. - -TODO: -- Cache responses from Wikidata (preferably in a table) - +The purpose of this file is to: +1. Interact with the Wikidata API +2. Store the results +3. Make the results easy to access from other files """ import requests from dataclasses import dataclass @@ -13,6 +12,7 @@ @dataclass class WikiDataEntity: + id: str descriptions: dict[str, str] def description(self, language: str = 'en') -> str | None: @@ -20,13 +20,14 @@ def description(self, language: str = 'en') -> str | None: def get_wikidata_entity(QID: str) -> WikiDataEntity: - entity = WikidataEntities.get_by_id(QID) - if not entity: + if entity := WikidataEntities.get_by_id(QID): + return WikiDataEntity( + id=entity.data["id"], descriptions=entity.data["descriptions"] + ) + else: response = requests.get( 'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' ).json() # TODO check for 200? WikidataEntities.add(QID, response) - return WikiDataEntity(descriptions=response["descriptions"]) - else: - return WikiDataEntity(descriptions=entity["descriptions"]) + return WikiDataEntity(id=response["id"], descriptions=response["descriptions"]) diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index 7fd3c455f7a..53086e43544 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -154,13 +154,12 @@

$:render_template("covers/change", page, ".bookCover img")

- $if 'wikidata' in page.remote_ids: - $ wd_id = page.remote_ids["wikidata"] - $ wd_entity = page.wikidata(wd_id) + $ wd_entity = page.wikidata() + $if wd_entity:

Short Description: $wd_entity.description()

Powered by - Wikidata + Wikidata From cd4acbc83dea86deec6779ad877dbc964f7aed49 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 01:50:13 +0000 Subject: [PATCH 04/64] add ttl check --- openlibrary/core/helpers.py | 5 +++++ openlibrary/core/wikidata.py | 3 ++- openlibrary/plugins/wikidata/code.py | 9 +++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/openlibrary/core/helpers.py b/openlibrary/core/helpers.py index 0d2f8f4318b..23ff43fbc09 100644 --- a/openlibrary/core/helpers.py +++ b/openlibrary/core/helpers.py @@ -148,6 +148,11 @@ def days_since(then, now=None): return abs(delta.days) +def seconds_since(then, now=None): + delta = then - (now or datetime.now()) + return abs(delta.seconds) + + def datestr(then, now=None, lang=None, relative=True): """Internationalized version of web.datestr.""" lang = lang or web.ctx.lang diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index a2374e3871f..ba6c9796883 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -2,6 +2,7 @@ The purpose of this file is to interact with postgres in relation to Wikidata. """ +from datetime import datetime import json from openlibrary.core import db @@ -9,7 +10,7 @@ class WikidataRow: id: str data: dict - updated: str + updated: datetime class WikidataEntities(db.CommonExtras): diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 9c541edd22e..4f154b88bc2 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -6,9 +6,12 @@ """ import requests from dataclasses import dataclass +from openlibrary.core.helpers import seconds_since from openlibrary.core.wikidata import WikidataEntities +MONTH_IN_SECONDS = 60 * 60 * 24 * 30 + @dataclass class WikiDataEntity: @@ -19,8 +22,10 @@ def description(self, language: str = 'en') -> str | None: return self.descriptions[language] -def get_wikidata_entity(QID: str) -> WikiDataEntity: - if entity := WikidataEntities.get_by_id(QID): +# ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache +def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity | None: + entity = WikidataEntities.get_by_id(QID) + if entity and seconds_since(entity.updated) < ttl: return WikiDataEntity( id=entity.data["id"], descriptions=entity.data["descriptions"] ) From dd61d6c7e6f8a6c3642c90e9169d686621b9f237 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 02:08:48 +0000 Subject: [PATCH 05/64] move wikidata to template and setup caching for templates --- openlibrary/core/models.py | 7 +++++-- openlibrary/templates/type/author/view.html | 9 +-------- openlibrary/templates/wikidata_author.html | 12 ++++++++++++ 3 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 openlibrary/templates/wikidata_author.html diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 26bdc48b861..8adfe08c1c1 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -757,9 +757,12 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self): + def wikidata(self, use_cache: bool = True): if wd_id := self.remote_ids.get("wikidata"): - return get_wikidata_entity(wd_id) + if use_cache: + return get_wikidata_entity(wd_id) + else: + return get_wikidata_entity(wd_id, 0) return None def __repr__(self): diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index 53086e43544..afffedf57e5 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -156,14 +156,7 @@

$ wd_entity = page.wikidata() $if wd_entity: -
-

Short Description: $wd_entity.description()

- Powered by - Wikidata - - - -
+ $:render_template("wikidata_author", wd_entity) $def render_subjects(label, subjects, prefix): $if subjects: diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html new file mode 100644 index 00000000000..9a19a6b1446 --- /dev/null +++ b/openlibrary/templates/wikidata_author.html @@ -0,0 +1,12 @@ +$def with (wd_entity) + +$if wd_entity: +
+

Short Description: $wd_entity.description()

+ Powered by + Wikidata + + + + +
\ No newline at end of file From ae14380a721fe29a9b03bebf1c550ade6823a71d Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 02:26:30 +0000 Subject: [PATCH 06/64] prettier infobox, user language --- openlibrary/plugins/wikidata/code.py | 3 ++- openlibrary/templates/type/author/edit.html | 7 ++++-- openlibrary/templates/wikidata_author.html | 28 +++++++++++++++------ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 4f154b88bc2..40279718f64 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -19,7 +19,8 @@ class WikiDataEntity: descriptions: dict[str, str] def description(self, language: str = 'en') -> str | None: - return self.descriptions[language] + # If a description isn't available in the requested language default to English + return self.descriptions.get(language) or self.descriptions.get('en') # ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index dcd1ee6ebc6..611333c5e09 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -22,8 +22,8 @@

$_("Edit Author")

-
-
+
+
@@ -35,6 +35,9 @@

$_("Edit Author")

+ $ wd_entity = page.wikidata() + $if wd_entity: + $:render_template("wikidata_author", wd_entity)
diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index 9a19a6b1446..cbb5bfed9b9 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -1,12 +1,24 @@ $def with (wd_entity) $if wd_entity: -
-

Short Description: $wd_entity.description()

- Powered by - Wikidata - - - - +
+ +

Quick Info

+
+

Short Description: $wd_entity.description(i18n.get_locale())

+ Powered by + Wikidata + + + + +
\ No newline at end of file From 7bd2c969e903d383b486ebdeb2f11029e75d3900 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 02:30:22 +0000 Subject: [PATCH 07/64] simplify renter template --- openlibrary/core/schema.sql | 2 +- openlibrary/templates/type/author/edit.html | 4 +--- openlibrary/templates/type/author/view.html | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/openlibrary/core/schema.sql b/openlibrary/core/schema.sql index 9f63082d59e..94b53ad00d6 100644 --- a/openlibrary/core/schema.sql +++ b/openlibrary/core/schema.sql @@ -92,7 +92,7 @@ CREATE TABLE yearly_reading_goals ( ); CREATE TABLE wikidata ( - id text not null primary key, + id text not null primary key, data json, updated timestamp without time zone default (current_timestamp at time zone 'utc') ) diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 611333c5e09..d2702657b4c 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -35,9 +35,7 @@

$_("Edit Author")

- $ wd_entity = page.wikidata() - $if wd_entity: - $:render_template("wikidata_author", wd_entity) + $:render_template("wikidata_author", page.wikidata())
diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index afffedf57e5..dc0505ed295 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -154,9 +154,7 @@

$:render_template("covers/change", page, ".bookCover img")

- $ wd_entity = page.wikidata() - $if wd_entity: - $:render_template("wikidata_author", wd_entity) + $:render_template("wikidata_author", page.wikidata()) $def render_subjects(label, subjects, prefix): $if subjects: From 23cb83b7d329c182b876f6cf0a6a6255fc2a7946 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 02:35:43 +0000 Subject: [PATCH 08/64] 200 check --- openlibrary/plugins/wikidata/code.py | 15 +++++++++++---- openlibrary/templates/type/author/edit.html | 2 +- openlibrary/templates/wikidata_author.html | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 40279718f64..e5410130608 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -33,7 +33,14 @@ def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity else: response = requests.get( 'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' - ).json() - # TODO check for 200? - WikidataEntities.add(QID, response) - return WikiDataEntity(id=response["id"], descriptions=response["descriptions"]) + ) + if response.status_code == 200: + response_json = response.json() + WikidataEntities.add(QID, response_json) + return WikiDataEntity( + id=response_json["id"], descriptions=response_json["descriptions"] + ) + else: + return None + # TODO: What should we do in non-200 cases? + # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index d2702657b4c..5caf28c7f10 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -21,7 +21,7 @@

$_("Edit Author")

- +
diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index cbb5bfed9b9..d38e4de68e9 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -21,4 +21,4 @@

Quick Info

-
\ No newline at end of file +
From b826f6aab949ef43a9b5f4bea63050ea4df67640 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 03:15:08 +0000 Subject: [PATCH 09/64] fix inserting --- openlibrary/core/wikidata.py | 5 ++--- openlibrary/plugins/wikidata/code.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index ba6c9796883..9880bf63946 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -15,7 +15,6 @@ class WikidataRow: class WikidataEntities(db.CommonExtras): TABLENAME = "wikidata" - PRIMARY_KEY = "id" @classmethod def get_by_id(cls, id) -> WikidataRow | None: @@ -36,7 +35,7 @@ def add(cls, id: str, data: dict) -> None: json_data = json.dumps(data) if cls.get_by_id(id) is None: + return oldb.insert(cls.TABLENAME, id=id, data=json_data) + else: where = "id=$id" return oldb.update(cls.TABLENAME, where=where, id=id, data=json_data) - else: - return oldb.insert(cls.TABLENAME, id=id, data=json_data) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index e5410130608..b81e87136c4 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -32,7 +32,7 @@ def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity ) else: response = requests.get( - 'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' + f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' ) if response.status_code == 200: response_json = response.json() From f6df97fdf5a2a022783087a61fb42f43601e5045 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 03:52:20 +0000 Subject: [PATCH 10/64] fix bug with inserting vars --- openlibrary/core/wikidata.py | 9 +++++---- openlibrary/plugins/wikidata/code.py | 10 +++------- openlibrary/templates/type/author/edit.html | 2 +- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 9880bf63946..425ad21d27b 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -34,8 +34,9 @@ def add(cls, id: str, data: dict) -> None: oldb = db.get_db() json_data = json.dumps(data) - if cls.get_by_id(id) is None: - return oldb.insert(cls.TABLENAME, id=id, data=json_data) + if cls.get_by_id(id): + return oldb.update( + cls.TABLENAME, where="id=$id", vars={'id': id}, data=json_data + ) else: - where = "id=$id" - return oldb.update(cls.TABLENAME, where=where, id=id, data=json_data) + return oldb.insert(cls.TABLENAME, id=id, data=json_data) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index b81e87136c4..c4fa94c1d70 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -27,19 +27,15 @@ def description(self, language: str = 'en') -> str | None: def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity | None: entity = WikidataEntities.get_by_id(QID) if entity and seconds_since(entity.updated) < ttl: - return WikiDataEntity( - id=entity.data["id"], descriptions=entity.data["descriptions"] - ) + return WikiDataEntity(id=QID, descriptions=entity.data["descriptions"]) else: response = requests.get( f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' ) if response.status_code == 200: response_json = response.json() - WikidataEntities.add(QID, response_json) - return WikiDataEntity( - id=response_json["id"], descriptions=response_json["descriptions"] - ) + WikidataEntities.add(id=QID, data=response_json) + return WikiDataEntity(id=QID, descriptions=response_json["descriptions"]) else: return None # TODO: What should we do in non-200 cases? diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 5caf28c7f10..27255aa6a30 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -35,7 +35,7 @@

$_("Edit Author")

- $:render_template("wikidata_author", page.wikidata()) + $:render_template("wikidata_author", page.wikidata(use_cache=False))
From 8438f71c2f4116b6425e73554165f4ba3f8bed2d Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 11:26:16 +0000 Subject: [PATCH 11/64] use Optional[] --- openlibrary/core/models.py | 6 +++--- openlibrary/core/wikidata.py | 3 ++- openlibrary/plugins/wikidata/code.py | 7 +++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 8adfe08c1c1..a2cb644f1d7 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -8,7 +8,7 @@ import web import json import requests -from typing import Any +from typing import Any, Optional from collections import defaultdict from dataclasses import dataclass, field @@ -38,7 +38,7 @@ from .waitinglist import WaitingLoan from ..accounts import OpenLibraryAccount from ..plugins.upstream.utils import get_coverstore_url, get_coverstore_public_url -from ..plugins.wikidata.code import get_wikidata_entity +from ..plugins.wikidata.code import WikiDataEntity, get_wikidata_entity logger = logging.getLogger("openlibrary.core") @@ -757,7 +757,7 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self, use_cache: bool = True): + def wikidata(self, use_cache: bool = True) -> Optional[WikiDataEntity]: if wd_id := self.remote_ids.get("wikidata"): if use_cache: return get_wikidata_entity(wd_id) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 425ad21d27b..29427cf0a0e 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -4,6 +4,7 @@ from datetime import datetime import json +from typing import Optional from openlibrary.core import db @@ -17,7 +18,7 @@ class WikidataEntities(db.CommonExtras): TABLENAME = "wikidata" @classmethod - def get_by_id(cls, id) -> WikidataRow | None: + def get_by_id(cls, id: str) -> Optional[WikidataRow]: if len(result := cls.get_by_ids([id])) > 0: return result[0] return None diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index c4fa94c1d70..54b1f6837e7 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -4,6 +4,7 @@ 2. Store the results 3. Make the results easy to access from other files """ +from typing import Optional import requests from dataclasses import dataclass from openlibrary.core.helpers import seconds_since @@ -18,13 +19,15 @@ class WikiDataEntity: id: str descriptions: dict[str, str] - def description(self, language: str = 'en') -> str | None: + def description(self, language: str = 'en') -> Optional[str]: # If a description isn't available in the requested language default to English return self.descriptions.get(language) or self.descriptions.get('en') # ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache -def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity | None: +def get_wikidata_entity( + QID: str, ttl: int = MONTH_IN_SECONDS +) -> Optional[WikiDataEntity]: entity = WikidataEntities.get_by_id(QID) if entity and seconds_since(entity.updated) < ttl: return WikiDataEntity(id=QID, descriptions=entity.data["descriptions"]) From 58842ecc1ca4711fa5609e76a188dc5653e33280 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 11:27:58 +0000 Subject: [PATCH 12/64] move svg to file --- openlibrary/templates/wikidata_author.html | 3 +-- static/images/icons/edit.svg | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 static/images/icons/edit.svg diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index d38e4de68e9..a0406fa0911 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -16,8 +16,7 @@

Quick InfoShort Description: $wd_entity.description(i18n.get_locale())

Powered by Wikidata - - +

diff --git a/static/images/icons/edit.svg b/static/images/icons/edit.svg new file mode 100644 index 00000000000..f287625205f --- /dev/null +++ b/static/images/icons/edit.svg @@ -0,0 +1,2 @@ + + \ No newline at end of file From b52940285514da77661ca6e6f5c1e0428143322a Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 11:41:58 +0000 Subject: [PATCH 13/64] note about QIDs --- openlibrary/core/models.py | 4 ++-- openlibrary/plugins/wikidata/code.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index a2cb644f1d7..f78e8669ece 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -760,9 +760,9 @@ def get_url_suffix(self): def wikidata(self, use_cache: bool = True) -> Optional[WikiDataEntity]: if wd_id := self.remote_ids.get("wikidata"): if use_cache: - return get_wikidata_entity(wd_id) + return get_wikidata_entity(QID=wd_id) else: - return get_wikidata_entity(wd_id, 0) + return get_wikidata_entity(QID=wd_id, ttl=0) return None def __repr__(self): diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 54b1f6837e7..8540a29f682 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -24,10 +24,13 @@ def description(self, language: str = 'en') -> Optional[str]: return self.descriptions.get(language) or self.descriptions.get('en') -# ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache def get_wikidata_entity( QID: str, ttl: int = MONTH_IN_SECONDS ) -> Optional[WikiDataEntity]: + """ + This only supports QIDs, if we want to support PIDs we need to use different endpoints + ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache + """ entity = WikidataEntities.get_by_id(QID) if entity and seconds_since(entity.updated) < ttl: return WikiDataEntity(id=QID, descriptions=entity.data["descriptions"]) From ff83ce4a388a23b3590f4d7e346b752694015d47 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Fri, 1 Sep 2023 11:44:53 +0000 Subject: [PATCH 14/64] comment to docstring --- openlibrary/plugins/wikidata/code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 8540a29f682..bc5478b4cd5 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -20,7 +20,7 @@ class WikiDataEntity: descriptions: dict[str, str] def description(self, language: str = 'en') -> Optional[str]: - # If a description isn't available in the requested language default to English + """If a description isn't available in the requested language default to English""" return self.descriptions.get(language) or self.descriptions.get('en') From 3e7fe15d6ed196e16eaa1870b82d0df6cf2bb5da Mon Sep 17 00:00:00 2001 From: RayBB Date: Fri, 1 Sep 2023 17:48:51 +0200 Subject: [PATCH 15/64] remove optionals --- openlibrary/core/models.py | 4 ++-- openlibrary/core/wikidata.py | 3 +-- openlibrary/plugins/wikidata/code.py | 7 ++----- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index f78e8669ece..b772a17f9ea 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -8,7 +8,7 @@ import web import json import requests -from typing import Any, Optional +from typing import Any from collections import defaultdict from dataclasses import dataclass, field @@ -757,7 +757,7 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self, use_cache: bool = True) -> Optional[WikiDataEntity]: + def wikidata(self, use_cache: bool = True) -> WikiDataEntity | None: if wd_id := self.remote_ids.get("wikidata"): if use_cache: return get_wikidata_entity(QID=wd_id) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 29427cf0a0e..314dd3bf8c9 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -4,7 +4,6 @@ from datetime import datetime import json -from typing import Optional from openlibrary.core import db @@ -18,7 +17,7 @@ class WikidataEntities(db.CommonExtras): TABLENAME = "wikidata" @classmethod - def get_by_id(cls, id: str) -> Optional[WikidataRow]: + def get_by_id(cls, id: str) -> WikidataRow | None: if len(result := cls.get_by_ids([id])) > 0: return result[0] return None diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index bc5478b4cd5..546cdb90618 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -4,7 +4,6 @@ 2. Store the results 3. Make the results easy to access from other files """ -from typing import Optional import requests from dataclasses import dataclass from openlibrary.core.helpers import seconds_since @@ -19,14 +18,12 @@ class WikiDataEntity: id: str descriptions: dict[str, str] - def description(self, language: str = 'en') -> Optional[str]: + def description(self, language: str = 'en') -> str | None: """If a description isn't available in the requested language default to English""" return self.descriptions.get(language) or self.descriptions.get('en') -def get_wikidata_entity( - QID: str, ttl: int = MONTH_IN_SECONDS -) -> Optional[WikiDataEntity]: +def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache From d775c72c596607548dd5ed5f1f22ccebf3dc2b6a Mon Sep 17 00:00:00 2001 From: RayBB Date: Fri, 1 Sep 2023 23:11:44 +0200 Subject: [PATCH 16/64] use the read-options css --- openlibrary/templates/wikidata_author.html | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index a0406fa0911..3d4a0ea0299 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -1,19 +1,15 @@ $def with (wd_entity) $if wd_entity: -
+

Quick Info

-

Short Description: $wd_entity.description(i18n.get_locale())

+

$wd_entity.description(i18n.get_locale())

Powered by Wikidata From 2523f5beb90bbe26779a0a77c42e01be1b2577d9 Mon Sep 17 00:00:00 2001 From: RayBB Date: Sat, 2 Sep 2023 00:43:20 +0200 Subject: [PATCH 17/64] add __init__.py --- openlibrary/plugins/wikidata/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 openlibrary/plugins/wikidata/__init__.py diff --git a/openlibrary/plugins/wikidata/__init__.py b/openlibrary/plugins/wikidata/__init__.py new file mode 100644 index 00000000000..5c7a3ff6c0b --- /dev/null +++ b/openlibrary/plugins/wikidata/__init__.py @@ -0,0 +1 @@ +'wikidata plugin.' From d5ad3c3028f515704f45e8baa6e6a95dcf724e07 Mon Sep 17 00:00:00 2001 From: RayBB Date: Sun, 24 Sep 2023 22:44:49 +0200 Subject: [PATCH 18/64] address some small feedback --- openlibrary/core/wikidata.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 314dd3bf8c9..b28ab787f40 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -13,7 +13,7 @@ class WikidataRow: updated: datetime -class WikidataEntities(db.CommonExtras): +class WikidataEntities: TABLENAME = "wikidata" @classmethod @@ -24,9 +24,12 @@ def get_by_id(cls, id: str) -> WikidataRow | None: @classmethod def get_by_ids(cls, ids: list[str]) -> list[WikidataRow]: - oldb = db.get_db() - query = 'select * from wikidata where id IN ($ids)' - return list(oldb.query(query, vars={'ids': ids})) + return list( + db.get_db().query( + 'select * from wikidata where id IN ($ids)', + vars={'ids': ids}, + ) + ) @classmethod def add(cls, id: str, data: dict) -> None: From bb7359a29d30a353b0bbd34307c4135f7f6002be Mon Sep 17 00:00:00 2001 From: Raymond Berger Date: Sun, 24 Sep 2023 22:47:06 +0200 Subject: [PATCH 19/64] Update openlibrary/templates/wikidata_author.html Co-authored-by: Drini Cami --- openlibrary/templates/wikidata_author.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index 3d4a0ea0299..d2a3081f478 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -7,7 +7,7 @@ margin-bottom: 0; } -

Quick Info

+

$_('Quick Info')

From bfb87581c0323bde46afc28864d0d2938bbe7374 Mon Sep 17 00:00:00 2001 From: RayBB Date: Sun, 24 Sep 2023 23:53:18 +0200 Subject: [PATCH 21/64] move css to less file --- openlibrary/core/wikidata.py | 1 + openlibrary/templates/wikidata_author.html | 11 +++-------- static/css/page-user.less | 2 ++ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index b28ab787f40..4af91a65463 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -24,6 +24,7 @@ def get_by_id(cls, id: str) -> WikidataRow | None: @classmethod def get_by_ids(cls, ids: list[str]) -> list[WikidataRow]: + # TODO: convert to WikidataRow ? return list( db.get_db().query( 'select * from wikidata where id IN ($ids)', diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index 2e4f419507e..c8d76ad701a 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -2,15 +2,10 @@ $if wd_entity:
- -

$_('Quick Info')

-
+

$_('Quick Info')

+

$wd_entity.description(i18n.get_locale())

- $_('Powered by Wikidata') + $_('Powered by Wikidata')
diff --git a/static/css/page-user.less b/static/css/page-user.less index 0f06bcb795a..58cb1cd1f0c 100644 --- a/static/css/page-user.less +++ b/static/css/page-user.less @@ -235,3 +235,5 @@ tr.table-row.selected{ // Import styles for want-to-read buttons @import (less) "legacy-tools.less"; @import (less) "components/mybooks-dropper.less"; +// Import styles for wikidatabox +@import (less) "components/wikidatabox.less"; From 2da53d97fa6e14fe0ccb47cf50da712a61168860 Mon Sep 17 00:00:00 2001 From: RayBB Date: Sun, 24 Sep 2023 23:57:04 +0200 Subject: [PATCH 22/64] add less file --- static/css/components/wikidatabox.less | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 static/css/components/wikidatabox.less diff --git a/static/css/components/wikidatabox.less b/static/css/components/wikidatabox.less new file mode 100644 index 00000000000..f62a72ce510 --- /dev/null +++ b/static/css/components/wikidatabox.less @@ -0,0 +1,15 @@ +.wikidatabox{ + p { + margin-bottom: 0; + } + h3 { + margin-top: 2px; + margin-bottom: 2px; + } + div { + margin: 5px; + } + .powered-by-link{ + font-size: .6em; + } +} From 4172212f7e1fea8337b27af0450af8c9586872f0 Mon Sep 17 00:00:00 2001 From: RayBB Date: Mon, 25 Sep 2023 00:39:21 +0200 Subject: [PATCH 23/64] move css to less --- openlibrary/templates/type/author/edit.html | 5 ++--- static/css/components/form.olform.less | 11 +++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 27255aa6a30..eef9e5f5bd3 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -21,9 +21,8 @@

$_("Edit Author")

- -
-
+
+
diff --git a/static/css/components/form.olform.less b/static/css/components/form.olform.less index f87423c3913..1e6d6405195 100644 --- a/static/css/components/form.olform.less +++ b/static/css/components/form.olform.less @@ -193,6 +193,10 @@ color: @grey; font-family: @lucida_sans_serif-1 !important; } + .nameAndWikidata { + display: flex; + flex-direction: column; + } } .olform__input--large { @@ -260,6 +264,13 @@ } } } + + .nameAndWikidata { + flex-direction: row; + .formElement{ + flex-basis: calc(2 / 3 * 100%); + } + } } /* stylelint-enable selector-max-specificity */ } From 17dd9d54567b87456a4c5ff02bc0a59670494822 Mon Sep 17 00:00:00 2001 From: RayBB Date: Mon, 25 Sep 2023 00:50:02 +0200 Subject: [PATCH 24/64] simplify cache --- openlibrary/core/helpers.py | 5 ----- openlibrary/plugins/wikidata/code.py | 8 +++----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/openlibrary/core/helpers.py b/openlibrary/core/helpers.py index 23ff43fbc09..0d2f8f4318b 100644 --- a/openlibrary/core/helpers.py +++ b/openlibrary/core/helpers.py @@ -148,11 +148,6 @@ def days_since(then, now=None): return abs(delta.days) -def seconds_since(then, now=None): - delta = then - (now or datetime.now()) - return abs(delta.seconds) - - def datestr(then, now=None, lang=None, relative=True): """Internationalized version of web.datestr.""" lang = lang or web.ctx.lang diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index 546cdb90618..f521b34f65f 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -6,12 +6,10 @@ """ import requests from dataclasses import dataclass -from openlibrary.core.helpers import seconds_since +from openlibrary.core.helpers import days_since from openlibrary.core.wikidata import WikidataEntities -MONTH_IN_SECONDS = 60 * 60 * 24 * 30 - @dataclass class WikiDataEntity: @@ -23,13 +21,13 @@ def description(self, language: str = 'en') -> str | None: return self.descriptions.get(language) or self.descriptions.get('en') -def get_wikidata_entity(QID: str, ttl: int = MONTH_IN_SECONDS) -> WikiDataEntity | None: +def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache """ entity = WikidataEntities.get_by_id(QID) - if entity and seconds_since(entity.updated) < ttl: + if entity and days_since(entity.updated) < ttl_days: return WikiDataEntity(id=QID, descriptions=entity.data["descriptions"]) else: response = requests.get( From 31280cf17c9f11cb9f8c2349f6e9385d1c491b95 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 01:31:10 +0200 Subject: [PATCH 25/64] first steps to refactor python --- openlibrary/core/models.py | 3 +- openlibrary/core/wikidata.py | 112 +++++++++++++++++++++++---- openlibrary/plugins/wikidata/code.py | 43 ---------- 3 files changed, 101 insertions(+), 57 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index b772a17f9ea..3f1605c20cd 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -28,6 +28,7 @@ from openlibrary.core.ratings import Ratings from openlibrary.utils import extract_numeric_id_from_olid, dateutil from openlibrary.utils.isbn import to_isbn_13, isbn_13_to_isbn_10, canonical +from openlibrary.core.wikidata import WikiDataEntity, get_wikidata_entity from . import cache, waitinglist @@ -38,7 +39,7 @@ from .waitinglist import WaitingLoan from ..accounts import OpenLibraryAccount from ..plugins.upstream.utils import get_coverstore_url, get_coverstore_public_url -from ..plugins.wikidata.code import WikiDataEntity, get_wikidata_entity + logger = logging.getLogger("openlibrary.core") diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 4af91a65463..858e8267307 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -1,12 +1,96 @@ """ -The purpose of this file is to interact with postgres in relation to Wikidata. +The purpose of this file is to: +1. Interact with the Wikidata API +2. Store the results +3. Make the results easy to access from other files """ +import requests +import web +from dataclasses import dataclass +from openlibrary.core.helpers import days_since from datetime import datetime import json from openlibrary.core import db +@dataclass +class WikiDataAPIResponse: + type: str + labels: dict + descriptions: dict + aliases: dict + statements: dict + sitelinks: dict + id: str + + @classmethod + def from_dict(cls, data: dict): + return cls( + type=data.get('type'), + labels=data.get('labels'), + descriptions=data.get('descriptions'), + aliases=data.get('aliases'), + statements=data.get('statements'), + sitelinks=data.get('sitelinks'), + id=data.get('id'), + ) + + +@dataclass +class WikiDataEntity: + id: str + data: WikiDataAPIResponse + updated: datetime + + def description(self, language: str = 'en') -> str | None: + """If a description isn't available in the requested language default to English""" + return self.data.descriptions.get(language) or self.data.descriptions.get('en') + + @classmethod + def from_db_query(cls, data: web.utils.Storage): + return cls( + id=data.id, + data=data.data, + updated=data.updated, + ) + + +def _get_from_web(id: str) -> WikiDataEntity | None: + response = requests.get( + f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{id}' + ) + if response.status_code == 200: + response_json = response.json() + _add_to_cache(id=id, data=response_json) + return WikiDataEntity( + id=id, + data=WikiDataAPIResponse.from_dict(response_json), + updated=datetime.now(), + ) + else: + return None + # TODO: What should we do in non-200 cases? + # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ + + +def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: + """ + This only supports QIDs, if we want to support PIDs we need to use different endpoints + ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache + """ + + entity = WikidataEntities.get_by_id(QID) + if entity and days_since(entity.updated) < ttl_days: + return WikiDataEntity( + id=QID, + data=WikiDataAPIResponse.from_dict(entity.data), + updated=datetime.now(), + ) + else: + return _get_from_web(QID) + + class WikidataRow: id: str data: dict @@ -32,15 +116,17 @@ def get_by_ids(cls, ids: list[str]) -> list[WikidataRow]: ) ) - @classmethod - def add(cls, id: str, data: dict) -> None: - # TODO: when we upgrade to postgres 9.5+ we should use upsert here - oldb = db.get_db() - json_data = json.dumps(data) - - if cls.get_by_id(id): - return oldb.update( - cls.TABLENAME, where="id=$id", vars={'id': id}, data=json_data - ) - else: - return oldb.insert(cls.TABLENAME, id=id, data=json_data) + +# TODO: typehint the data? +def _add_to_cache(id: str, data: dict) -> None: + # TODO: when we upgrade to postgres 9.5+ we should use upsert here + oldb = db.get_db() + json_data = json.dumps(data) + cls = WikidataEntities + + if cls.get_by_id(id): + return oldb.update( + cls.TABLENAME, where="id=$id", vars={'id': id}, data=json_data + ) + else: + return oldb.insert(cls.TABLENAME, id=id, data=json_data) diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py index f521b34f65f..e69de29bb2d 100644 --- a/openlibrary/plugins/wikidata/code.py +++ b/openlibrary/plugins/wikidata/code.py @@ -1,43 +0,0 @@ -""" -The purpose of this file is to: -1. Interact with the Wikidata API -2. Store the results -3. Make the results easy to access from other files -""" -import requests -from dataclasses import dataclass -from openlibrary.core.helpers import days_since - -from openlibrary.core.wikidata import WikidataEntities - - -@dataclass -class WikiDataEntity: - id: str - descriptions: dict[str, str] - - def description(self, language: str = 'en') -> str | None: - """If a description isn't available in the requested language default to English""" - return self.descriptions.get(language) or self.descriptions.get('en') - - -def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: - """ - This only supports QIDs, if we want to support PIDs we need to use different endpoints - ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache - """ - entity = WikidataEntities.get_by_id(QID) - if entity and days_since(entity.updated) < ttl_days: - return WikiDataEntity(id=QID, descriptions=entity.data["descriptions"]) - else: - response = requests.get( - f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{QID}' - ) - if response.status_code == 200: - response_json = response.json() - WikidataEntities.add(id=QID, data=response_json) - return WikiDataEntity(id=QID, descriptions=response_json["descriptions"]) - else: - return None - # TODO: What should we do in non-200 cases? - # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ From a646b4b5c4e8cace25a0d09753b3152055d77d31 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 01:35:08 +0200 Subject: [PATCH 26/64] get rid of WikidataEntities --- openlibrary/core/wikidata.py | 39 +++++++++++++++--------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 858e8267307..6cec40a97d6 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -80,7 +80,7 @@ def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache """ - entity = WikidataEntities.get_by_id(QID) + entity = _get_from_cache(QID) if entity and days_since(entity.updated) < ttl_days: return WikiDataEntity( id=QID, @@ -97,24 +97,20 @@ class WikidataRow: updated: datetime -class WikidataEntities: - TABLENAME = "wikidata" +def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataRow]: + # TODO: convert to WikidataRow ? + return list( + db.get_db().query( + 'select * from wikidata where id IN ($ids)', + vars={'ids': ids}, + ) + ) - @classmethod - def get_by_id(cls, id: str) -> WikidataRow | None: - if len(result := cls.get_by_ids([id])) > 0: - return result[0] - return None - @classmethod - def get_by_ids(cls, ids: list[str]) -> list[WikidataRow]: - # TODO: convert to WikidataRow ? - return list( - db.get_db().query( - 'select * from wikidata where id IN ($ids)', - vars={'ids': ids}, - ) - ) +def _get_from_cache(id: str) -> WikiDataEntity | None: + if len(result := _get_from_cache_by_ids([id])) > 0: + return result[0] + return None # TODO: typehint the data? @@ -122,11 +118,8 @@ def _add_to_cache(id: str, data: dict) -> None: # TODO: when we upgrade to postgres 9.5+ we should use upsert here oldb = db.get_db() json_data = json.dumps(data) - cls = WikidataEntities - if cls.get_by_id(id): - return oldb.update( - cls.TABLENAME, where="id=$id", vars={'id': id}, data=json_data - ) + if _get_from_cache(id): + return oldb.update("wikidata", where="id=$id", vars={'id': id}, data=json_data) else: - return oldb.insert(cls.TABLENAME, id=id, data=json_data) + return oldb.insert("wikidata", id=id, data=json_data) From ddc5e9ff200a2ce3afd886fc7f79d1ef2a53cf92 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 01:53:24 +0200 Subject: [PATCH 27/64] remove wikidatarow --- openlibrary/core/models.py | 2 +- openlibrary/core/wikidata.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 3f1605c20cd..876f49514e5 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -763,7 +763,7 @@ def wikidata(self, use_cache: bool = True) -> WikiDataEntity | None: if use_cache: return get_wikidata_entity(QID=wd_id) else: - return get_wikidata_entity(QID=wd_id, ttl=0) + return get_wikidata_entity(QID=wd_id, ttl_days=0) return None def __repr__(self): diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 6cec40a97d6..6d9dc3225f0 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -6,6 +6,7 @@ """ import requests import web +import dataclasses from dataclasses import dataclass from openlibrary.core.helpers import days_since @@ -62,12 +63,13 @@ def _get_from_web(id: str) -> WikiDataEntity | None: ) if response.status_code == 200: response_json = response.json() - _add_to_cache(id=id, data=response_json) - return WikiDataEntity( + entity = WikiDataEntity( id=id, data=WikiDataAPIResponse.from_dict(response_json), updated=datetime.now(), ) + _add_to_cache(id=id, data=dataclasses.asdict(entity.data)) + return entity else: return None # TODO: What should we do in non-200 cases? @@ -91,20 +93,14 @@ def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: return _get_from_web(QID) -class WikidataRow: - id: str - data: dict - updated: datetime - - -def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataRow]: - # TODO: convert to WikidataRow ? - return list( +def _get_from_cache_by_ids(ids: list[str]) -> list[WikiDataEntity]: + response = list( db.get_db().query( 'select * from wikidata where id IN ($ids)', vars={'ids': ids}, ) ) + return [WikiDataEntity.from_db_query(r) for r in response] def _get_from_cache(id: str) -> WikiDataEntity | None: From e4f25563192d7bc3a1a72218b788337c87375a09 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 02:03:56 +0200 Subject: [PATCH 28/64] cache typehints --- openlibrary/core/wikidata.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 6d9dc3225f0..bb1b4e27f61 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -49,11 +49,11 @@ def description(self, language: str = 'en') -> str | None: return self.data.descriptions.get(language) or self.data.descriptions.get('en') @classmethod - def from_db_query(cls, data: web.utils.Storage): + def from_db_query(cls, response: web.utils.Storage): return cls( - id=data.id, - data=data.data, - updated=data.updated, + id=response.id, + data=response.data, # TODO: convert this? + updated=response.updated, ) @@ -68,7 +68,7 @@ def _get_from_web(id: str) -> WikiDataEntity | None: data=WikiDataAPIResponse.from_dict(response_json), updated=datetime.now(), ) - _add_to_cache(id=id, data=dataclasses.asdict(entity.data)) + _add_to_cache(entity) return entity else: return None @@ -84,11 +84,7 @@ def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: entity = _get_from_cache(QID) if entity and days_since(entity.updated) < ttl_days: - return WikiDataEntity( - id=QID, - data=WikiDataAPIResponse.from_dict(entity.data), - updated=datetime.now(), - ) + return entity else: return _get_from_web(QID) @@ -109,13 +105,14 @@ def _get_from_cache(id: str) -> WikiDataEntity | None: return None -# TODO: typehint the data? -def _add_to_cache(id: str, data: dict) -> None: +def _add_to_cache(entity: WikiDataEntity) -> None: # TODO: when we upgrade to postgres 9.5+ we should use upsert here oldb = db.get_db() - json_data = json.dumps(data) + json_data = json.dumps(dataclasses.asdict(entity.data)) - if _get_from_cache(id): - return oldb.update("wikidata", where="id=$id", vars={'id': id}, data=json_data) + if _get_from_cache(entity.id): + return oldb.update( + "wikidata", where="id=$id", vars={'id': entity.id}, data=json_data + ) else: - return oldb.insert("wikidata", id=id, data=json_data) + return oldb.insert("wikidata", id=entity.id, data=json_data) From 5fb0af661ac1ce54d397f78f6ab43f727bc13bac Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 02:15:00 +0200 Subject: [PATCH 29/64] fix from_db_query --- openlibrary/core/wikidata.py | 40 ++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index bb1b4e27f61..db2bc0a765f 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -52,28 +52,18 @@ def description(self, language: str = 'en') -> str | None: def from_db_query(cls, response: web.utils.Storage): return cls( id=response.id, - data=response.data, # TODO: convert this? + data=WikiDataAPIResponse.from_dict(response.data), updated=response.updated, ) - -def _get_from_web(id: str) -> WikiDataEntity | None: - response = requests.get( - f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{id}' - ) - if response.status_code == 200: - response_json = response.json() - entity = WikiDataEntity( - id=id, - data=WikiDataAPIResponse.from_dict(response_json), - updated=datetime.now(), + @classmethod + def from_web(cls, response: dict): + data = WikiDataAPIResponse.from_dict(response.data) + return cls( + id=data.id, + data=data, + updated=datetime.now, ) - _add_to_cache(entity) - return entity - else: - return None - # TODO: What should we do in non-200 cases? - # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: @@ -89,6 +79,20 @@ def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: return _get_from_web(QID) +def _get_from_web(id: str) -> WikiDataEntity | None: + response = requests.get( + f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{id}' + ) + if response.status_code == 200: + entity = WikiDataEntity.from_web(response.json()) + _add_to_cache(entity) + return entity + else: + return None + # TODO: What should we do in non-200 cases? + # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ + + def _get_from_cache_by_ids(ids: list[str]) -> list[WikiDataEntity]: response = list( db.get_db().query( From c50a3b5a783ef0dcd5cd6cd643337d5e77753271 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 02:18:03 +0200 Subject: [PATCH 30/64] fix datetime --- openlibrary/core/wikidata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index db2bc0a765f..85cf290de3f 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -62,7 +62,7 @@ def from_web(cls, response: dict): return cls( id=data.id, data=data, - updated=datetime.now, + updated=datetime.now(), ) From 32c19179d503afac51f3907485e13f06a05b5e2c Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 02:22:02 +0200 Subject: [PATCH 31/64] fix dict --- openlibrary/core/wikidata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 85cf290de3f..4fa398d8e83 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -58,7 +58,7 @@ def from_db_query(cls, response: web.utils.Storage): @classmethod def from_web(cls, response: dict): - data = WikiDataAPIResponse.from_dict(response.data) + data = WikiDataAPIResponse.from_dict(response) return cls( id=data.id, data=data, From e24ffcfb5ba271602033f326fee0d796bcae7f42 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 02:25:58 +0200 Subject: [PATCH 32/64] use [] --- openlibrary/core/wikidata.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 4fa398d8e83..6fe70e4f2c8 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -27,14 +27,15 @@ class WikiDataAPIResponse: @classmethod def from_dict(cls, data: dict): + # use [''] instead of get so this fails if fields are missing return cls( - type=data.get('type'), - labels=data.get('labels'), - descriptions=data.get('descriptions'), - aliases=data.get('aliases'), - statements=data.get('statements'), - sitelinks=data.get('sitelinks'), - id=data.get('id'), + type=data['type'], + labels=data['labels'], + descriptions=data['descriptions'], + aliases=data['aliases'], + statements=data['statements'], + sitelinks=data['sitelinks'], + id=data['id'], ) From cffc7d6fec7eb6fefe7f4b520a33141175c256c5 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 26 Sep 2023 02:28:09 +0200 Subject: [PATCH 33/64] remove extra blank line --- openlibrary/core/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 876f49514e5..58c98e38d91 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -40,7 +40,6 @@ from ..accounts import OpenLibraryAccount from ..plugins.upstream.utils import get_coverstore_url, get_coverstore_public_url - logger = logging.getLogger("openlibrary.core") From aee7edd5441b79d1a82ae96fe6f9041bbfe0f045 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 24 Oct 2023 15:56:30 +0200 Subject: [PATCH 34/64] ttl -> use_cache --- openlibrary/core/models.py | 2 +- openlibrary/core/wikidata.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 58c98e38d91..33f441879b3 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -762,7 +762,7 @@ def wikidata(self, use_cache: bool = True) -> WikiDataEntity | None: if use_cache: return get_wikidata_entity(QID=wd_id) else: - return get_wikidata_entity(QID=wd_id, ttl_days=0) + return get_wikidata_entity(QID=wd_id, use_cache=False) return None def __repr__(self): diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 6fe70e4f2c8..c39925f07de 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -14,6 +14,8 @@ import json from openlibrary.core import db +WIKIDATA_CACHE_TTL_DAYS = 30 + @dataclass class WikiDataAPIResponse: @@ -67,14 +69,14 @@ def from_web(cls, response: dict): ) -def get_wikidata_entity(QID: str, ttl_days: int = 30) -> WikiDataEntity | None: +def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikiDataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache """ entity = _get_from_cache(QID) - if entity and days_since(entity.updated) < ttl_days: + if entity and use_cache and days_since(entity.updated) < WIKIDATA_CACHE_TTL_DAYS: return entity else: return _get_from_web(QID) @@ -105,6 +107,9 @@ def _get_from_cache_by_ids(ids: list[str]) -> list[WikiDataEntity]: def _get_from_cache(id: str) -> WikiDataEntity | None: + """ + The cache is OpenLibrary's Postgres instead of calling the Wikidata API + """ if len(result := _get_from_cache_by_ids([id])) > 0: return result[0] return None From 739f374bc3aa6973657ca3f8482201908c3ef3d9 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 24 Oct 2023 18:00:11 +0200 Subject: [PATCH 35/64] rename to APIResponse --- openlibrary/core/wikidata.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index c39925f07de..574dc186b7d 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -18,7 +18,7 @@ @dataclass -class WikiDataAPIResponse: +class APIResponse: type: str labels: dict descriptions: dict @@ -44,7 +44,7 @@ def from_dict(cls, data: dict): @dataclass class WikiDataEntity: id: str - data: WikiDataAPIResponse + data: APIResponse updated: datetime def description(self, language: str = 'en') -> str | None: @@ -55,13 +55,13 @@ def description(self, language: str = 'en') -> str | None: def from_db_query(cls, response: web.utils.Storage): return cls( id=response.id, - data=WikiDataAPIResponse.from_dict(response.data), + data=APIResponse.from_dict(response.data), updated=response.updated, ) @classmethod def from_web(cls, response: dict): - data = WikiDataAPIResponse.from_dict(response) + data = APIResponse.from_dict(response) return cls( id=data.id, data=data, From 45cc3fd3cf4beb30c198751d65202e7e4f9b8bee Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 24 Oct 2023 18:00:50 +0200 Subject: [PATCH 36/64] fix capitalization --- openlibrary/core/models.py | 4 ++-- openlibrary/core/wikidata.py | 21 +++++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 33f441879b3..507c225c109 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -28,7 +28,7 @@ from openlibrary.core.ratings import Ratings from openlibrary.utils import extract_numeric_id_from_olid, dateutil from openlibrary.utils.isbn import to_isbn_13, isbn_13_to_isbn_10, canonical -from openlibrary.core.wikidata import WikiDataEntity, get_wikidata_entity +from openlibrary.core.wikidata import WikidataEntity, get_wikidata_entity from . import cache, waitinglist @@ -757,7 +757,7 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self, use_cache: bool = True) -> WikiDataEntity | None: + def wikidata(self, use_cache: bool = True) -> WikidataEntity | None: if wd_id := self.remote_ids.get("wikidata"): if use_cache: return get_wikidata_entity(QID=wd_id) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 574dc186b7d..ddadb249100 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -19,6 +19,11 @@ @dataclass class APIResponse: + """ + This is the model of the api response from WikiData + https://www.wikidata.org/wiki/Wikidata:REST_API + """ + type: str labels: dict descriptions: dict @@ -42,7 +47,7 @@ def from_dict(cls, data: dict): @dataclass -class WikiDataEntity: +class WikidataEntity: id: str data: APIResponse updated: datetime @@ -69,7 +74,7 @@ def from_web(cls, response: dict): ) -def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikiDataEntity | None: +def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache @@ -82,12 +87,12 @@ def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikiDataEntity | No return _get_from_web(QID) -def _get_from_web(id: str) -> WikiDataEntity | None: +def _get_from_web(id: str) -> WikidataEntity | None: response = requests.get( f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{id}' ) if response.status_code == 200: - entity = WikiDataEntity.from_web(response.json()) + entity = WikidataEntity.from_web(response.json()) _add_to_cache(entity) return entity else: @@ -96,17 +101,17 @@ def _get_from_web(id: str) -> WikiDataEntity | None: # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ -def _get_from_cache_by_ids(ids: list[str]) -> list[WikiDataEntity]: +def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataEntity]: response = list( db.get_db().query( 'select * from wikidata where id IN ($ids)', vars={'ids': ids}, ) ) - return [WikiDataEntity.from_db_query(r) for r in response] + return [WikidataEntity.from_db_query(r) for r in response] -def _get_from_cache(id: str) -> WikiDataEntity | None: +def _get_from_cache(id: str) -> WikidataEntity | None: """ The cache is OpenLibrary's Postgres instead of calling the Wikidata API """ @@ -115,7 +120,7 @@ def _get_from_cache(id: str) -> WikiDataEntity | None: return None -def _add_to_cache(entity: WikiDataEntity) -> None: +def _add_to_cache(entity: WikidataEntity) -> None: # TODO: when we upgrade to postgres 9.5+ we should use upsert here oldb = db.get_db() json_data = json.dumps(dataclasses.asdict(entity.data)) From a846401adfccf093a7376d26c01adb3a51de035f Mon Sep 17 00:00:00 2001 From: RayBB Date: Thu, 26 Oct 2023 16:38:19 +0200 Subject: [PATCH 37/64] merge wikidata classes --- openlibrary/core/wikidata.py | 74 +++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index ddadb249100..637db70c56b 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -18,61 +18,62 @@ @dataclass -class APIResponse: +class WikidataEntity: """ - This is the model of the api response from WikiData + This is the model of the api response from WikiData plus the updated field https://www.wikidata.org/wiki/Wikidata:REST_API """ + id: str type: str labels: dict descriptions: dict aliases: dict statements: dict sitelinks: dict - id: str - - @classmethod - def from_dict(cls, data: dict): - # use [''] instead of get so this fails if fields are missing - return cls( - type=data['type'], - labels=data['labels'], - descriptions=data['descriptions'], - aliases=data['aliases'], - statements=data['statements'], - sitelinks=data['sitelinks'], - id=data['id'], - ) - - -@dataclass -class WikidataEntity: - id: str - data: APIResponse - updated: datetime + updated: datetime # This is when we fetched the data, not when the entity was changed in Wikidata def description(self, language: str = 'en') -> str | None: """If a description isn't available in the requested language default to English""" - return self.data.descriptions.get(language) or self.data.descriptions.get('en') + return self.descriptions.get(language) or self.descriptions.get('en') @classmethod - def from_db_query(cls, response: web.utils.Storage): + def from_db_query(cls, db_response: web.utils.Storage): + response = db_response.data return cls( - id=response.id, - data=APIResponse.from_dict(response.data), - updated=response.updated, + id=response['id'], + type=response['type'], + labels=response['labels'], + descriptions=response['descriptions'], + aliases=response['aliases'], + statements=response['statements'], + sitelinks=response['sitelinks'], + updated=db_response['updated'], ) @classmethod def from_web(cls, response: dict): - data = APIResponse.from_dict(response) return cls( - id=data.id, - data=data, + id=response['id'], + type=response['type'], + labels=response['labels'], + descriptions=response['descriptions'], + aliases=response['aliases'], + statements=response['statements'], + sitelinks=response['sitelinks'], updated=datetime.now(), ) + def as_api_response_str(self) -> str: + """ + Transforms the dataclass a JSON string like we get from the Wikidata API. + This is used for staring the json in the database. + """ + self_dict = dataclasses.asdict(self) + # remove the updated field because it's not part of the API response and is stored in its own column + self_dict.pop('updated') + return json.dumps(self_dict) + def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | None: """ @@ -121,13 +122,18 @@ def _get_from_cache(id: str) -> WikidataEntity | None: def _add_to_cache(entity: WikidataEntity) -> None: - # TODO: when we upgrade to postgres 9.5+ we should use upsert here + # TODO: after we upgrade to postgres 9.5+ we should use upsert here oldb = db.get_db() - json_data = json.dumps(dataclasses.asdict(entity.data)) + json_data = entity.as_api_response_str() if _get_from_cache(entity.id): return oldb.update( - "wikidata", where="id=$id", vars={'id': entity.id}, data=json_data + "wikidata", + where="id=$id", + vars={'id': entity.id}, + data=json_data, + updated=datetime.now(), ) else: + # We don't provide the updated column on insert because postgres defaults to the current time return oldb.insert("wikidata", id=entity.id, data=json_data) From ceba66d08aed43734f807efc0b7a6bb0aff8ebfb Mon Sep 17 00:00:00 2001 From: RayBB Date: Thu, 26 Oct 2023 16:46:31 +0200 Subject: [PATCH 38/64] simplify with one from_dict method --- openlibrary/core/wikidata.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 637db70c56b..f5e92d88539 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -38,8 +38,7 @@ def description(self, language: str = 'en') -> str | None: return self.descriptions.get(language) or self.descriptions.get('en') @classmethod - def from_db_query(cls, db_response: web.utils.Storage): - response = db_response.data + def from_dict(cls, response: dict, updated: datetime): return cls( id=response['id'], type=response['type'], @@ -48,20 +47,7 @@ def from_db_query(cls, db_response: web.utils.Storage): aliases=response['aliases'], statements=response['statements'], sitelinks=response['sitelinks'], - updated=db_response['updated'], - ) - - @classmethod - def from_web(cls, response: dict): - return cls( - id=response['id'], - type=response['type'], - labels=response['labels'], - descriptions=response['descriptions'], - aliases=response['aliases'], - statements=response['statements'], - sitelinks=response['sitelinks'], - updated=datetime.now(), + updated=updated, ) def as_api_response_str(self) -> str: @@ -93,7 +79,9 @@ def _get_from_web(id: str) -> WikidataEntity | None: f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{id}' ) if response.status_code == 200: - entity = WikidataEntity.from_web(response.json()) + entity = WikidataEntity.from_dict( + response=response.json(), updated=datetime.now() + ) _add_to_cache(entity) return entity else: @@ -109,7 +97,9 @@ def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataEntity]: vars={'ids': ids}, ) ) - return [WikidataEntity.from_db_query(r) for r in response] + return [ + WikidataEntity.from_dict(response=r.data, updated=r.updated) for r in response + ] def _get_from_cache(id: str) -> WikidataEntity | None: From 19d179f3dd4407b52fb3cc05c1a2752564aa8491 Mon Sep 17 00:00:00 2001 From: RayBB Date: Thu, 26 Oct 2023 16:48:39 +0200 Subject: [PATCH 39/64] improve when we call cache --- openlibrary/core/wikidata.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index f5e92d88539..8870f9106b0 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -64,14 +64,14 @@ def as_api_response_str(self) -> str: def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints - ttl (time to live) inspired by the cachetools api https://cachetools.readthedocs.io/en/latest/#cachetools.TTLCache """ - entity = _get_from_cache(QID) - if entity and use_cache and days_since(entity.updated) < WIKIDATA_CACHE_TTL_DAYS: - return entity - else: - return _get_from_web(QID) + if use_cache: + entity = _get_from_postgres_cache(QID) + if entity and days_since(entity.updated) < WIKIDATA_CACHE_TTL_DAYS: + return entity + + return _get_from_web(QID) def _get_from_web(id: str) -> WikidataEntity | None: @@ -102,7 +102,7 @@ def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataEntity]: ] -def _get_from_cache(id: str) -> WikidataEntity | None: +def _get_from_postgres_cache(id: str) -> WikidataEntity | None: """ The cache is OpenLibrary's Postgres instead of calling the Wikidata API """ @@ -116,7 +116,7 @@ def _add_to_cache(entity: WikidataEntity) -> None: oldb = db.get_db() json_data = entity.as_api_response_str() - if _get_from_cache(entity.id): + if _get_from_postgres_cache(entity.id): return oldb.update( "wikidata", where="id=$id", From fb0326c3a4159ae18ed6e32289d7032eb97921ec Mon Sep 17 00:00:00 2001 From: RayBB Date: Thu, 26 Oct 2023 17:03:10 +0200 Subject: [PATCH 40/64] move endpoint to const --- openlibrary/core/wikidata.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 8870f9106b0..23cef92ab33 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -5,7 +5,6 @@ 3. Make the results easy to access from other files """ import requests -import web import dataclasses from dataclasses import dataclass from openlibrary.core.helpers import days_since @@ -14,6 +13,7 @@ import json from openlibrary.core import db +WIKIDATA_API_URL = 'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/' WIKIDATA_CACHE_TTL_DAYS = 30 @@ -55,10 +55,16 @@ def as_api_response_str(self) -> str: Transforms the dataclass a JSON string like we get from the Wikidata API. This is used for staring the json in the database. """ - self_dict = dataclasses.asdict(self) - # remove the updated field because it's not part of the API response and is stored in its own column - self_dict.pop('updated') - return json.dumps(self_dict) + entity_dict = { + 'id': self.id, + 'type': self.type, + 'labels': self.labels, + 'descriptions': self.descriptions, + 'aliases': self.aliases, + 'statements': self.statements, + 'sitelinks': self.sitelinks, + } + return json.dumps(entity_dict) def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | None: @@ -67,7 +73,7 @@ def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | No """ if use_cache: - entity = _get_from_postgres_cache(QID) + entity = _get_from_cache(QID) if entity and days_since(entity.updated) < WIKIDATA_CACHE_TTL_DAYS: return entity @@ -75,9 +81,7 @@ def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | No def _get_from_web(id: str) -> WikidataEntity | None: - response = requests.get( - f'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/{id}' - ) + response = requests.get(f'{WIKIDATA_API_URL}{id}') if response.status_code == 200: entity = WikidataEntity.from_dict( response=response.json(), updated=datetime.now() @@ -102,7 +106,7 @@ def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataEntity]: ] -def _get_from_postgres_cache(id: str) -> WikidataEntity | None: +def _get_from_cache(id: str) -> WikidataEntity | None: """ The cache is OpenLibrary's Postgres instead of calling the Wikidata API """ @@ -116,7 +120,7 @@ def _add_to_cache(entity: WikidataEntity) -> None: oldb = db.get_db() json_data = entity.as_api_response_str() - if _get_from_postgres_cache(entity.id): + if _get_from_cache(entity.id): return oldb.update( "wikidata", where="id=$id", From d970dc54923ca357fee77d9ebbfbb397c72993fd Mon Sep 17 00:00:00 2001 From: RayBB Date: Thu, 26 Oct 2023 17:07:37 +0200 Subject: [PATCH 41/64] remove unused import --- openlibrary/core/wikidata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 23cef92ab33..40e3b7369a4 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -5,7 +5,6 @@ 3. Make the results easy to access from other files """ import requests -import dataclasses from dataclasses import dataclass from openlibrary.core.helpers import days_since From 299dce425d15cd8f42a938cfb7d1f230e8538009 Mon Sep 17 00:00:00 2001 From: RayBB Date: Thu, 26 Oct 2023 17:09:42 +0200 Subject: [PATCH 42/64] only use datetime.now once --- openlibrary/core/wikidata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 40e3b7369a4..7080d0de72c 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -125,7 +125,7 @@ def _add_to_cache(entity: WikidataEntity) -> None: where="id=$id", vars={'id': entity.id}, data=json_data, - updated=datetime.now(), + updated=entity.updated, ) else: # We don't provide the updated column on insert because postgres defaults to the current time From c9cf97f23ee303fd66f2945a6fa0d59491cfbd56 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Mar 2024 01:00:47 +0000 Subject: [PATCH 43/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openlibrary/core/wikidata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 7080d0de72c..129564f6ec6 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -4,6 +4,7 @@ 2. Store the results 3. Make the results easy to access from other files """ + import requests from dataclasses import dataclass from openlibrary.core.helpers import days_since From 3ca35bd4d79442e179c471353dadeeb7f1a14c27 Mon Sep 17 00:00:00 2001 From: Raymond Berger Date: Mon, 25 Mar 2024 16:28:27 +0100 Subject: [PATCH 44/64] https link Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- openlibrary/templates/wikidata_author.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html index c8d76ad701a..0c628f6ca3e 100644 --- a/openlibrary/templates/wikidata_author.html +++ b/openlibrary/templates/wikidata_author.html @@ -5,7 +5,7 @@

$_('Quick Info')

$wd_entity.description(i18n.get_locale())

- $_('Powered by Wikidata') + $_('Powered by Wikidata')
From 3f50100595e5180ddab599a735ca31f34bf9664d Mon Sep 17 00:00:00 2001 From: Raymond Berger Date: Mon, 25 Mar 2024 16:28:56 +0100 Subject: [PATCH 45/64] Update openlibrary/core/models.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- openlibrary/core/models.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 507c225c109..7b5a728d2ed 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -759,10 +759,7 @@ def get_url_suffix(self): def wikidata(self, use_cache: bool = True) -> WikidataEntity | None: if wd_id := self.remote_ids.get("wikidata"): - if use_cache: - return get_wikidata_entity(QID=wd_id) - else: - return get_wikidata_entity(QID=wd_id, use_cache=False) + return get_wikidata_entity(QID=wd_id, use_cache=use_cache) return None def __repr__(self): From f29317a971208005b7083adda41cc437ae2f2292 Mon Sep 17 00:00:00 2001 From: RayBB Date: Sun, 14 Apr 2024 23:31:13 +0200 Subject: [PATCH 46/64] default to using cache --- openlibrary/core/models.py | 4 ++-- openlibrary/core/wikidata.py | 19 ++++++++++++------- openlibrary/templates/type/author/edit.html | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 7b5a728d2ed..912b7918c24 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -757,9 +757,9 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self, use_cache: bool = True) -> WikidataEntity | None: + def wikidata(self, bust_cache: bool = False) -> WikidataEntity | None: if wd_id := self.remote_ids.get("wikidata"): - return get_wikidata_entity(QID=wd_id, use_cache=use_cache) + return get_wikidata_entity(QID=wd_id, bust_cache=bust_cache) return None def __repr__(self): diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 129564f6ec6..e527c52f095 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -67,17 +67,22 @@ def as_api_response_str(self) -> str: return json.dumps(entity_dict) -def get_wikidata_entity(QID: str, use_cache: bool = True) -> WikidataEntity | None: +def _cache_expired(entity: WikidataEntity) -> bool: + return days_since(entity.updated) > WIKIDATA_CACHE_TTL_DAYS + + +def get_wikidata_entity(QID: str, bust_cache: bool = False) -> WikidataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints """ + if bust_cache: + _get_from_web(QID) - if use_cache: - entity = _get_from_cache(QID) - if entity and days_since(entity.updated) < WIKIDATA_CACHE_TTL_DAYS: - return entity - - return _get_from_web(QID) + if entity := _get_from_cache(QID): + if _cache_expired(entity): + return _get_from_web(QID) + return entity + return None def _get_from_web(id: str) -> WikidataEntity | None: diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index eef9e5f5bd3..4bf4bb200ab 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -34,7 +34,7 @@

$_("Edit Author")

- $:render_template("wikidata_author", page.wikidata(use_cache=False)) + $:render_template("wikidata_author", page.wikidata(bust_cache=True))
From 9521310098af50971b4aab08082221db270ab6c7 Mon Sep 17 00:00:00 2001 From: RayBB Date: Mon, 15 Apr 2024 00:24:36 +0200 Subject: [PATCH 47/64] remove visual changes --- openlibrary/templates/type/author/edit.html | 2 +- static/css/components/form.olform.less | 11 ----------- static/css/components/wikidatabox.less | 15 --------------- static/css/page-user.less | 2 -- static/images/icons/edit.svg | 2 -- 5 files changed, 1 insertion(+), 31 deletions(-) delete mode 100644 static/css/components/wikidatabox.less delete mode 100644 static/images/icons/edit.svg diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 4bf4bb200ab..d703d44e679 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -21,7 +21,7 @@

$_("Edit Author")

-
+
diff --git a/static/css/components/form.olform.less b/static/css/components/form.olform.less index 1e6d6405195..f87423c3913 100644 --- a/static/css/components/form.olform.less +++ b/static/css/components/form.olform.less @@ -193,10 +193,6 @@ color: @grey; font-family: @lucida_sans_serif-1 !important; } - .nameAndWikidata { - display: flex; - flex-direction: column; - } } .olform__input--large { @@ -264,13 +260,6 @@ } } } - - .nameAndWikidata { - flex-direction: row; - .formElement{ - flex-basis: calc(2 / 3 * 100%); - } - } } /* stylelint-enable selector-max-specificity */ } diff --git a/static/css/components/wikidatabox.less b/static/css/components/wikidatabox.less deleted file mode 100644 index f62a72ce510..00000000000 --- a/static/css/components/wikidatabox.less +++ /dev/null @@ -1,15 +0,0 @@ -.wikidatabox{ - p { - margin-bottom: 0; - } - h3 { - margin-top: 2px; - margin-bottom: 2px; - } - div { - margin: 5px; - } - .powered-by-link{ - font-size: .6em; - } -} diff --git a/static/css/page-user.less b/static/css/page-user.less index 58cb1cd1f0c..0f06bcb795a 100644 --- a/static/css/page-user.less +++ b/static/css/page-user.less @@ -235,5 +235,3 @@ tr.table-row.selected{ // Import styles for want-to-read buttons @import (less) "legacy-tools.less"; @import (less) "components/mybooks-dropper.less"; -// Import styles for wikidatabox -@import (less) "components/wikidatabox.less"; diff --git a/static/images/icons/edit.svg b/static/images/icons/edit.svg deleted file mode 100644 index f287625205f..00000000000 --- a/static/images/icons/edit.svg +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file From d903c8186fb013ebc1650c122a27c500680a0df1 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Mon, 15 Apr 2024 12:31:11 +0000 Subject: [PATCH 48/64] better comment --- openlibrary/core/wikidata.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index e527c52f095..793b66738c9 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -74,6 +74,9 @@ def _cache_expired(entity: WikidataEntity) -> bool: def get_wikidata_entity(QID: str, bust_cache: bool = False) -> WikidataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints + By default this will only use the cache (unless it is expired). + This is to avoid overwhelming Wikidata servers with requests from every visit to an author page. + bust_cache must be set to True if you want to fetch new items from Wikidata. """ if bust_cache: _get_from_web(QID) From bab60d73d1eecc90c3c6ffffdcf55535cd5cc719 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Mon, 15 Apr 2024 12:47:07 +0000 Subject: [PATCH 49/64] add fetch_missing --- openlibrary/core/models.py | 8 ++++++-- openlibrary/core/wikidata.py | 8 +++++++- openlibrary/templates/type/author/edit.html | 2 +- openlibrary/templates/type/author/view.html | 3 +-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index 912b7918c24..dffdf123f7e 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -757,9 +757,13 @@ def url(self, suffix="", **params): def get_url_suffix(self): return self.name or "unnamed" - def wikidata(self, bust_cache: bool = False) -> WikidataEntity | None: + def wikidata( + self, bust_cache: bool = False, fetch_missing: bool = False + ) -> WikidataEntity | None: if wd_id := self.remote_ids.get("wikidata"): - return get_wikidata_entity(QID=wd_id, bust_cache=bust_cache) + return get_wikidata_entity( + QID=wd_id, bust_cache=bust_cache, fetch_missing=fetch_missing + ) return None def __repr__(self): diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 793b66738c9..2dd87f526bc 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -71,7 +71,9 @@ def _cache_expired(entity: WikidataEntity) -> bool: return days_since(entity.updated) > WIKIDATA_CACHE_TTL_DAYS -def get_wikidata_entity(QID: str, bust_cache: bool = False) -> WikidataEntity | None: +def get_wikidata_entity( + QID: str, bust_cache: bool = False, fetch_missing: bool = False +) -> WikidataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints By default this will only use the cache (unless it is expired). @@ -85,6 +87,10 @@ def get_wikidata_entity(QID: str, bust_cache: bool = False) -> WikidataEntity | if _cache_expired(entity): return _get_from_web(QID) return entity + + if fetch_missing and not entity: + return _get_from_web(QID) + return None diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index d703d44e679..94b425dd920 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -34,7 +34,7 @@

$_("Edit Author")

- $:render_template("wikidata_author", page.wikidata(bust_cache=True)) + $:render_template("wikidata_author", page.wikidata(bust_cache=True, fetch_missing=True))
diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index dc0505ed295..3cd7c13b4e7 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -153,8 +153,7 @@

$:render_template("covers/author_photo", page) $:render_template("covers/change", page, ".bookCover img")

- - $:render_template("wikidata_author", page.wikidata()) + $:render_template("wikidata_author", page.wikidata(fetch_missing=show_librarian_extras)) $def render_subjects(label, subjects, prefix): $if subjects: From fdfbbe6a7a5914e4c0f88f47c10b51f0527fe79e Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Mon, 15 Apr 2024 13:37:50 +0000 Subject: [PATCH 50/64] simplify html --- openlibrary/templates/type/author/edit.html | 2 +- openlibrary/templates/type/author/view.html | 2 +- openlibrary/templates/wikidata_author.html | 12 ------------ 3 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 openlibrary/templates/wikidata_author.html diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 94b425dd920..05f12d90bc3 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -34,7 +34,7 @@

$_("Edit Author")

- $:render_template("wikidata_author", page.wikidata(bust_cache=True, fetch_missing=True)) +

$page.wikidata(bust_cache=True, fetch_missing=True).description(i18n.get_locale())

diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index 3cd7c13b4e7..dea32aae3e8 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -153,7 +153,7 @@

$:render_template("covers/author_photo", page) $:render_template("covers/change", page, ".bookCover img")

- $:render_template("wikidata_author", page.wikidata(fetch_missing=show_librarian_extras)) +

$page.wikidata(fetch_missing=show_librarian_extras).description(i18n.get_locale())

$def render_subjects(label, subjects, prefix): $if subjects: diff --git a/openlibrary/templates/wikidata_author.html b/openlibrary/templates/wikidata_author.html deleted file mode 100644 index 0c628f6ca3e..00000000000 --- a/openlibrary/templates/wikidata_author.html +++ /dev/null @@ -1,12 +0,0 @@ -$def with (wd_entity) - -$if wd_entity: -
-

$_('Quick Info')

-
-

$wd_entity.description(i18n.get_locale())

- $_('Powered by Wikidata') - - -
-
From d9f50c9076e6c574a1220286bc1bfc1ec4b8a8fe Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Mon, 15 Apr 2024 13:51:43 +0000 Subject: [PATCH 51/64] text align center p tags --- openlibrary/templates/type/author/edit.html | 4 +++- openlibrary/templates/type/author/view.html | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 05f12d90bc3..109a51ad5b8 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -34,7 +34,9 @@

$_("Edit Author")

-

$page.wikidata(bust_cache=True, fetch_missing=True).description(i18n.get_locale())

+

+ $page.wikidata(bust_cache=True, fetch_missing=True).description(i18n.get_locale()) +

diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index dea32aae3e8..af4aab07f69 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -153,7 +153,9 @@

$:render_template("covers/author_photo", page) $:render_template("covers/change", page, ".bookCover img")

-

$page.wikidata(fetch_missing=show_librarian_extras).description(i18n.get_locale())

+

+ $page.wikidata(fetch_missing=show_librarian_extras).description(i18n.get_locale()) +

$def render_subjects(label, subjects, prefix): $if subjects: From 03e6d28a6f8db5188ddfd1fa29da223141c8e7ec Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 16 Apr 2024 00:12:49 +0200 Subject: [PATCH 52/64] lowercase qid --- openlibrary/core/models.py | 2 +- openlibrary/core/wikidata.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py index dffdf123f7e..b1b904125dd 100644 --- a/openlibrary/core/models.py +++ b/openlibrary/core/models.py @@ -762,7 +762,7 @@ def wikidata( ) -> WikidataEntity | None: if wd_id := self.remote_ids.get("wikidata"): return get_wikidata_entity( - QID=wd_id, bust_cache=bust_cache, fetch_missing=fetch_missing + qid=wd_id, bust_cache=bust_cache, fetch_missing=fetch_missing ) return None diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 2dd87f526bc..1bed82bfbd2 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -72,7 +72,7 @@ def _cache_expired(entity: WikidataEntity) -> bool: def get_wikidata_entity( - QID: str, bust_cache: bool = False, fetch_missing: bool = False + qid: str, bust_cache: bool = False, fetch_missing: bool = False ) -> WikidataEntity | None: """ This only supports QIDs, if we want to support PIDs we need to use different endpoints @@ -81,15 +81,15 @@ def get_wikidata_entity( bust_cache must be set to True if you want to fetch new items from Wikidata. """ if bust_cache: - _get_from_web(QID) + _get_from_web(qid) - if entity := _get_from_cache(QID): + if entity := _get_from_cache(qid): if _cache_expired(entity): - return _get_from_web(QID) + return _get_from_web(qid) return entity if fetch_missing and not entity: - return _get_from_web(QID) + return _get_from_web(qid) return None From 3a543bbc938dd19b8e424d670f65fe81f83cfddf Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 16 Apr 2024 00:19:30 +0200 Subject: [PATCH 53/64] add typehints --- openlibrary/core/wikidata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 1bed82bfbd2..83b44f33e25 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -26,11 +26,11 @@ class WikidataEntity: id: str type: str - labels: dict - descriptions: dict - aliases: dict - statements: dict - sitelinks: dict + labels: dict[str, str] + descriptions: dict[str, str] + aliases: dict[str, list[str]] + statements: dict[str, dict] + sitelinks: dict[str, dict] updated: datetime # This is when we fetched the data, not when the entity was changed in Wikidata def description(self, language: str = 'en') -> str | None: From 5679a67bfc2b66dada7d7132fddd9752562504d1 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 16 Apr 2024 00:20:29 +0200 Subject: [PATCH 54/64] _updated --- openlibrary/core/wikidata.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 83b44f33e25..7c93a42ea92 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -31,7 +31,7 @@ class WikidataEntity: aliases: dict[str, list[str]] statements: dict[str, dict] sitelinks: dict[str, dict] - updated: datetime # This is when we fetched the data, not when the entity was changed in Wikidata + _updated: datetime # This is when we fetched the data, not when the entity was changed in Wikidata def description(self, language: str = 'en') -> str | None: """If a description isn't available in the requested language default to English""" @@ -47,7 +47,7 @@ def from_dict(cls, response: dict, updated: datetime): aliases=response['aliases'], statements=response['statements'], sitelinks=response['sitelinks'], - updated=updated, + _updated=updated, ) def as_api_response_str(self) -> str: @@ -68,7 +68,7 @@ def as_api_response_str(self) -> str: def _cache_expired(entity: WikidataEntity) -> bool: - return days_since(entity.updated) > WIKIDATA_CACHE_TTL_DAYS + return days_since(entity._updated) > WIKIDATA_CACHE_TTL_DAYS def get_wikidata_entity( @@ -140,7 +140,7 @@ def _add_to_cache(entity: WikidataEntity) -> None: where="id=$id", vars={'id': entity.id}, data=json_data, - updated=entity.updated, + updated=entity._updated, ) else: # We don't provide the updated column on insert because postgres defaults to the current time From 31669508c201edab9f1c460fc0ded0c9ebdac3e4 Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 16 Apr 2024 00:22:27 +0200 Subject: [PATCH 55/64] get_description --- openlibrary/core/wikidata.py | 2 +- openlibrary/templates/type/author/edit.html | 2 +- openlibrary/templates/type/author/view.html | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 7c93a42ea92..f482d46fdbe 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -33,7 +33,7 @@ class WikidataEntity: sitelinks: dict[str, dict] _updated: datetime # This is when we fetched the data, not when the entity was changed in Wikidata - def description(self, language: str = 'en') -> str | None: + def get_description(self, language: str = 'en') -> str | None: """If a description isn't available in the requested language default to English""" return self.descriptions.get(language) or self.descriptions.get('en') diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index 109a51ad5b8..b79aee02f3e 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -35,7 +35,7 @@

$_("Edit Author")

- $page.wikidata(bust_cache=True, fetch_missing=True).description(i18n.get_locale()) + $page.wikidata(bust_cache=True, fetch_missing=True).get_description(i18n.get_locale())

diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index af4aab07f69..b4eae437fe1 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -154,7 +154,7 @@

$:render_template("covers/change", page, ".bookCover img")

- $page.wikidata(fetch_missing=show_librarian_extras).description(i18n.get_locale()) + $page.wikidata(fetch_missing=show_librarian_extras).get_description(i18n.get_locale())

$def render_subjects(label, subjects, prefix): From 195b0402333369ad6671f30be62078ecd1c0014c Mon Sep 17 00:00:00 2001 From: RayBB Date: Tue, 16 Apr 2024 00:27:07 +0200 Subject: [PATCH 56/64] delete empty code.py --- openlibrary/plugins/wikidata/code.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 openlibrary/plugins/wikidata/code.py diff --git a/openlibrary/plugins/wikidata/code.py b/openlibrary/plugins/wikidata/code.py deleted file mode 100644 index e69de29bb2d..00000000000 From 1683f485dc3ff5757f458068d2f064527ef5e63f Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:03:50 +0000 Subject: [PATCH 57/64] **response --- openlibrary/core/wikidata.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index f482d46fdbe..447a53e8161 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -40,13 +40,7 @@ def get_description(self, language: str = 'en') -> str | None: @classmethod def from_dict(cls, response: dict, updated: datetime): return cls( - id=response['id'], - type=response['type'], - labels=response['labels'], - descriptions=response['descriptions'], - aliases=response['aliases'], - statements=response['statements'], - sitelinks=response['sitelinks'], + **response, _updated=updated, ) From 78a6cf28282837ecfb71132936b8652dcb8f6b58 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:27:27 +0000 Subject: [PATCH 58/64] handle no wikidata case --- openlibrary/templates/type/author/edit.html | 4 +++- openlibrary/templates/type/author/view.html | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index b79aee02f3e..e52deb4719b 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -35,7 +35,9 @@

$_("Edit Author")

- $page.wikidata(bust_cache=True, fetch_missing=True).get_description(i18n.get_locale()) + $ wikidata = page.wikidata(bust_cache=True, fetch_missing=True) + $if wikidata: + $wikidata.get_description(i18n.get_locale())

diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index b4eae437fe1..461a437191c 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -154,7 +154,9 @@

$:render_template("covers/change", page, ".bookCover img")

- $page.wikidata(fetch_missing=show_librarian_extras).get_description(i18n.get_locale()) + $ wikidata = page.wikidata(fetch_missing=show_librarian_extras) + $if wikidata: + $wikidata.get_description(i18n.get_locale())

$def render_subjects(label, subjects, prefix): From 751899ef1fc76051e08f2dec1c8321a51f090fac Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:33:23 +0000 Subject: [PATCH 59/64] add error logging --- openlibrary/core/wikidata.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index 447a53e8161..b0bd7895efb 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -6,6 +6,7 @@ """ import requests +import logging from dataclasses import dataclass from openlibrary.core.helpers import days_since @@ -13,6 +14,8 @@ import json from openlibrary.core import db +logger = logging.getLogger("core.wikidata") + WIKIDATA_API_URL = 'https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/' WIKIDATA_CACHE_TTL_DAYS = 30 @@ -97,9 +100,9 @@ def _get_from_web(id: str) -> WikidataEntity | None: _add_to_cache(entity) return entity else: + logger.error(f'Wikidata Response: {response.status_code}, id: {id}') return None - # TODO: What should we do in non-200 cases? - # They're documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ + # Responses documented here https://doc.wikimedia.org/Wikibase/master/js/rest-api/ def _get_from_cache_by_ids(ids: list[str]) -> list[WikidataEntity]: From e34c6a562a951c49ff3112077ccc443a78c6819c Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:35:51 +0000 Subject: [PATCH 60/64] simplify if --- openlibrary/core/wikidata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index b0bd7895efb..ad59cce66a0 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -121,7 +121,7 @@ def _get_from_cache(id: str) -> WikidataEntity | None: """ The cache is OpenLibrary's Postgres instead of calling the Wikidata API """ - if len(result := _get_from_cache_by_ids([id])) > 0: + if result := _get_from_cache_by_ids([id]): return result[0] return None From be8656372a1597f0ae1c2c3d092fd33f029df0ee Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:40:15 +0000 Subject: [PATCH 61/64] typo --- openlibrary/core/wikidata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index ad59cce66a0..f73b81ab6c9 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -50,7 +50,7 @@ def from_dict(cls, response: dict, updated: datetime): def as_api_response_str(self) -> str: """ Transforms the dataclass a JSON string like we get from the Wikidata API. - This is used for staring the json in the database. + This is used for storing the json in the database. """ entity_dict = { 'id': self.id, From 3fd3620cabd63edd9c961896aa1c0f726aeb35f1 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:45:42 +0000 Subject: [PATCH 62/64] to_wikidata_api_json_format --- openlibrary/core/wikidata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openlibrary/core/wikidata.py b/openlibrary/core/wikidata.py index f73b81ab6c9..61812f4ec50 100644 --- a/openlibrary/core/wikidata.py +++ b/openlibrary/core/wikidata.py @@ -47,7 +47,7 @@ def from_dict(cls, response: dict, updated: datetime): _updated=updated, ) - def as_api_response_str(self) -> str: + def to_wikidata_api_json_format(self) -> str: """ Transforms the dataclass a JSON string like we get from the Wikidata API. This is used for storing the json in the database. @@ -76,6 +76,7 @@ def get_wikidata_entity( By default this will only use the cache (unless it is expired). This is to avoid overwhelming Wikidata servers with requests from every visit to an author page. bust_cache must be set to True if you want to fetch new items from Wikidata. + # TODO: After bulk data imports we should set fetch_missing to true (or remove it). """ if bust_cache: _get_from_web(qid) @@ -129,7 +130,7 @@ def _get_from_cache(id: str) -> WikidataEntity | None: def _add_to_cache(entity: WikidataEntity) -> None: # TODO: after we upgrade to postgres 9.5+ we should use upsert here oldb = db.get_db() - json_data = entity.as_api_response_str() + json_data = entity.to_wikidata_api_json_format() if _get_from_cache(entity.id): return oldb.update( From 2662f2b38f635bd0d0535d859c1ecbbac644fa63 Mon Sep 17 00:00:00 2001 From: Ray Berger Date: Tue, 16 Apr 2024 15:53:41 +0000 Subject: [PATCH 63/64] lower wikidata section for testing --- openlibrary/templates/type/author/view.html | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/openlibrary/templates/type/author/view.html b/openlibrary/templates/type/author/view.html index 461a437191c..94a2985883c 100644 --- a/openlibrary/templates/type/author/view.html +++ b/openlibrary/templates/type/author/view.html @@ -153,11 +153,6 @@

$:render_template("covers/author_photo", page) $:render_template("covers/change", page, ".bookCover img") -

- $ wikidata = page.wikidata(fetch_missing=show_librarian_extras) - $if wikidata: - $wikidata.get_description(i18n.get_locale()) -

$def render_subjects(label, subjects, prefix): $if subjects: @@ -175,6 +170,16 @@

$label
$:render_subjects(_("Time"), books.facet_counts.get('time_facet'), 'time:') +
+
TESTING ONLY WIKIDATA SECTION
+

+ + $ wikidata = page.wikidata(fetch_missing=show_librarian_extras) + $if wikidata: + $wikidata.get_description(i18n.get_locale()) +

+
+ $if "lists" in ctx.features:
$:render_template("lists/widget", page, include_rating=False, exclude_own_lists=True, show_active_lists=True) From 37f34d3df0043ca32f7875599562f18d003b2885 Mon Sep 17 00:00:00 2001 From: RayBB Date: Sun, 21 Apr 2024 01:56:16 +0200 Subject: [PATCH 64/64] restore extra line --- openlibrary/templates/type/author/edit.html | 1 + 1 file changed, 1 insertion(+) diff --git a/openlibrary/templates/type/author/edit.html b/openlibrary/templates/type/author/edit.html index e52deb4719b..efc7adb808d 100644 --- a/openlibrary/templates/type/author/edit.html +++ b/openlibrary/templates/type/author/edit.html @@ -21,6 +21,7 @@

$_("Edit Author")

+