diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 7459f64b60..deec600b87 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -27,8 +27,8 @@ from functools import cached_property, partial, total_ordering from html import unescape from http import HTTPStatus -from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator, NamedTuple -from urllib.parse import quote, urlencode, urlparse +from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple +from urllib.parse import quote, quote_plus, urlencode, urlparse import langdetect import requests @@ -399,22 +399,7 @@ def fetch( return None -class DirectBackend(Backend): - """A backend for fetching lyrics directly.""" - - URL_TEMPLATE: ClassVar[str] #: May include formatting placeholders - - @classmethod - def encode(cls, text: str) -> str: - """Encode the string for inclusion in a URL.""" - raise NotImplementedError - - @classmethod - def build_url(cls, *args: str) -> str: - return cls.URL_TEMPLATE.format(*map(cls.encode, args)) - - -class MusiXmatch(DirectBackend): +class MusiXmatch(Backend): URL_TEMPLATE = "https://www.musixmatch.com/lyrics/{}/{}" REPLACEMENTS = { @@ -433,6 +418,10 @@ def encode(cls, text: str) -> str: return quote(unidecode(text)) + @classmethod + def build_url(cls, *args: str) -> str: + return cls.URL_TEMPLATE.format(*map(cls.encode, args)) + def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None: url = self.build_url(artist, title) @@ -611,26 +600,25 @@ def scrape(cls, html: str) -> str | None: return None -class Tekstowo(SoupMixin, DirectBackend): +class Tekstowo(SearchBackend): """Fetch lyrics from Tekstowo.pl.""" - URL_TEMPLATE = "https://www.tekstowo.pl/piosenka,{},{}.html" + BASE_URL = "https://www.tekstowo.pl" + SEARCH_URL = BASE_URL + "/szukaj,{}.html" - non_alpha_to_underscore = partial(re.compile(r"\W").sub, "_") - - @classmethod - def encode(cls, text: str) -> str: - return cls.non_alpha_to_underscore(unidecode(text.lower())) + def build_url(self, artist, title): + artistitle = f"{artist.title()} {title.title()}" - def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None: - url = self.build_url(artist, title) - # We are expecting to receive a 404 since we are guessing the URL. - # Thus suppress the error so that it does not end up in the logs. - with suppress(NotFoundError): - if lyrics := self.scrape(self.fetch_text(url)): - return lyrics, url + return self.SEARCH_URL.format(quote_plus(unidecode(artistitle))) - return None + def search(self, artist: str, title: str) -> Iterable[SearchResult]: + if html := self.fetch_text(self.build_url(title, artist)): + soup = self.get_soup(html) + for tag in soup.select("div[class=flex-group] > a[title*=' - ']"): + artist, title = str(tag["title"]).split(" - ", 1) + yield SearchResult( + artist, title, f"{self.BASE_URL}{tag['href']}" + ) return None