Skip to content

Commit

Permalink
Bring back Tekstowo search
Browse files: browse the repository at this point in the history
It was my mistake to remove search earlier - I found that in many cases
it works fine.
  • Loading branch information
snejus committed Jan 20, 2025
1 parent 02c9c84 commit 86f2c0b
Showing 1 changed file with 21 additions and 33 deletions.
54 changes: 21 additions & 33 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from functools import cached_property, partial, total_ordering
from html import unescape
from http import HTTPStatus
from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator, NamedTuple
from urllib.parse import quote, urlencode, urlparse
from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple
from urllib.parse import quote, quote_plus, urlencode, urlparse

import langdetect
import requests
Expand Down Expand Up @@ -396,22 +396,7 @@ def fetch(
return None


class DirectBackend(Backend):
"""A backend for fetching lyrics directly."""

URL_TEMPLATE: ClassVar[str] #: May include formatting placeholders

@classmethod
def encode(cls, text: str) -> str:
"""Encode the string for inclusion in a URL."""
raise NotImplementedError

@classmethod
def build_url(cls, *args: str) -> str:
return cls.URL_TEMPLATE.format(*map(cls.encode, args))


class MusiXmatch(DirectBackend):
class MusiXmatch(Backend):
URL_TEMPLATE = "https://www.musixmatch.com/lyrics/{}/{}"

REPLACEMENTS = {
Expand All @@ -430,6 +415,10 @@ def encode(cls, text: str) -> str:

return quote(unidecode(text))

@classmethod
def build_url(cls, *args: str) -> str:
return cls.URL_TEMPLATE.format(*map(cls.encode, args))

def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None:
url = self.build_url(artist, title)

Expand Down Expand Up @@ -608,26 +597,25 @@ def scrape(cls, html: str) -> str | None:
return None


class Tekstowo(SoupMixin, DirectBackend):
class Tekstowo(SearchBackend):
"""Fetch lyrics from Tekstowo.pl."""

URL_TEMPLATE = "https://www.tekstowo.pl/piosenka,{},{}.html"
BASE_URL = "https://www.tekstowo.pl"
SEARCH_URL = BASE_URL + "/szukaj,{}.html"

non_alpha_to_underscore = partial(re.compile(r"\W").sub, "_")

@classmethod
def encode(cls, text: str) -> str:
return cls.non_alpha_to_underscore(unidecode(text.lower()))
def build_url(self, artist, title):
artistitle = f"{artist.title()} {title.title()}"

def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None:
url = self.build_url(artist, title)
# We are expecting to receive a 404 since we are guessing the URL.
# Thus suppress the error so that it does not end up in the logs.
with suppress(NotFoundError):
if lyrics := self.scrape(self.fetch_text(url)):
return lyrics, url
return self.SEARCH_URL.format(quote_plus(unidecode(artistitle)))

return None
def search(self, artist: str, title: str) -> Iterable[SearchResult]:
if html := self.fetch_text(self.build_url(title, artist)):
soup = self.get_soup(html)
for tag in soup.select("div[class=flex-group] > a[title*=' - ']"):
artist, title = str(tag["title"]).split(" - ", 1)
yield SearchResult(
artist, title, f"{self.BASE_URL}{tag['href']}"
)

return None

Expand Down

0 comments on commit 86f2c0b

Please sign in to comment.