Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor lyrics tests, do not search for empty metadata #5452

Merged
merged 14 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ jobs:
sudo apt update
sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository1.0-dev pandoc

- name: Get changed lyrics files
id: lyrics-update
uses: tj-actions/changed-files@v45
with:
files: |
beetsplug/lyrics.py
test/plugins/test_lyrics.py
snejus marked this conversation as resolved.
Show resolved Hide resolved

- name: Add pytest annotator
uses: liskin/gh-problem-matcher-wrap@v3
with:
Expand All @@ -44,13 +52,15 @@ jobs:
- if: ${{ env.IS_MAIN_PYTHON != 'true' }}
name: Test without coverage
run: |
poetry install --extras=autobpm
poetry install --extras=autobpm --extras=lyrics
poe test

- if: ${{ env.IS_MAIN_PYTHON == 'true' }}
name: Test with coverage
env:
LYRICS_UPDATED: ${{ steps.lyrics-update.outputs.any_changed }}
run: |
poetry install --extras=autobpm --extras=docs --extras=replaygain --extras=reflink
poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink
poe docs
poe test-with-coverage

Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ to get a basic view on how tests are written. Since we are currently migrating
the tests from `unittest`_ to `pytest`_, new tests should be written using
`pytest`_. Contributions migrating existing tests are welcome!

External API requests under test should be mocked with `requests_mock`_,
External API requests under test should be mocked with `requests-mock`_,
However, we still want to know whether external APIs are up and that they
return expected responses, therefore we test them weekly with our `integration
test`_ suite.
Expand Down
106 changes: 58 additions & 48 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,19 @@
import unicodedata
import warnings
from functools import partial
from typing import ClassVar
from typing import TYPE_CHECKING, ClassVar
from urllib.parse import quote, urlencode

import requests
from unidecode import unidecode

import beets
from beets import plugins, ui

if TYPE_CHECKING:
from beets.importer import ImportTask
from beets.library import Item

try:
import bs4
from bs4 import SoupStrainer
Expand All @@ -47,10 +54,6 @@
except ImportError:
HAS_LANGDETECT = False


import beets
from beets import plugins, ui

DIV_RE = re.compile(r"<(/?)div>?", re.I)
COMMENT_RE = re.compile(r"<!--.*-->", re.S)
TAG_RE = re.compile(r"<[^>]*>")
Expand Down Expand Up @@ -152,7 +155,13 @@ def generate_alternatives(string, patterns):
alternatives.append(match.group(1))
return alternatives

title, artist, artist_sort = item.title, item.artist, item.artist_sort
title, artist, artist_sort = (
item.title.strip(),
item.artist.strip(),
item.artist_sort.strip(),
)
if not title or not artist:
return ()

patterns = [
# Remove any featuring artists from the artists name
Expand All @@ -161,7 +170,7 @@ def generate_alternatives(string, patterns):
artists = generate_alternatives(artist, patterns)
# Use the artist_sort as fallback only if it differs from artist to avoid
# repeated remote requests with the same search terms
if artist != artist_sort:
if artist_sort and artist.lower() != artist_sort.lower():
artists.append(artist_sort)

patterns = [
Expand Down Expand Up @@ -222,7 +231,7 @@ def __init__(self, config, log):
self._log = log
self.config = config

def fetch_url(self, url):
def fetch_url(self, url, **kwargs):
"""Retrieve the content at a given URL, or return None if the source
is unreachable.
"""
Expand All @@ -240,6 +249,7 @@ def fetch_url(self, url):
"User-Agent": USER_AGENT,
},
timeout=10,
**kwargs,
)
except requests.RequestException as exc:
self._log.debug("lyrics request failed: {0}", exc)
Expand All @@ -250,20 +260,27 @@ def fetch_url(self, url):
self._log.debug("failed to fetch: {0} ({1})", url, r.status_code)
return None

def fetch(self, artist, title, album=None, length=None):
raise NotImplementedError()
def fetch(
self, artist: str, title: str, album: str, length: int
) -> str | None:
raise NotImplementedError


class LRCLib(Backend):
base_url = "https://lrclib.net/api/get"

def fetch(self, artist, title, album=None, length=None):
params = {
def fetch(
self, artist: str, title: str, album: str, length: int
) -> str | None:
params: dict[str, str | int] = {
"artist_name": artist,
"track_name": title,
"album_name": album,
"duration": length,
}
if album:
params["album_name"] = album

if length:
params["duration"] = length

try:
response = requests.get(
Expand Down Expand Up @@ -316,7 +333,7 @@ def encode(cls, text: str) -> str:

return quote(unidecode(text))

def fetch(self, artist, title, album=None, length=None):
def fetch(self, artist: str, title: str, *_) -> str | None:
url = self.build_url(artist, title)

html = self.fetch_url(url)
Expand Down Expand Up @@ -364,7 +381,7 @@ def __init__(self, config, log):
"User-Agent": USER_AGENT,
}

def fetch(self, artist, title, album=None, length=None):
def fetch(self, artist: str, title: str, *_) -> str | None:
"""Fetch lyrics from genius.com

Because genius doesn't allow accessing lyrics via the api,
Expand Down Expand Up @@ -495,7 +512,7 @@ class Tekstowo(DirectBackend):
def encode(cls, text: str) -> str:
return cls.non_alpha_to_underscore(unidecode(text.lower()))

def fetch(self, artist, title, album=None, length=None):
def fetch(self, artist: str, title: str, *_) -> str | None:
if html := self.fetch_url(self.build_url(artist, title)):
return self.extract_lyrics(html)

Expand Down Expand Up @@ -536,6 +553,8 @@ def _scrape_strip_cruft(html, plain_text_out=False):
html = BREAK_RE.sub("\n", html) # <br> eats up surrounding '\n'.
html = re.sub(r"(?s)<(script).*?</\1>", "", html) # Strip script tags.
html = re.sub("\u2005", " ", html) # replace unicode with regular space
html = re.sub("<aside .+?</aside>", "", html) # remove Google Ads tags
html = re.sub(r"</?(em|strong)[^>]*>", "", html) # remove italics / bold

if plain_text_out: # Strip remaining HTML tags
html = COMMENT_RE.sub("", html)
Expand Down Expand Up @@ -586,11 +605,7 @@ class Google(Backend):
"""Fetch lyrics from Google search results."""

REQUIRES_BS = True

def __init__(self, config, log):
super().__init__(config, log)
self.api_key = config["google_API_key"].as_str()
self.engine_id = config["google_engine_ID"].as_str()
SEARCH_URL = "https://www.googleapis.com/customsearch/v1"

def is_lyrics(self, text, artist=None):
"""Determine whether the text seems to be valid lyrics."""
Expand Down Expand Up @@ -667,15 +682,14 @@ def is_page_candidate(self, url_link, url_title, title, artist):
ratio = difflib.SequenceMatcher(None, song_title, title).ratio()
return ratio >= typo_ratio

def fetch(self, artist, title, album=None, length=None):
query = f"{artist} {title}"
url = "https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s" % (
self.api_key,
self.engine_id,
quote(query.encode("utf-8")),
)
def fetch(self, artist: str, title: str, *_) -> str | None:
params = {
"key": self.config["google_API_key"].as_str(),
"cx": self.config["google_engine_ID"].as_str(),
"q": f"{artist} {title}",
}

data = self.fetch_url(url)
data = self.fetch_url(self.SEARCH_URL, params=params)
if not data:
self._log.debug("google backend returned no data")
return None
Expand Down Expand Up @@ -877,10 +891,7 @@ def func(lib, opts, args):
for item in items:
if not opts.local_only and not self.config["local"]:
self.fetch_item_lyrics(
lib,
item,
write,
opts.force_refetch or self.config["force"],
item, write, opts.force_refetch or self.config["force"]
)
if item.lyrics:
if opts.printlyr:
Expand Down Expand Up @@ -966,15 +977,13 @@ def writerest_indexes(self, directory):
with open(conffile, "w") as output:
output.write(REST_CONF_TEMPLATE)

def imported(self, session, task):
def imported(self, _, task: ImportTask) -> None:
"""Import hook for fetching lyrics automatically."""
if self.config["auto"]:
for item in task.imported_items():
self.fetch_item_lyrics(
session.lib, item, False, self.config["force"]
)
self.fetch_item_lyrics(item, False, self.config["force"])

def fetch_item_lyrics(self, lib, item, write, force):
def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None:
"""Fetch and store lyrics for a single item. If ``write``, then the
lyrics will also be written to the file itself.
"""
Expand All @@ -983,18 +992,17 @@ def fetch_item_lyrics(self, lib, item, write, force):
self._log.info("lyrics already present: {0}", item)
return

lyrics = None
album = item.album
length = round(item.length)
lyrics_matches = []
album, length = item.album, round(item.length)
for artist, titles in search_pairs(item):
lyrics = [
self.get_lyrics(artist, title, album=album, length=length)
lyrics_matches = [
self.get_lyrics(artist, title, album, length)
for title in titles
]
if any(lyrics):
if any(lyrics_matches):
break

lyrics = "\n\n---\n\n".join(filter(None, lyrics))
lyrics = "\n\n---\n\n".join(filter(None, lyrics_matches))

if lyrics:
self._log.info("fetched lyrics: {0}", item)
Expand All @@ -1019,18 +1027,20 @@ def fetch_item_lyrics(self, lib, item, write, force):
item.try_write()
item.store()

def get_lyrics(self, artist, title, album=None, length=None):
def get_lyrics(self, artist: str, title: str, *args) -> str | None:
"""Fetch lyrics, trying each source in turn. Return a string or
None if no lyrics were found.
"""
for backend in self.backends:
lyrics = backend.fetch(artist, title, album=album, length=length)
lyrics = backend.fetch(artist, title, *args)
if lyrics:
self._log.debug(
"got lyrics from backend: {0}", backend.__class__.__name__
)
return _scrape_strip_cruft(lyrics, True)

return None

def append_translation(self, text, to_lang):
from xml.etree import ElementTree

Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ Bug fixes:
* Fix ambiguous column name ``sqlite3.OperationalError`` that occured in album
queries that filtered album track titles, for example ``beet list -a keyword
title:foo``.
* :doc:`plugins/lyrics`: Rewrite lyrics tests using pytest to provide isolated
configuration for each test case. This fixes the issue where some tests
failed because they read developers' local lyrics configuration.
:bug:`5133`
* :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the
artist or title is missing and ignore ``artist_sort`` value if it is empty.
:bug:`2635`

For packagers:

Expand Down
21 changes: 19 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ python3-discogs-client = ">=2.3.15"
py7zr = "*"
pyxdg = "*"
rarfile = "*"
requests-mock = ">=1.12.1"
requests_oauthlib = "*"
responses = ">=0.3.0"

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ addopts =
-ra
--strict-config
markers =
on_lyrics_update: mark a test to run only after lyrics source code is updated
integration_test: mark a test as an integration test

[coverage:run]
Expand Down
23 changes: 17 additions & 6 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,21 @@
import pytest


def pytest_runtest_setup(item: pytest.Item):
"""Skip integration tests if INTEGRATION_TEST environment variable is not set."""
if os.environ.get("INTEGRATION_TEST"):
return
def skip_marked_items(items: list[pytest.Item], marker_name: str, reason: str):
for item in (i for i in items if i.get_closest_marker(marker_name)):
test_name = item.nodeid.split("::", 1)[-1]
item.add_marker(pytest.mark.skip(f"{reason}: {test_name}"))

if next(item.iter_markers(name="integration_test"), None):
pytest.skip(f"INTEGRATION_TEST=1 required: {item.nodeid}")

def pytest_collection_modifyitems(
config: pytest.Config, items: list[pytest.Item]
):
if not os.environ.get("INTEGRATION_TEST") == "true":
skip_marked_items(
items, "integration_test", "INTEGRATION_TEST=1 required"
)

if not os.environ.get("LYRICS_UPDATED") == "true":
skip_marked_items(
items, "on_lyrics_update", "No change in lyrics source code"
)
Loading
Loading