diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index ad1df7bbe8..8d95909488 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -25,6 +25,8 @@ from __future__ import annotations import os +import re +from collections import defaultdict from functools import singledispatchmethod from pathlib import Path from typing import TYPE_CHECKING, Any @@ -33,7 +35,9 @@ from beets import config, library, plugins, ui from beets.library import Album, Item +from beets.ui import UserError from beets.util import plurality, unique_list +from beetsplug.lastgenre.utils import is_ignored from .client import LastFmClient @@ -44,6 +48,8 @@ from beets.importer import ImportSession, ImportTask from beets.library import LibModel + from .utils import Ignorelist + Whitelist = set[str] """Set of valid genre names (lowercase). Empty set means all genres allowed.""" @@ -51,6 +57,9 @@ """Genre hierarchy as list of paths from general to specific. Example: [['electronic', 'house'], ['electronic', 'techno']]""" + RawIgnorelist = dict[str, list[str]] + """Mapping of artist name to list of raw regex/string patterns.""" + # Canonicalization tree processing. @@ -130,6 +139,7 @@ def __init__(self) -> None: "prefer_specific": False, "title_case": True, "pretend": False, + "ignorelist": False, } ) self.setup() @@ -139,12 +149,13 @@ def setup(self) -> None: if self.config["auto"]: self.import_stages = [self.imported] - self.client = LastFmClient( - self._log, self.config["min_weight"].get(int) - ) self.whitelist: Whitelist = self._load_whitelist() self.c14n_branches: CanonTree self.c14n_branches, self.canonicalize = self._load_c14n_tree() + self.ignorelist: Ignorelist = self._load_ignorelist() + self.client = LastFmClient( + self._log, self.config["min_weight"].get(int), self.ignorelist + ) def _load_whitelist(self) -> Whitelist: """Load the whitelist from a text file. 
@@ -187,6 +198,105 @@ def _load_c14n_tree(self) -> tuple[CanonTree, bool]: flatten_tree(genres_tree, [], c14n_branches) return c14n_branches, canonicalize + def _load_ignorelist(self) -> Ignorelist: + """Load the genre ignorelist from a configured file path. + + For maximum compatibility with regex patterns, a custom format is used: + - Each section starts with an artist name, followed by a colon. + - Subsequent lines are indented (at least one space, typically 4 spaces) + and contain a regex pattern to match a genre or a literal genre name. + - A '*' key for artist can be used to specify global forbidden genres. + + Returns a compiled ignorelist dictionary mapping artist names to lists of + case-insensitive regex patterns. Returns empty dict if not configured. + + Example ignorelist file format:: + + Artist Name: + .*rock.* + .*metal.* + Another Artist Name: + ^jazz$ + *: + spoken word + comedy + + Raises: + UserError: if the ignorelist file cannot be read or if the file + format is invalid. 
+ """ + ignorelist_raw: RawIgnorelist = defaultdict(list) + ignorelist_file = self.config["ignorelist"].get() + if not ignorelist_file: + return {} + if not isinstance(ignorelist_file, str): + raise UserError( + "Invalid value for lastgenre.ignorelist: expected path string " + f"or 'no', got {ignorelist_file!r}" + ) + + self._log.debug("Loading ignorelist file {}", ignorelist_file) + section = None + try: + with Path(ignorelist_file).expanduser().open(encoding="utf-8") as f: + for lineno, line in enumerate(f, 1): + if not line.strip() or line.lstrip().startswith("#"): + continue + if not line.startswith(" "): + # Section header + header = line.strip().lower() + if not header.endswith(":"): + raise UserError( + f"Malformed ignorelist section header " + f"at line {lineno}" + ) + section = header[:-1].rstrip() + if not section: + raise UserError( + f"Empty ignorelist section name " + f"at line {lineno}" + ) + else: + # Pattern line: must be indented (at least one space) + if section is None: + raise UserError( + f"Ignorelist pattern line before any section header " + f"at line {lineno}" + ) + ignorelist_raw[section].append(line.strip()) + except OSError as exc: + raise UserError( + f"Cannot read ignorelist file {ignorelist_file!r}: {exc}" + ) from exc + self._log.extra_debug("Ignorelist: {}", ignorelist_raw) + return self._compile_ignorelist_patterns(ignorelist_raw) + + @staticmethod + def _compile_ignorelist_patterns( + ignorelist: RawIgnorelist, + ) -> Ignorelist: + """Compile ignorelist patterns into regex objects. + + Tries regex compilation first; if the pattern is not valid regex, + falls back to treating it as a literal string. Either way the pattern + must match the entire genre string (full match). To match substrings, + write e.g. ``.*rock.*``. All patterns are case-insensitive. 
+ """ + compiled_ignorelist: defaultdict[str, list[re.Pattern[str]]] = ( + defaultdict(list) + ) + for artist, patterns in ignorelist.items(): + compiled_patterns = [] + for pattern in patterns: + try: + compiled_patterns.append(re.compile(pattern, re.IGNORECASE)) + except re.error: + compiled_patterns.append( + re.compile(re.escape(pattern), re.IGNORECASE) + ) + compiled_ignorelist[artist] = compiled_patterns + return compiled_ignorelist + @property def sources(self) -> tuple[str, ...]: """A tuple of allowed genre sources. May contain 'track', @@ -202,7 +312,9 @@ def sources(self) -> tuple[str, ...]: # Genre list processing. - def _resolve_genres(self, tags: list[str]) -> list[str]: + def _resolve_genres( + self, tags: list[str], artist: str | None = None + ) -> list[str]: """Canonicalize, sort and filter a list of genres. - Returns an empty list if the input tags list is empty. @@ -217,6 +329,9 @@ def _resolve_genres(self, tags: list[str]) -> list[str]: by the specificity (depth in the canonicalization tree) of the genres. - Finally applies whitelist filtering to ensure that only valid genres are kept. (This may result in no genres at all being retained). + - Ignorelist is applied at each stage: ignored input tags skip ancestry + entirely, ignored ancestor tags are dropped, and ignored tags are + removed in the final filter. - Returns the filtered list of genres, limited to the configured count. """ if not tags: @@ -229,14 +344,29 @@ def _resolve_genres(self, tags: list[str]) -> list[str]: # Extend the list to consider tags parents in the c14n tree tags_all = [] for tag in tags: - # Add parents that are in the whitelist, or add the oldest - # ancestor if no whitelist + # Skip ignored tags entirely — don't walk their ancestry. + if is_ignored(self._log, self.ignorelist, tag, artist): + continue + + # Add parents that pass whitelist (and are not ignored, which + # is checked in _filter_valid). 
With whitelist, we may include + # multiple parents if self.whitelist: parents = self._filter_valid( - find_parents(tag, self.c14n_branches) + find_parents(tag, self.c14n_branches), + artist=artist, ) else: - parents = [find_parents(tag, self.c14n_branches)[-1]] + # No whitelist: take only the oldest ancestor, skipping it + # if it is in the ignorelist + oldest = find_parents(tag, self.c14n_branches)[-1] + parents = ( + [] + if is_ignored( + self._log, self.ignorelist, oldest, artist + ) + else [oldest] + ) tags_all += parents # Stop if we have enough tags already, unless we need to find @@ -254,24 +384,34 @@ def _resolve_genres(self, tags: list[str]) -> list[str]: if self.config["prefer_specific"]: tags = sort_by_depth(tags, self.c14n_branches) - # c14n only adds allowed genres but we may have had forbidden genres in - # the original tags list - valid_tags = self._filter_valid(tags) + # Final filter: applies when c14n is disabled, or when c14n ran without + # whitelist filtering in the loop (no-whitelist path). + valid_tags = self._filter_valid(tags, artist=artist) return valid_tags[:count] - def _filter_valid(self, genres: Iterable[str]) -> list[str]: - """Filter genres based on whitelist. + def _filter_valid( + self, genres: Iterable[str], artist: str | None = None + ) -> list[str]: + """Filter genres through whitelist and ignorelist. - Returns all genres if no whitelist is configured, otherwise returns - only genres that are in the whitelist. + Drops empty/whitespace-only strings, then applies whitelist and + ignorelist checks. Returns all genres if neither is configured. + Whitelist is checked first for performance reasons (ignorelist regex + matching is more expensive and for some call sites ignored genres were + already filtered). """ - # First, drop any falsy or whitespace-only genre strings to avoid - # retaining empty tags from multi-valued fields. 
cleaned = [g for g in genres if g and g.strip()] - if not self.whitelist: + if not self.whitelist and not self.ignorelist: return cleaned - return [g for g in cleaned if g.lower() in self.whitelist] + result = [] + for genre in cleaned: + if self.whitelist and genre.lower() not in self.whitelist: + continue + if is_ignored(self._log, self.ignorelist, genre, artist): + continue + result.append(genre) + return result # Genre resolution pipeline. @@ -292,13 +432,13 @@ def _get_existing_genres(self, obj: LibModel) -> list[str]: return genres_list def _combine_resolve_and_log( - self, old: list[str], new: list[str] + self, old: list[str], new: list[str], artist: str | None = None ) -> list[str]: """Combine old and new genres and process via _resolve_genres.""" self._log.debug("raw last.fm tags: {}", new) self._log.debug("existing genres taken into account: {}", old) combined = old + new - return self._resolve_genres(combined) + return self._resolve_genres(combined, artist=artist) def _get_genre(self, obj: LibModel) -> tuple[list[str], str]: """Get the final genre list for an Album or Item object. @@ -320,12 +460,21 @@ def _get_genre(self, obj: LibModel) -> tuple[list[str], str]: and the whitelist feature was disabled. 
""" + def _fallback_stage() -> tuple[list[str], str]: + """Return the fallback genre and label.""" + if fallback := self.config["fallback"].get(): + return [fallback], "fallback" + return [], "fallback unconfigured" + def _try_resolve_stage( - stage_label: str, keep_genres: list[str], new_genres: list[str] + stage_label: str, + keep_genres: list[str], + new_genres: list[str], + artist: str | None = None, ) -> tuple[list[str], str] | None: """Try to resolve genres for a given stage and log the result.""" resolved_genres = self._combine_resolve_and_log( - keep_genres, new_genres + keep_genres, new_genres, artist=artist ) if resolved_genres: suffix = "whitelist" if self.whitelist else "any" @@ -345,11 +494,15 @@ def _try_resolve_stage( # If none are found, we use the fallback (if set). if self.config["cleanup_existing"]: keep_genres = [g.lower() for g in genres] - if result := _try_resolve_stage("cleanup", keep_genres, []): + cleanup_artist = getattr(obj, "albumartist", None) or getattr( + obj, "artist", None + ) + if result := _try_resolve_stage( + "cleanup", keep_genres, [], artist=cleanup_artist + ): return result - # Return fallback string (None if not set). - return self.config["fallback"].get(), "fallback" + return _fallback_stage() # If cleanup_existing is not set, the pre-populated tags are # returned as-is. 
@@ -368,7 +521,7 @@ def _try_resolve_stage( obj.artist, obj.title ): if result := _try_resolve_stage( - "track", keep_genres, new_genres + "track", keep_genres, new_genres, artist=obj.artist ): return result @@ -377,18 +530,21 @@ def _try_resolve_stage( obj.albumartist, obj.album ): if result := _try_resolve_stage( - "album", keep_genres, new_genres + "album", keep_genres, new_genres, artist=obj.albumartist ): return result if "artist" in self.sources: new_genres = [] + stage_artist: str | None = None if isinstance(obj, library.Item): new_genres = self.client.fetch_artist_genre(obj.artist) stage_label = "artist" + stage_artist = obj.artist elif obj.albumartist != config["va_name"].as_str(): new_genres = self.client.fetch_artist_genre(obj.albumartist) stage_label = "album artist" + stage_artist = obj.albumartist if not new_genres: self._log.extra_debug( 'No album artist genre found for "{}", ' @@ -431,27 +587,30 @@ def _try_resolve_stage( if new_genres: if result := _try_resolve_stage( - stage_label, keep_genres, new_genres + stage_label, keep_genres, new_genres, artist=stage_artist ): return result # Nothing found, leave original if configured and valid. - if genres and self.config["keep_existing"]: - if valid_genres := self._filter_valid(genres): + if genres and self.config["keep_existing"].get(): + if isinstance(obj, library.Item): + # For track items, use track artist (important for compilations). + artist = getattr(obj, "artist", None) + else: + # For albums, prefer albumartist, fall back to artist. + artist = getattr(obj, "albumartist", None) or getattr( + obj, "artist", None + ) + if valid_genres := self._filter_valid(genres, artist=artist): return valid_genres, "original fallback" # If the original genre doesn't match a whitelisted genre, check # if we can canonicalize it to find a matching, whitelisted genre! 
if result := _try_resolve_stage( - "original fallback", keep_genres, [] + "original fallback", keep_genres, [], artist=artist ): return result - # Return fallback as a list. - if fallback := self.config["fallback"].get(): - return [fallback], "fallback" - - # No fallback configured. - return [], "fallback unconfigured" + return _fallback_stage() # Beets plugin hooks and CLI. diff --git a/beetsplug/lastgenre/client.py b/beetsplug/lastgenre/client.py index 727702f2f4..31469ccff1 100644 --- a/beetsplug/lastgenre/client.py +++ b/beetsplug/lastgenre/client.py @@ -25,17 +25,20 @@ from beets import plugins +from .utils import is_ignored + if TYPE_CHECKING: from collections.abc import Callable from beets.logging import BeetsLogger + from .utils import Ignorelist + GenreCache = dict[str, list[str]] """Cache mapping entity keys to their genre lists. Keys are formatted as 'entity.arg1-arg2-...' (e.g., 'album.artist-title'). Values are lists of lowercase genre strings.""" - LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY) PYLAST_EXCEPTIONS = ( @@ -48,13 +51,17 @@ class LastFmClient: """Client for fetching genres from Last.fm.""" - def __init__(self, log: BeetsLogger, min_weight: int): + def __init__( + self, log: BeetsLogger, min_weight: int, ignorelist: Ignorelist + ): """Initialize the client. The min_weight parameter filters tags by their minimum weight. + The ignorelist filters forbidden genres directly after Last.fm lookup. 
""" self._log = log self._min_weight = min_weight + self._ignorelist: Ignorelist = ignorelist self._genre_cache: GenreCache = {} def fetch_genre( @@ -120,14 +127,24 @@ def _last_lookup( if any(not s for s in args): return [] - key = f"{entity}.{'-'.join(str(a) for a in args)}" + args_replaced = [a.replace("\u2010", "-") for a in args] + key = f"{entity}.{'-'.join(str(a) for a in args_replaced)}" if key not in self._genre_cache: - args_replaced = [a.replace("\u2010", "-") for a in args] self._genre_cache[key] = self.fetch_genre(method(*args_replaced)) - genre = self._genre_cache[key] - self._log.extra_debug("last.fm (unfiltered) {} tags: {}", entity, genre) - return genre + genres = self._genre_cache[key] + + self._log.extra_debug( + "last.fm (unfiltered) {} tags: {}", entity, genres + ) + + # Filter forbidden genres on every call so ignorelist hits are logged. + # Artist is always the first element in args (album, artist, track lookups). + return [ + g + for g in genres + if not is_ignored(self._log, self._ignorelist, g, args[0]) + ] def fetch_album_genre(self, albumartist: str, albumtitle: str) -> list[str]: """Return genres from Last.fm for the album by albumartist.""" diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py new file mode 100644 index 0000000000..fcca828672 --- /dev/null +++ b/beetsplug/lastgenre/utils.py @@ -0,0 +1,49 @@ +# This file is part of beets. +# Copyright 2026, J0J0 Todos. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +"""Lastgenre plugin shared utilities and types.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import re + + from beets.logging import BeetsLogger + + Ignorelist = dict[str, list[re.Pattern[str]]] + """Mapping of artist name to list of compiled case-insensitive patterns.""" + + +def is_ignored( + logger: BeetsLogger, + ignorelist: Ignorelist, + genre: str, + artist: str | None = None, +) -> bool: + """Check if genre tag should be ignored.""" + if not ignorelist: + return False + genre_lower = genre.lower() + for pattern in ignorelist.get("*") or []: + if pattern.fullmatch(genre_lower): + logger.extra_debug("ignored (global): {}", genre) + return True + for pattern in ignorelist.get((artist or "").lower()) or []: + if pattern.fullmatch(genre_lower): + logger.extra_debug("ignored (artist: {}): {}", artist, genre) + return True + return False diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index b0c01eabd2..5450ae82c7 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -160,6 +160,50 @@ genres remain, set ``whitelist: no``). If ``force`` is disabled the ``keep_existing`` option is simply ignored (since ``force: no`` means ``not touching`` existing tags anyway). 
+Genre Ignorelist +---------------- + +Last.fm tags are crowd-sourced, so they can be wrong — especially for artists +whose names are shared with or confused with others. For example, a "Drum And +Bass" artist named "Fracture" might incorrectly receive "Metal" tags. The +ignorelist lets you reject specific genres globally or per-artist. + +Another use for the ignorelist is to exclude genres that are technically +correct but not useful to you. For example, you might want to exclude "Ska" for +"Bob Marley", even though it is a valid genre for his music. + +Filtering is done in two places: when fetching genres from Last.fm and when +resolving to a final genre list (during canonicalization and whitelisting). + +This means that existing genres are also filtered when the ``force`` and +``keep_existing`` options are enabled (or ``cleanup_existing`` is enabled with +``force: no``). + +A possible ``ignorelist`` file would look like this: + +.. code-block:: text + + fracture: + ^(heavy|black|power|death)?\s?(metal|rock)$|\w+-metal\d*$ + progressive metal + bob marley: + ska + *: + electronic + +A combination of regex patterns and plain genre names is possible. The ``*`` key +applies globally to all artists — use it to block genres you never want, +regardless of artist. Patterns are case-insensitive and must match the full +genre string, so a plain ``metal`` will not match ``heavy metal`` unless you +write a regex like ``.*metal.*``. + +Set the ``ignorelist`` option to the path of a file containing such entries to +enable this feature. + +.. attention:: + + Do not use single or double quotes around the genre names or regex patterns. + Configuration ------------- @@ -200,6 +244,9 @@ file. The available options are: internal whitelist, or ``no`` to consider all genres valid. Default: ``yes``. - **title_case**: Convert the new tags to TitleCase before saving. Default: ``yes``. 
+- **ignorelist**: The path to a file that contains genres to exclude from being + set as genres for specific artists. See `Genre Ignorelist`_ for more details. + Default: ``no``. Running Manually ---------------- diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index a619fd1b1b..bb993b2955 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -14,13 +14,19 @@ """Tests for the 'lastgenre' plugin.""" +import os +import re +import tempfile +from collections import defaultdict from unittest.mock import Mock, patch import pytest from beets.test import _common from beets.test.helper import IOMixin, PluginTestCase +from beets.ui import UserError from beetsplug import lastgenre +from beetsplug.lastgenre.utils import is_ignored class LastGenrePluginTest(IOMixin, PluginTestCase): @@ -202,6 +208,80 @@ def test_sort_by_depth(self): res = lastgenre.sort_by_depth(tags, self.plugin.c14n_branches) assert res == ["ambient", "electronic"] + # Ignorelist tests in resolve_genres and _is_ignored + + def test_ignorelist_filters_genres_in_resolve(self): + """Ignored genres are stripped by _resolve_genres (no c14n). + + Artist-specific and global patterns are both applied. + """ + self._setup_config(whitelist=False, canonical=False) + self.plugin.ignorelist = defaultdict( + list, + { + "the artist": [re.compile(r"^metal$", re.IGNORECASE)], + "*": [re.compile(r"^rock$", re.IGNORECASE)], + }, + ) + result = self.plugin._resolve_genres( + ["metal", "rock", "jazz"], artist="the artist" + ) + assert "metal" not in result, ( + "artist-specific ignored genre must be removed" + ) + assert "rock" not in result, "globally ignored genre must be removed" + assert "jazz" in result, "non-ignored genre must survive" + + def test_ignorelist_stops_c14n_ancestry_walk(self): + """An ignored tag's c14n parents don't bleed into the result. + + Without ignorelist, 'delta blues' canonicalizes to 'blues'. 
+ With 'delta blues' ignored the tag is skipped entirely in the + c14n loop, so 'blues' must not appear either. + """ + self._setup_config(whitelist=False, canonical=True, count=99) + self.plugin.ignorelist = defaultdict( + list, + { + "the artist": [re.compile(r"^delta blues$", re.IGNORECASE)], + }, + ) + result = self.plugin._resolve_genres( + ["delta blues"], artist="the artist" + ) + assert result == [], ( + "ignored tag must not contribute c14n parents to the result" + ) + + def test_ignorelist_c14n_no_whitelist_keeps_oldest_ancestor(self): + """With c14n on and whitelist off, ignorelist must not change the + parent-selection rule: only the oldest ancestor is returned. + """ + self._setup_config(whitelist=False, canonical=True, count=99) + # ignorelist targets an unrelated genre — must not affect parent walking + self.plugin.ignorelist = defaultdict( + list, + {"*": [re.compile(r"^jazz$", re.IGNORECASE)]}, + ) + result = self.plugin._resolve_genres(["delta blues"]) + assert result == ["blues"], ( + "oldest ancestor only must be returned, not the full parent chain" + ) + + def test_ignorelist_c14n_no_whitelist_drops_ignored_ancestor(self): + """With c14n on and whitelist off, if the oldest ancestor itself is + ignored it must be dropped and the tag contributes nothing. 
+ """ + self._setup_config(whitelist=False, canonical=True, count=99) + self.plugin.ignorelist = defaultdict( + list, + {"*": [re.compile(r"^blues$", re.IGNORECASE)]}, + ) + result = self.plugin._resolve_genres(["delta blues"]) + assert result == [], ( + "ignored oldest ancestor must not appear in the result" + ) + @pytest.fixture def config(config): @@ -614,3 +694,200 @@ def mock_fetch_artist_genre(self, artist): # Run assert plugin._get_genre(item) == expected_result + + +# Ignorelist pattern matching tests for _is_ignored, independent of _resolve_genres + + +@pytest.mark.parametrize( + "ignorelist_dict, artist, genre, expected_forbidden", + [ + # Global ignorelist - simple word + ({"*": ["spoken word"]}, "Any Artist", "spoken word", True), + ({"*": ["spoken word"]}, "Any Artist", "jazz", False), + # Global ignorelist - regex pattern + ({"*": [".*electronic.*"]}, "Any Artist", "ambient electronic", True), + ({"*": [".*electronic.*"]}, "Any Artist", "jazz", False), + # Artist-specific ignorelist + ({"metallica": ["metal"]}, "Metallica", "metal", True), + ({"metallica": ["metal"]}, "Iron Maiden", "metal", False), + # Case insensitive matching + ({"metallica": ["metal"]}, "METALLICA", "METAL", True), + # Full-match behavior: plain "metal" must not match "heavy metal" + ({"metallica": ["metal"]}, "Metallica", "heavy metal", False), + # Regex behavior: explicit pattern ".*metal.*" may match "heavy metal" + ({"metallica": [".*metal.*"]}, "Metallica", "heavy metal", True), + # Artist-specific ignorelist - exact match + ({"metallica": ["^Heavy Metal$"]}, "Metallica", "classic metal", False), + # Combined global and artist-specific + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + "Metallica", + "spoken word", + True, + ), + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + "Metallica", + "metal", + True, + ), + # Complex regex pattern with multiple features (raw string) + ( + { + "fracture": [ + r"^(heavy|black|power|death)?\s?(metal|rock)$|\w+-metal\d*$" + ] + 
}, + "Fracture", + "power metal", + True, + ), + # Complex regex pattern with multiple features (regular string) + ( + {"amon tobin": ["d(rum)?[ n/]*b(ass)?"]}, + "Amon Tobin", + "dnb", + True, + ), + # Empty ignorelist + ({}, "Any Artist", "any genre", False), + ], +) +def test_ignorelist_patterns( + config, ignorelist_dict, artist, genre, expected_forbidden +): + """Test ignorelist pattern matching logic directly.""" + + # Disable file-based ignorelist to avoid depending on global config state. + config["lastgenre"]["ignorelist"] = False + + # Initialize plugin + plugin = lastgenre.LastGenrePlugin() + + # Set up compiled ignorelist directly (skipping file parsing) + compiled_ignorelist = defaultdict(list) + for artist_name, patterns in ignorelist_dict.items(): + compiled_ignorelist[artist_name.lower()] = [ + re.compile(pattern, re.IGNORECASE) for pattern in patterns + ] + + plugin.ignorelist = compiled_ignorelist + + result = is_ignored(plugin._log, plugin.ignorelist, genre, artist) + assert result == expected_forbidden + + +def test_ignorelist_literal_fallback_uses_fullmatch(config): + """An invalid-regex pattern falls back to a literal string and must use + full-match semantics: the pattern must equal the entire genre string, + not just appear as a substring. + """ + # Disable file-based ignorelist to avoid depending on global config state. + config["lastgenre"]["ignorelist"] = False + plugin = lastgenre.LastGenrePlugin() + # "[not valid regex" is not valid regex, so _compile_ignorelist_patterns + # escapes and compiles it as a literal. + plugin.ignorelist = lastgenre.LastGenrePlugin._compile_ignorelist_patterns( + {"*": ["[not valid regex"]} + ) + # Exact match must be caught. + assert ( + is_ignored(plugin._log, plugin.ignorelist, "[not valid regex", "") + is True + ) + # Substring must NOT be caught (would have passed with old .search()). 
+ assert ( + is_ignored( + plugin._log, + plugin.ignorelist, + "contains [not valid regex inside", + "", + ) + is False + ) + + +@pytest.mark.parametrize( + "file_content, expected_ignorelist", + [ + # Basic artist with pattern + ("metallica:\n metal", {"metallica": ["metal"]}), + # Global ignorelist + ("*:\n spoken word", {"*": ["spoken word"]}), + # Multiple patterns per artist + ( + "metallica:\n metal\n .*rock.*", + {"metallica": ["metal", ".*rock.*"]}, + ), + # Comments and empty lines skipped + ( + "# comment\n*:\n spoken word\n\nmetallica:\n metal", + {"*": ["spoken word"], "metallica": ["metal"]}, + ), + # Case insensitive artist names — key lowercased, pattern kept as-is + # (patterns compiled with re.IGNORECASE so case doesn't matter for matching) + ("METALLICA:\n METAL", {"metallica": ["METAL"]}), + # Invalid regex pattern that gets escaped + ("artist:\n [invalid(regex", {"artist": ["\\[invalid\\(regex"]}), + # Empty file + ("", {}), + ], +) +def test_ignorelist_file_format(config, file_content, expected_ignorelist): + """Test ignorelist file format parsing.""" + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".txt", delete=False, encoding="utf-8" + ) as f: + f.write(file_content) + ignorelist_file = f.name + + try: + config["lastgenre"]["ignorelist"] = ignorelist_file + plugin = lastgenre.LastGenrePlugin() + + # Convert compiled regex patterns back to strings for comparison + string_ignorelist = { + artist: [p.pattern for p in patterns] + for artist, patterns in plugin.ignorelist.items() + } + + assert string_ignorelist == expected_ignorelist + + finally: + os.unlink(ignorelist_file) + + +@pytest.mark.parametrize( + "invalid_content, expected_error_message", + [ + # Missing colon + ("metallica\n metal", "Malformed ignorelist section header"), + # Pattern before section + (" metal\nmetallica:\n heavy metal", "before any section header"), + # Unindented pattern + ("metallica:\nmetal", "Malformed ignorelist section header"), + ], +) +def 
test_ignorelist_file_format_errors( + config, invalid_content, expected_error_message +): + """Test ignorelist file format error handling.""" + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".txt", delete=False, encoding="utf-8" + ) as f: + f.write(invalid_content) + ignorelist_file = f.name + + try: + config["lastgenre"]["ignorelist"] = ignorelist_file + + with pytest.raises(UserError) as exc_info: + lastgenre.LastGenrePlugin() + + assert expected_error_message in str(exc_info.value) + + finally: + os.unlink(ignorelist_file)