From b2ece1ed0a4e9cded03ff41bd3704a1dc2dd5d06 Mon Sep 17 00:00:00 2001 From: David Bishop Date: Sun, 26 Apr 2026 19:03:00 -0700 Subject: [PATCH 1/3] Surface Bandcamp label-released performers as their own artists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Albums published on a band's page by a different performer (Mortaja on audiophob, The Green Arrows on Analog Africa) used to be filed under the page owner. They now get a synthetic artist ID `{band_id}:{performer_slug}` — scoped per band page so cross-page same-name performers stay distinct. Synthetic IDs that match the page-owner's own slug short-circuit back to the real band on resolve, collapsing any search-time drift before it can persist. Refs music-assistant/support#5389. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../providers/bandcamp/__init__.py | 244 +++++++++++- music_assistant/providers/bandcamp/_ids.py | 76 ++++ .../providers/bandcamp/converters.py | 146 ++++++- .../providers/bandcamp/manifest.json | 2 +- requirements_all.txt | 2 +- tests/providers/bandcamp/test_converters.py | 166 +++++++- tests/providers/bandcamp/test_ids.py | 98 +++++ tests/providers/bandcamp/test_init.py | 9 +- tests/providers/bandcamp/test_provider.py | 368 +++++++++++++++++- 9 files changed, 1068 insertions(+), 43 deletions(-) create mode 100644 music_assistant/providers/bandcamp/_ids.py create mode 100644 tests/providers/bandcamp/test_ids.py diff --git a/music_assistant/providers/bandcamp/__init__.py b/music_assistant/providers/bandcamp/__init__.py index faea640c6a..f11fefe4e6 100644 --- a/music_assistant/providers/bandcamp/__init__.py +++ b/music_assistant/providers/bandcamp/__init__.py @@ -3,7 +3,7 @@ import asyncio from collections.abc import AsyncGenerator, AsyncIterator, Sequence from contextlib import asynccontextmanager, suppress -from typing import cast +from typing import Any, cast from bandcamp_async_api import ( BandcampAPIClient, @@ -58,6 +58,7 @@ from music_assistant.models import ProviderInstanceType from music_assistant.models.music_provider import MusicProvider +from ._ids import make_artist_id, parse_artist_id, slugify_performer from .constants import ( BROWSE_FANS, BROWSE_FOLLOWERS, @@ -73,7 +74,7 @@ PERSON_SUB_ROUTES, SUPPORTED_FEATURES, ) -from .converters import BandcampConverters +from .converters import BandcampConverters, DiscographyItem async def setup( @@ -180,7 +181,18 @@ def is_streaming_provider(self) -> bool: async def search( self, search_query: str, media_types: list[MediaType], limit: int = 50 ) -> SearchResults: - """Perform search on music provider.""" + """Perform search on music provider. + + Bandcamp's autocomplete returns three result kinds: bands/labels (b), + albums (a), and tracks (t). For album/track results, ``band_id`` is + the page owner (could be a label) and ``band_name`` is the *performer* + credit — which for label-released albums differs from the page + owner's own name. We map performer-without-a-band-page entries onto + synthetic artists keyed by ``{band_id}:{slug}`` and emit them in + artist results too, so searching for e.g. "Mortaja" surfaces + Mortaja-on-audiophob even though the performer has no band page of + their own. See :mod:`._ids` for ID semantics. + """ results = SearchResults() if not media_types: return results @@ -196,20 +208,86 @@ async def search( except BandcampAPIError as error: raise InvalidDataError("Unexpected error during Bandcamp search") from error - for item in search_results[:limit]: + capped = search_results[:limit] + # Map band_id -> SearchResultArtist for cross-result dedup. When an + # album/track's `band_name` slug matches the band's own slug, the + # album is by the band itself and we use the plain `{band_id}` ID; + # otherwise we synthesize `{band_id}:{slug}`. + bands_by_id: dict[int, SearchResultArtist] = { + item.id: item for item in capped if isinstance(item, SearchResultArtist) + } + artist_ids_seen: set[str] = set() + synthetic_artists: list[Artist] = [] + + for item in capped: try: if isinstance(item, SearchResultTrack) and MediaType.TRACK in media_types: - results.tracks = [*results.tracks, self._converters.track_from_search(item)] + artist_item_id = self._resolve_search_artist_id(item, bands_by_id) + results.tracks = [ + *results.tracks, + self._converters.track_from_search(item, artist_item_id=artist_item_id), + ] elif isinstance(item, SearchResultAlbum) and MediaType.ALBUM in media_types: - results.albums = [*results.albums, self._converters.album_from_search(item)] + artist_item_id = self._resolve_search_artist_id(item, bands_by_id) + results.albums = [ + *results.albums, + self._converters.album_from_search(item, artist_item_id=artist_item_id), + ] elif isinstance(item, SearchResultArtist) and MediaType.ARTIST in media_types: + artist_ids_seen.add(str(item.id)) results.artists = [*results.artists, self._converters.artist_from_search(item)] except BandcampAPIError as error: self.logger.warning("Failed to convert search result item: %s", error) continue + if MediaType.ARTIST in media_types: + for item in capped: + if not isinstance(item, (SearchResultAlbum, SearchResultTrack)): + continue + if not item.artist_name: + continue + artist_item_id = self._resolve_search_artist_id(item, bands_by_id) + if artist_item_id in artist_ids_seen: + continue + artist_ids_seen.add(artist_item_id) + # Plain (non-synthetic) IDs that weren't already surfaced + # mean a `b` result for this band wasn't in the capped + # window. Skip — the user is searching by performer name, + # so synthetics are what they'd expect to see; a band that + # didn't make the cap shouldn't be reintroduced here. + if ":" not in artist_item_id: + continue + synthetic_artists.append( + self._converters.synthetic_artist( + band_id=item.artist_id, + performer_name=item.artist_name, + url=item.artist_url or None, + image_url=item.image_url, + ) + ) + + if synthetic_artists: + results.artists = [*results.artists, *synthetic_artists] + return results + def _resolve_search_artist_id( + self, + item: SearchResultAlbum | SearchResultTrack, + bands_by_id: dict[int, SearchResultArtist], + ) -> str: + """Decide whether an album/track's artist link is real or synthetic. + + Compares the per-result performer name (autocomplete's ``band_name``, + exposed as ``item.artist_name``) against any matching ``b`` result + in the same search. Same slug → real ``{band_id}``; different → + synthetic ``{band_id}:{slug}``. + """ + band = bands_by_id.get(item.artist_id) + if band and slugify_performer(band.name) == slugify_performer(item.artist_name or ""): + return str(item.artist_id) + return make_artist_id(item.artist_id, item.artist_name) + @throttle_with_retries async def _fetch_collection_page( self, @@ -338,10 +416,76 @@ async def get_library_tracks(self) -> AsyncGenerator[Track, None]: @use_cache(CACHE_METADATA) @throttle_with_retries async def get_artist(self, prov_artist_id: str) -> Artist: - """Get full artist details by id.""" + """Get full artist details by id. + + Accepts both forms: ``"{band_id}"`` (a real band/label page) and + ``"{band_id}:{slug}"`` (a synthetic per-page performer that has + no Bandcamp page of its own — see :mod:`._ids`). + """ try: - api_artist = await self._client.get_artist(prov_artist_id) + band_id, performer_slug = parse_artist_id(prov_artist_id) + except ValueError as error: + raise InvalidDataError(f"Malformed Bandcamp artist ID: {prov_artist_id}") from error + + if performer_slug is None: + try: + api_artist = await self._client.get_artist(band_id) + return self._converters.artist_from_api(api_artist) + except BandcampNotFoundError as error: + raise MediaNotFoundError( + f"Artist {prov_artist_id} not found on Bandcamp" + ) from error + except BandcampRateLimitError as error: + raise ResourceTemporarilyUnavailable( + "Bandcamp rate limit reached", backoff_time=error.retry_after + ) from error + except BandcampAPIError as error: + raise MediaNotFoundError(f"Failed to get artist {prov_artist_id}") from error + + # Synthetic: locate matching items in the band's discography and + # build an artist scoped to that performer. Falls back to the real + # band when the slug actually matches the band's own name (e.g. + # cached IDs constructed before disambiguation was reliable). + return await self._get_synthetic_artist(prov_artist_id, band_id, performer_slug) + + async def _get_synthetic_artist( + self, prov_artist_id: str, band_id: int, performer_slug: str + ) -> Artist: + """Resolve a synthetic ID to either the real band or a per-page performer. + + Order matters. Bandcamp's autocomplete sometimes returns ``a``/``t`` + rows for a band without the matching ``b`` row in the same response + (typical for queries that are album/track titles rather than the + band's name). The search-time path mints a synthetic + ``{band_id}:slug-of-band-own-name`` in that case because it can't + tell band-by-itself from label-release without the ``b`` row. We + collapse that drift here, on the navigation/persistence path, by + resolving the synthetic to the real band whenever the slug equals + the band's own slug — BEFORE consulting the discography. Otherwise + an album whose performer-name happens to match the band's own name + (band-by-itself items with ``artist_name=null``) would build a + shadow synthetic that lives parallel to the real band in MA's + library and produces duplicate artist entries. + """ + try: + api_artist = await self._client.get_artist(band_id) + except BandcampNotFoundError as error: + raise MediaNotFoundError(f"Artist {prov_artist_id} not found on Bandcamp") from error + except BandcampRateLimitError as error: + raise ResourceTemporarilyUnavailable( + "Bandcamp rate limit reached", backoff_time=error.retry_after + ) from error + except BandcampAPIError as error: + raise MediaNotFoundError(f"Failed to get artist {prov_artist_id}") from error + + if slugify_performer(api_artist.name) == performer_slug: return self._converters.artist_from_api(api_artist) + + # Genuine label-style synthetic: the slug names a per-page performer + # distinct from the band itself. Find the credit in the discography + # and synthesize an artist scoped to that performer. + try: + api_discography = await self._fetch_discography(band_id) except BandcampNotFoundError as error: raise MediaNotFoundError(f"Artist {prov_artist_id} not found on Bandcamp") from error except BandcampRateLimitError as error: @@ -351,6 +495,42 @@ async def get_artist(self, prov_artist_id: str) -> Artist: except BandcampAPIError as error: raise MediaNotFoundError(f"Failed to get artist {prov_artist_id}") from error + matching = [ + item + for item in api_discography + if slugify_performer(str(item.get("artist_name") or item.get("band_name") or "")) + == performer_slug + ] + if not matching: + raise MediaNotFoundError(f"Artist {prov_artist_id} not found on Bandcamp") + + first = matching[0] + performer_name = str(first.get("artist_name") or first.get("band_name") or "") + art_id = first.get("art_id") + image_url = f"https://f4.bcbits.com/img/a{art_id}_0.jpg" if art_id else None + # The performer doesn't have their own Bandcamp page; surface the + # hosting band's URL so the artist tile links somewhere meaningful + # (matching what the search-emission path passes through). + return self._converters.synthetic_artist( + band_id=band_id, + performer_name=performer_name, + url=api_artist.url, + image_url=image_url, + ) + + @use_cache(CACHE_METADATA) + @throttle_with_retries + async def _fetch_discography(self, band_id: int) -> list[dict[str, Any]]: + """Fetch a band's discography keyed by band_id (cached). + + Real artist (``"{band_id}"``) and synthetic performer + (``"{band_id}:{slug}"``) lookups both go through this so the + underlying ``mobile/24/band_details`` call hits once per band per + cache window, not once per ``prov_artist_id``. + """ + result: list[dict[str, Any]] = await self._client.get_artist_discography(band_id) + return result + @use_cache(CACHE_METADATA) @throttle_with_retries async def get_album(self, prov_album_id: str) -> Album: @@ -409,13 +589,17 @@ async def get_track(self, prov_track_id: str) -> Track: track=api_track, album_id=api_album.id, album_name=api_album.title, - album_image_url=api_album.art_url, + album_image_url=api_album.art_url or "", + tralbum_artist=api_album.tralbum_artist, ) + # Standalone tracks (album_id=0) carry the performer credit on + # the track itself when fetched directly from tralbum_details. return self._converters.track_from_api( track=api_track, album_id=api_track.album.id if api_track.album else None, album_name=api_track.album.title if api_track.album else "", - album_image_url=api_track.album.art_url if api_track.album else "", + album_image_url=(api_track.album.art_url if api_track.album else "") or "", + tralbum_artist=api_track.tralbum_artist, ) @use_cache(CACHE_METADATA) @@ -431,7 +615,8 @@ async def get_album_tracks(self, prov_album_id: str) -> list[Track]: track=track, album_id=album_id, album_name=api_album.title, - album_image_url=api_album.art_url, + album_image_url=api_album.art_url or "", + tralbum_artist=api_album.tralbum_artist, ) for track in api_album.tracks if track.streaming_url # Only include tracks with streaming URLs @@ -453,14 +638,20 @@ async def get_album_tracks(self, prov_album_id: str) -> list[Track]: @use_cache(CACHE_METADATA) @throttle_with_retries async def get_artist_albums(self, prov_artist_id: str) -> list[Album]: - """Get albums by an artist.""" + """Get albums by an artist. + + For real artist IDs this returns the band's full discography (the + original behavior). For synthetic IDs (``{band_id}:{slug}``) this + filters the band's discography to only the items where the + performer matches. + """ try: - api_discography = await self._client.get_artist_discography(prov_artist_id) - return [ - self._converters.album_from_discography_item(item) - for item in api_discography - if item.get("item_type") == "album" and item.get("item_id") - ] + band_id, performer_slug = parse_artist_id(prov_artist_id) + except ValueError as error: + raise InvalidDataError(f"Malformed Bandcamp artist ID: {prov_artist_id}") from error + + try: + api_discography = await self._fetch_discography(band_id) except BandcampNotFoundError as error: raise MediaNotFoundError( f"Artist {prov_artist_id} albums not found on Bandcamp" @@ -472,6 +663,23 @@ async def get_artist_albums(self, prov_artist_id: str) -> list[Album]: except BandcampAPIError as error: raise MediaNotFoundError(f"Failed to get albums for artist {prov_artist_id}") from error + items = [ + item + for item in api_discography + if item.get("item_type") == "album" and item.get("item_id") + ] + if performer_slug is not None: + items = [ + item + for item in items + if slugify_performer(str(item.get("artist_name") or item.get("band_name") or "")) + == performer_slug + ] + return [ + self._converters.album_from_discography_item(cast("DiscographyItem", item)) + for item in items + ] + @use_cache(CACHE_METADATA) @throttle_with_retries async def get_artist_toptracks(self, prov_artist_id: str) -> list[Track]: diff --git a/music_assistant/providers/bandcamp/_ids.py b/music_assistant/providers/bandcamp/_ids.py new file mode 100644 index 0000000000..dbe46ccaa6 --- /dev/null +++ b/music_assistant/providers/bandcamp/_ids.py @@ -0,0 +1,76 @@ +"""Composite artist ID utilities for the Bandcamp provider. + +Bandcamp's data model treats labels as bands: an album published on a +label's page (e.g. ``audiophob``) reports ``band_id`` = the label, while the +actual performer (e.g. ``Mortaja``) lives in a separate ``tralbum_artist`` +(or ``artist_name`` / autocomplete-``band_name``) field. Performers without +their own Bandcamp page exist only as a per-album credit on someone else's +page — there is no real ``band_id`` to link to. + +We model these performers as *synthetic artists scoped to a band page*. The +artist ``item_id`` stored on Music Assistant media items is one of: + +* ``"{band_id}"`` — a real Bandcamp band/label page (existing behavior). +* ``"{band_id}:{slug}"`` — a performer credit on the band page identified + by ``{band_id}``. The ``slug`` is derived from the performer name. + +Two performers with the same name on different band pages produce different +synthetic IDs (e.g. Mortaja-on-audiophob vs. Mortaja-on-AdiósMundoCruel), +so the cross-label collision problem ALERTua flagged in +music-assistant/support#5389 doesn't apply. +""" + +from __future__ import annotations + +import re + +# Slugs are lowercase alphanumerics joined by single hyphens. +# The colon separator between band_id and slug is unambiguous because +# band_ids are pure digits and slugs contain no colons. +_SLUG_NON_ALNUM = re.compile(r"[^a-z0-9]+") + + +def slugify_performer(name: str) -> str: + """Reduce a performer name to a stable lowercase slug. + + Two performer names compare equal as artists when their slugs match. + The transform is intentionally lossy — punctuation and case are + discarded — so minor variations like ``"&"`` vs ``"and"`` collapse + together. Bandcamp's per-album performer field is consistent within a + band page in practice, so within-band collisions are rare; cross-band + collisions are not a concern because ``band_id`` is part of the key. + + :param name: Raw performer name from the Bandcamp API. + :returns: A slug containing only ``[a-z0-9-]`` with no leading or + trailing hyphens. Empty string if ``name`` has no slug-eligible + characters. + """ + return _SLUG_NON_ALNUM.sub("-", name.lower()).strip("-") + + +def make_artist_id(band_id: int | str, performer: str | None = None) -> str: + """Build the artist ``item_id`` used on Music Assistant media items. + + :param band_id: The Bandcamp ``band_id`` (page owner). + :param performer: Optional performer name. If provided and produces a + non-empty slug, the result is the synthetic form + ``"{band_id}:{slug}"``. Otherwise the plain ``"{band_id}"`` form + is returned (the page itself). + """ + if not performer: + return str(band_id) + slug = slugify_performer(performer) + if not slug: + return str(band_id) + return f"{band_id}:{slug}" + + +def parse_artist_id(artist_id: str) -> tuple[int, str | None]: + """Split an artist ``item_id`` into ``(band_id, performer_slug)``. + + :returns: A tuple where the second element is ``None`` for plain + ``"{band_id}"`` IDs and a non-empty slug for synthetic ones. + :raises ValueError: If the band-id portion is not a valid integer. + """ + band, _, slug = artist_id.partition(":") + return int(band), (slug or None) diff --git a/music_assistant/providers/bandcamp/converters.py b/music_assistant/providers/bandcamp/converters.py index 4838c39f9d..6d8f8203ac 100644 --- a/music_assistant/providers/bandcamp/converters.py +++ b/music_assistant/providers/bandcamp/converters.py @@ -24,6 +24,8 @@ ) from music_assistant_models.media_items import Track as MATrack +from ._ids import make_artist_id, slugify_performer + class DiscographyItem(TypedDict, total=False): """Raw discography item dict from the band_details API.""" @@ -72,9 +74,19 @@ def streaming_url_from_api( content_type = ContentType.UNKNOWN return streaming_url, bitrate, content_type - def track_from_search(self, item: SearchResultTrack) -> MATrack: - """Create a Track from new API SearchResultTrack.""" + def track_from_search( + self, item: SearchResultTrack, *, artist_item_id: str | None = None + ) -> MATrack: + """Create a Track from new API SearchResultTrack. + + :param artist_item_id: Resolved artist item_id chosen by the provider's + cross-result dedup. If omitted, defaults to the synthetic form + ``"{artist_id}:{slug(artist_name)}"`` so callers without + disambiguation context still get correct labels-vs-performers + behavior. + """ track_id = f"{item.artist_id}-{item.album_id or 0}-{item.id}" + artist_item_id = artist_item_id or make_artist_id(item.artist_id, item.artist_name) return MATrack( item_id=track_id, provider=self.instance_id, @@ -83,7 +95,7 @@ def track_from_search(self, item: SearchResultTrack) -> MATrack: [ ItemMapping( media_type=MediaType.ARTIST, - item_id=str(item.artist_id), + item_id=artist_item_id, provider=self.instance_id, name=item.artist_name, ) @@ -109,9 +121,17 @@ def track_from_search(self, item: SearchResultTrack) -> MATrack: }, ) - def album_from_search(self, item: SearchResultAlbum) -> MAAlbum: - """Create an Album from new API SearchResultAlbum.""" + def album_from_search( + self, item: SearchResultAlbum, *, artist_item_id: str | None = None + ) -> MAAlbum: + """Create an Album from new API SearchResultAlbum. + + :param artist_item_id: Resolved artist item_id chosen by the provider's + cross-result dedup. If omitted, defaults to the synthetic form + ``"{artist_id}:{slug(artist_name)}"``. + """ album_id = f"{item.artist_id}-{item.id}" + artist_item_id = artist_item_id or make_artist_id(item.artist_id, item.artist_name) output = MAAlbum( item_id=album_id, provider=self.instance_id, @@ -121,7 +141,7 @@ def album_from_search(self, item: SearchResultAlbum) -> MAAlbum: [ ItemMapping( media_type=MediaType.ARTIST, - item_id=str(item.artist_id), + item_id=artist_item_id, provider=self.instance_id, name=item.artist_name, uri=item.artist_url, @@ -182,10 +202,25 @@ def track_from_api( album_id: str | int | None = None, album_name: str = "", album_image_url: str = "", + *, + tralbum_artist: str | None = None, ) -> MATrack: - """Convert a Track object from the API to MA Track format.""" + """Convert a Track object from the API to MA Track format. + + :param tralbum_artist: The per-album performer credit + (``BCAlbum.tralbum_artist`` for tracks within an album, or + ``BCTrack.tralbum_artist`` for standalone tracks). When set + and different from the band's own name, the artist link uses + a synthetic ``{band_id}:{slug}`` ID and the displayed artist + name is the performer rather than the band. + """ album_id = album_id or 0 _, bitrate, content_type = self.streaming_url_from_api(track.streaming_url or {}) + band_name = track.artist.name + display_name = tralbum_artist or band_name + artist_item_id = _resolve_artist_id( + band_id=track.artist.id, performer=tralbum_artist, band_name=band_name + ) output = MATrack( item_id=f"{track.artist.id}-{album_id}-{track.id}", provider=self.instance_id, @@ -194,9 +229,9 @@ def track_from_api( [ ItemMapping( media_type=MediaType.ARTIST, - item_id=str(track.artist.id), + item_id=artist_item_id, provider=self.instance_id, - name=track.artist.name, + name=display_name, ) ] ), @@ -283,7 +318,14 @@ def album_from_discography_item(self, item: DiscographyItem) -> MAAlbum: band_id = item.get("band_id", 0) item_id = item.get("item_id", 0) album_id = f"{band_id}-{item_id}" - artist_name = item.get("artist_name") or item.get("band_name") or "" + # `artist_name` (when set) is the per-album performer; `band_name` + # is the page owner. They differ on label-released albums. + performer = item.get("artist_name") + band_name = item.get("band_name") or "" + display_name = performer or band_name + artist_item_id = _resolve_artist_id( + band_id=band_id, performer=performer, band_name=band_name + ) # Build art URL from art_id (matches _build_art_url in parsers.py) art_id = item.get("art_id") @@ -305,9 +347,9 @@ def album_from_discography_item(self, item: DiscographyItem) -> MAAlbum: [ ItemMapping( media_type=MediaType.ARTIST, - item_id=str(band_id), + item_id=artist_item_id, provider=self.instance_id, - name=artist_name, + name=display_name, ) ] ), @@ -334,6 +376,11 @@ def album_from_discography_item(self, item: DiscographyItem) -> MAAlbum: def album_from_api(self, album: APIAlbum) -> MAAlbum: """Convert an API Album object to MA Album format.""" album_id = f"{album.artist.id}-{album.id}" + band_name = album.artist.name + display_name = album.tralbum_artist or band_name + artist_item_id = _resolve_artist_id( + band_id=album.artist.id, performer=album.tralbum_artist, band_name=band_name + ) output = MAAlbum( item_id=album_id, provider=self.instance_id, @@ -342,9 +389,9 @@ def album_from_api(self, album: APIAlbum) -> MAAlbum: [ ItemMapping( media_type=MediaType.ARTIST, - item_id=str(album.artist.id), + item_id=artist_item_id, provider=self.instance_id, - name=album.artist.name, + name=display_name, image=MediaItemImage( path=album.art_url, type=ImageType.THUMB, @@ -374,3 +421,74 @@ def album_from_api(self, album: APIAlbum) -> MAAlbum: ) output.metadata.description = f"{album.url}\n{album.about or ''}".strip() return output + + def synthetic_artist( + self, + band_id: int, + performer_name: str, + *, + url: str | None = None, + image_url: str | None = None, + ) -> MAArtist: + """Build an Artist for a per-album performer that has no band page. + + These performers exist on Bandcamp only as a credit on a band's + page (typically a label), so we synthesize an Artist scoped to the + ``(band_id, performer)`` pair. See :mod:`._ids` for the ID format. + + :param band_id: The hosting Bandcamp band's id. + :param performer_name: The performer credit (display name). + :param url: Optional URL to attach. Usually the band page; the + performer doesn't have their own. + :param image_url: Optional artwork to use as the artist's thumb, + typically borrowed from one of their albums. + """ + item_id = make_artist_id(band_id, performer_name) + output = MAArtist( + item_id=item_id, + provider=self.instance_id, + name=performer_name, + uri=url, + provider_mappings={ + ProviderMapping( + item_id=item_id, + provider_domain=self.domain, + provider_instance=self.instance_id, + url=url, + ) + }, + ) + if url: + output.metadata.description = url + if image_url: + output.metadata.add_image( + MediaItemImage( + type=ImageType.THUMB, + path=image_url, + provider=self.instance_id, + remotely_accessible=True, + ) + ) + return output + + +def _resolve_artist_id( + *, + band_id: int | str, + performer: str | None, + band_name: str | None, +) -> str: + """Choose between a real or synthetic artist item_id. + + Returns the synthetic ``{band_id}:{slug(performer)}`` form only when + the performer is set AND its slug differs from the band's own slug; + otherwise returns the plain ``{band_id}``. Missing inputs collapse + to the plain form rather than fabricating a meaningless synthetic. + """ + if ( + not performer + or not band_name + or slugify_performer(performer) == slugify_performer(band_name) + ): + return str(band_id) + return make_artist_id(band_id, performer) diff --git a/music_assistant/providers/bandcamp/manifest.json b/music_assistant/providers/bandcamp/manifest.json index ac6447a977..6a6b70a90e 100644 --- a/music_assistant/providers/bandcamp/manifest.json +++ b/music_assistant/providers/bandcamp/manifest.json @@ -5,7 +5,7 @@ "name": "Bandcamp", "description": "Stream music from Bandcamp's catalog.", "codeowners": ["@ALERTua", "@teancom"], - "requirements": ["bandcamp-async-api==0.1.1"], + "requirements": ["bandcamp-async-api==0.2.1"], "documentation": "https://music-assistant.io/music-providers/bandcamp/", "multi_instance": true } diff --git a/requirements_all.txt b/requirements_all.txt index 2122c875aa..7af7c911b2 100644 --- a/requirements_all.txt +++ b/requirements_all.txt @@ -24,7 +24,7 @@ audible==0.10.0 auntie-sounds==1.1.8 av==16.1.0 awesomeversion>=24.6.0 -bandcamp-async-api==0.1.1 +bandcamp-async-api==0.2.1 beat-this==1.1.0 bidict==0.23.1 certifi==2025.11.12 diff --git a/tests/providers/bandcamp/test_converters.py b/tests/providers/bandcamp/test_converters.py index 88551f3312..6c9211a628 100644 --- a/tests/providers/bandcamp/test_converters.py +++ b/tests/providers/bandcamp/test_converters.py @@ -51,6 +51,50 @@ def test_album_from_search(converters: BandcampConverters) -> None: assert result.item_id == "123-456" assert result.name == "Test Album" assert result.provider == "bandcamp_test" + # Without disambiguation context, the converter conservatively + # synthesizes a `{band_id}:{slug}` artist ID — the provider's search + # method overrides this with a real ID when a `b` result confirms + # the page owner's name matches. + artist = next(iter(result.artists)) + assert artist.item_id == "123:test-artist" + + +def test_album_from_search_uses_provided_artist_item_id( + converters: BandcampConverters, +) -> None: + """When the provider resolves the artist ID, the converter must honor it.""" + search_result = Mock() + search_result.artist_id = 123 + search_result.id = 456 + search_result.name = "Test Album" + search_result.artist_name = "Test Artist" + search_result.image_url = None + search_result.url = "https://test.bandcamp.com/album/test-album" + search_result.artist_url = "https://test.bandcamp.com" + + result = converters.album_from_search(search_result, artist_item_id="123") + + artist = next(iter(result.artists)) + assert artist.item_id == "123" + + +def test_track_from_search_uses_provided_artist_item_id( + converters: BandcampConverters, +) -> None: + """Same dedup path for track results.""" + search_result = Mock() + search_result.artist_id = 441379041 + search_result.album_id = 1938115920 + search_result.id = 2114682405 + search_result.name = "Haunted" + search_result.artist_name = "Mortaja" + search_result.album_name = "Combined Minds" + search_result.url = "https://audiophob.bandcamp.com/track/haunted" + + result = converters.track_from_search(search_result, artist_item_id="441379041:mortaja") + + artist = next(iter(result.artists)) + assert artist.item_id == "441379041:mortaja" def test_artist_from_search(converters: BandcampConverters) -> None: @@ -98,6 +142,43 @@ def test_track_from_api(converters: BandcampConverters) -> None: assert result.item_id == "123-456-789" assert result.name == "Test Track" assert result.provider == "bandcamp_test" + # Without tralbum_artist there's no separate performer credit, so the + # artist link is the plain band ID and the display name is the band. + artist = next(iter(result.artists)) + assert artist.item_id == "123" + assert artist.name == "Test Artist" + + +def test_track_from_api_label_release_uses_synthetic_artist_id( + converters: BandcampConverters, +) -> None: + """tralbum_artist != band's name → synthetic artist ID + performer display.""" + mock_artist = Mock() + mock_artist.id = 441379041 + mock_artist.name = "audiophob" # page owner — the label + mock_artist.url = "https://audiophob.bandcamp.com" + + mock_track = Mock() + mock_track.id = 2114682405 + mock_track.title = "Haunted" + mock_track.artist = mock_artist + mock_track.url = "https://audiophob.bandcamp.com/track/haunted" + mock_track.duration = 344 + mock_track.lyrics = None + mock_track.track_number = 4 + mock_track.streaming_url = {"mp3-128": "https://example.com/track.mp3"} + + result = converters.track_from_api( + track=mock_track, + album_id=1938115920, + album_name="Combined Minds", + album_image_url="https://f4.bcbits.com/img/a2825942492_16.jpg", + tralbum_artist="Mortaja", + ) + + artist = next(iter(result.artists)) + assert artist.item_id == "441379041:mortaja" + assert artist.name == "Mortaja" def test_artist_from_api(converters: BandcampConverters) -> None: @@ -118,8 +199,7 @@ def test_artist_from_api(converters: BandcampConverters) -> None: def test_album_from_api(converters: BandcampConverters) -> None: - """Test converting API Album to MA Album.""" - # Create mock API models + """Album by the band itself: real artist ID, no synthetic credit.""" mock_artist = Mock() mock_artist.id = 123 mock_artist.name = "Test Artist" @@ -133,12 +213,46 @@ def test_album_from_api(converters: BandcampConverters) -> None: mock_album.art_url = "https://f4.bcbits.com/img/a1234567890_16.jpg" mock_album.release_date = 1609459200 mock_album.about = "Test album description" + mock_album.tralbum_artist = None # no separate performer credit result = converters.album_from_api(mock_album) assert result.item_id == "123-456" assert result.name == "Test Album" assert result.provider == "bandcamp_test" + artist = next(iter(result.artists)) + assert artist.item_id == "123" + assert artist.name == "Test Artist" + + +def test_album_from_api_label_release_uses_synthetic_artist_id( + converters: BandcampConverters, +) -> None: + """Label release: page owner != performer → synthetic ``{band_id}:{slug}``. + + `artist.name` is the page owner (the label); `tralbum_artist` is the + performer credit. The displayed artist on the album is the performer. + """ + mock_artist = Mock() + mock_artist.id = 441379041 + mock_artist.name = "audiophob" # page owner — the label + mock_artist.url = "https://audiophob.bandcamp.com" + + mock_album = Mock() + mock_album.id = 1938115920 + mock_album.title = "Combined Minds" + mock_album.artist = mock_artist + mock_album.url = "https://audiophob.bandcamp.com/album/combined-minds" + mock_album.art_url = "https://f4.bcbits.com/img/a2825942492_16.jpg" + mock_album.release_date = 1539907200 + mock_album.about = "" + mock_album.tralbum_artist = "Mortaja" # the performer + + result = converters.album_from_api(mock_album) + + artist = next(iter(result.artists)) + assert artist.item_id == "441379041:mortaja" + assert artist.name == "Mortaja" def test_track_from_api_without_album_info(converters: BandcampConverters) -> None: @@ -331,7 +445,10 @@ def test_album_from_discography_item(converters: BandcampConverters) -> None: artists = list(result.artists) assert len(artists) == 1 assert artists[0].name == "Amanda Palmer & Friends" - assert artists[0].item_id == "3463798201" + # The collaboration credit differs from Amanda Palmer's own band + # name — surface it as a synthetic performer scoped to her band so + # users can navigate to *just* the collab releases. + assert artists[0].item_id == "3463798201:amanda-palmer-friends" assert result.metadata.images assert any("a3547137148_0.jpg" in img.path for img in result.metadata.images) @@ -383,3 +500,46 @@ def test_album_from_discography_item_artist_name_fallback( result = converters.album_from_discography_item(item) artists = list(result.artists) assert artists[0].name == "Fallback Artist" + # Performer == band → real artist ID, not synthetic. + assert artists[0].item_id == "200" + + +def test_album_from_discography_item_label_release_uses_synthetic_artist( + converters: BandcampConverters, +) -> None: + """Discography of a label: per-item ``artist_name`` differs from ``band_name``. + + The artist link should point at a synthetic performer scoped to the + label's band_id. + """ + item: DiscographyItem = { + "item_id": 1938115920, + "item_type": "album", + "artist_name": "Mortaja", + "band_name": "audiophob", + "title": "Combined Minds", + "band_id": 441379041, + "art_id": 2825942492, + "release_date": "18 Oct 2018 00:00:00 GMT", + } + result = converters.album_from_discography_item(item) + artists = list(result.artists) + assert artists[0].name == "Mortaja" + assert artists[0].item_id == "441379041:mortaja" + + +def test_synthetic_artist_basics(converters: BandcampConverters) -> None: + """The synthetic_artist factory builds an Artist scoped to (band, performer).""" + artist = converters.synthetic_artist( + band_id=441379041, + performer_name="Mortaja", + url="https://audiophob.bandcamp.com", + image_url="https://f4.bcbits.com/img/a2825942492_16.jpg", + ) + assert artist.item_id == "441379041:mortaja" + assert artist.name == "Mortaja" + assert artist.provider == "bandcamp_test" + # The provider mapping pins the same composite ID, so MA's later + # `get_artist(item_id)` call hits our synthetic resolution path. + mapping = next(iter(artist.provider_mappings)) + assert mapping.item_id == "441379041:mortaja" diff --git a/tests/providers/bandcamp/test_ids.py b/tests/providers/bandcamp/test_ids.py new file mode 100644 index 0000000000..179c6d7b35 --- /dev/null +++ b/tests/providers/bandcamp/test_ids.py @@ -0,0 +1,98 @@ +"""Tests for the Bandcamp composite-artist-ID utilities.""" + +from __future__ import annotations + +import pytest + +from music_assistant.providers.bandcamp._ids import ( + make_artist_id, + parse_artist_id, + slugify_performer, +) + + +class TestSlugifyPerformer: + """Slug generation rules for performer names.""" + + def test_lowercases_alphanumerics(self) -> None: + """Bare alphabetic names round-trip lowercased with no separators.""" + assert slugify_performer("Mortaja") == "mortaja" + + def test_collapses_spaces_to_single_hyphen(self) -> None: + """Spaces between words collapse to one hyphen each.""" + assert slugify_performer("The Green Arrows") == "the-green-arrows" + + def test_collapses_repeated_punctuation(self) -> None: + """Runs of any non-alnum chars collapse into a single separator.""" + assert slugify_performer("Apollo Brown & OC") == "apollo-brown-oc" + + def test_strips_leading_and_trailing_separators(self) -> None: + """Leading/trailing separators are removed.""" + assert slugify_performer(" --foo-- ") == "foo" + + def test_unicode_letters_are_dropped(self) -> None: + """Non-ASCII letters are intentionally dropped (lossy slug).""" + # Slug is intentionally lossy: non-ASCII letters do not round-trip. + # This is acceptable because slugs are scoped per band_id, so the + # only failure mode is rare same-band performer-name collisions. + assert slugify_performer("Adiós Mundo Cruel") == "adi-s-mundo-cruel" + + def test_empty_input_returns_empty(self) -> None: + """Empty input yields empty output, not a crash.""" + assert slugify_performer("") == "" + + def test_only_punctuation_returns_empty(self) -> None: + """Strings with no slug-eligible characters yield empty output.""" + assert slugify_performer("///") == "" + + +class TestMakeArtistId: + """Composite ID construction from band_id + performer.""" + + def test_no_performer_returns_plain_band_id(self) -> None: + """Omitted performer means "the band itself" → plain numeric ID.""" + assert make_artist_id(123) == "123" + + def test_none_performer_returns_plain_band_id(self) -> None: + """Explicit None performer is treated the same as omitted.""" + assert make_artist_id(123, None) == "123" + + def test_empty_performer_returns_plain_band_id(self) -> None: + """Empty string performer is treated the same as omitted.""" + assert make_artist_id(123, "") == "123" + + def test_with_performer_synthesizes(self) -> None: + """Provided performer name produces a synthetic `{band_id}:{slug}` form.""" + assert make_artist_id(441379041, "Mortaja") == "441379041:mortaja" + + def test_string_band_id_is_accepted(self) -> None: + """band_id may be passed as a string for caller convenience.""" + assert make_artist_id("441379041", "Mortaja") == "441379041:mortaja" + + def test_performer_with_only_punctuation_falls_back_to_plain(self) -> None: + """A performer whose slug is empty degrades to the plain band ID.""" + # If the slug ends up empty we cannot build a useful synthetic + # ID, so we degrade to the band's plain ID rather than emit a + # malformed `123:` value. + assert make_artist_id(123, "///") == "123" + + +class TestParseArtistId: + """Reverse of make_artist_id.""" + + def test_plain_band_id(self) -> None: + """A plain numeric ID parses with no performer slug.""" + assert parse_artist_id("123") == (123, None) + + def test_synthetic_id(self) -> None: + """A synthetic ID splits into (band_id, slug).""" + assert parse_artist_id("441379041:mortaja") == (441379041, "mortaja") + + def test_synthetic_with_hyphenated_slug(self) -> None: + """Hyphens inside the slug are preserved (not a separator with band_id).""" + assert parse_artist_id("123:the-green-arrows") == (123, "the-green-arrows") + + def test_non_numeric_band_id_raises(self) -> None: + """Non-numeric band_id portions surface as ValueError.""" + with pytest.raises(ValueError): # noqa: PT011 + parse_artist_id("abc:slug") diff --git a/tests/providers/bandcamp/test_init.py b/tests/providers/bandcamp/test_init.py index a8befbd5c0..036bbb2b73 100644 --- a/tests/providers/bandcamp/test_init.py +++ b/tests/providers/bandcamp/test_init.py @@ -12,7 +12,9 @@ @pytest.fixture -async def bandcamp_provider(mass: MusicAssistant) -> AsyncGenerator[ProviderConfig, None]: +async def bandcamp_provider( # noqa: PLR0915 + mass: MusicAssistant, +) -> AsyncGenerator[ProviderConfig, None]: """Configure a Bandcamp test fixture, and add a provider to mass that uses it.""" # Mock the BandcampAPIClient to avoid real API calls with mock.patch("music_assistant.providers.bandcamp.BandcampAPIClient") as mock_client_class: @@ -55,6 +57,10 @@ async def bandcamp_provider(mass: MusicAssistant) -> AsyncGenerator[ProviderConf mock_album.art_url = "https://f4.bcbits.com/img/a1234567890_16.jpg" mock_album.release_date = 1609459200 mock_album.about = "Test album description" + # Concrete performer credit string — the converter feeds it to + # slugify_performer which only accepts strings. None means + # "no separate performer; the album is by the band itself". + mock_album.tralbum_artist = None mock_track = mock.AsyncMock() mock_track.id = 789 @@ -65,6 +71,7 @@ async def bandcamp_provider(mass: MusicAssistant) -> AsyncGenerator[ProviderConf mock_track.streaming_url = {"mp3-320": "https://example.com/track.mp3"} mock_track.track_number = 1 mock_track.lyrics = "Test lyrics" + mock_track.tralbum_artist = None # Configure the streaming_url to behave like a dictionary mock_track.configure_mock(streaming_url={"mp3-320": "https://example.com/track.mp3"}) diff --git a/tests/providers/bandcamp/test_provider.py b/tests/providers/bandcamp/test_provider.py index 2d9d62c9bf..3e0bd22505 100644 --- a/tests/providers/bandcamp/test_provider.py +++ b/tests/providers/bandcamp/test_provider.py @@ -173,12 +173,61 @@ async def test_is_streaming_provider(provider: BandcampProvider) -> None: assert provider.is_streaming_provider is True +def _search_track_mock( + *, + artist_id: int = 123, + artist_name: str = "Test Artist", + album_id: int | None = 456, + track_id: int = 789, +) -> Mock: + """Construct a SearchResultTrack mock with concrete attribute values.""" + item = Mock(spec=SearchResultTrack) + item.id = track_id + item.name = "Track" + item.artist_id = artist_id + item.artist_name = artist_name + item.album_id = album_id + item.album_name = "Album" + item.url = "" + item.image_url = None + return item + + +def _search_album_mock( + *, + artist_id: int = 123, + artist_name: str = "Test Artist", + album_id: int = 456, +) -> Mock: + """Construct a SearchResultAlbum mock with concrete attribute values.""" + item = Mock(spec=SearchResultAlbum) + item.id = album_id + item.name = "Album" + item.artist_id = artist_id + item.artist_name = artist_name + item.artist_url = "" + item.url = "" + item.image_url = None + return item + + +def _search_artist_mock(*, artist_id: int = 123, name: str = "Test Artist") -> Mock: + """Construct a SearchResultArtist mock with concrete attribute values.""" + item = Mock(spec=SearchResultArtist) + item.id = artist_id + item.name = name + item.url = "" + item.image_url = None + item.tags = None + return item + + async def test_search_with_identity(provider: BandcampProvider) -> None: """Test search functionality with identity token.""" mock_search_results = [ - Mock(spec=SearchResultTrack), - Mock(spec=SearchResultAlbum), - Mock(spec=SearchResultArtist), + _search_track_mock(), + _search_album_mock(), + _search_artist_mock(), ] with ( @@ -203,9 +252,65 @@ async def test_search_with_identity(provider: BandcampProvider) -> None: mock_artist_converter.assert_called_once() assert len(results.tracks) == 1 assert len(results.albums) == 1 + # The album/track results match the artist `b` result so no + # synthetic artists are emitted; the count stays at 1. assert len(results.artists) == 1 +async def test_search_synthesizes_artists_for_label_releases( + provider: BandcampProvider, +) -> None: + """Surface a synthetic artist when an album's performer != the page owner. + + A label-released album whose credit doesn't appear as a `b` result + becomes its own artist entry under MediaType.ARTIST search. + """ + label_id = 441379041 + # `b` result for the label, plus two `a` results — one by the label + # itself and one by Mortaja (a performer with no band page). + label_band = _search_artist_mock(artist_id=label_id, name="audiophob") + label_album = _search_album_mock( + artist_id=label_id, artist_name="audiophob", album_id=1198969224 + ) + mortaja_album = _search_album_mock( + artist_id=label_id, artist_name="Mortaja", album_id=1938115920 + ) + + with patch.object(provider._client, "search", new_callable=AsyncMock) as mock_search: + mock_search.return_value = [label_band, label_album, mortaja_album] + + results = await provider.search("Mortaja", [MediaType.ALBUM, MediaType.ARTIST], limit=10) + + artist_ids = {a.item_id for a in results.artists} + # Real label artist + synthetic Mortaja-on-audiophob. + assert artist_ids == {str(label_id), f"{label_id}:mortaja"} + + # The album by the label itself should link to the real ID; + # the Mortaja album should link to the synthetic. + album_artist_ids = {next(iter(cast("Album", a).artists)).item_id for a in results.albums} + assert album_artist_ids == {str(label_id), f"{label_id}:mortaja"} + + +async def test_search_does_not_duplicate_existing_artists( + provider: BandcampProvider, +) -> None: + """Don't duplicate an artist as a synthetic when a real `b` result matches. + + When an album's performer slug equals the band's own slug, the artist + link reuses the real `{band_id}` and no synthetic entry is emitted. + """ + band_id = 3658985110 + real = _search_artist_mock(artist_id=band_id, name="Apollo Brown") + own_album = _search_album_mock(artist_id=band_id, artist_name="Apollo Brown", album_id=1) + + with patch.object(provider._client, "search", new_callable=AsyncMock) as mock_search: + mock_search.return_value = [real, own_album] + + results = await provider.search("Apollo Brown", [MediaType.ARTIST], limit=10) + + assert [a.item_id for a in results.artists] == [str(band_id)] + + async def test_search_without_identity(provider: BandcampProvider) -> None: """Test search returns empty results without identity token.""" provider._client.identity = None @@ -239,11 +344,204 @@ async def test_get_artist_success(provider: BandcampProvider) -> None: result = await provider.get_artist("123") - mock_get_artist.assert_called_once_with("123") + # The composite-ID parser converts the string to an int before + # forwarding to the underlying client. + mock_get_artist.assert_called_once_with(123) mock_converter.assert_called_once_with(mock_artist) assert result is not None +async def test_get_artist_synthetic_builds_from_discography( + provider: BandcampProvider, +) -> None: + """Genuine label-style synthetic resolves via the band's discography. + + The slug names a per-page performer distinct from the band's own + name, so the band-name short-circuit doesn't apply and we look up + the credit in the discography. + """ + label_id = 441379041 + label_artist = Mock(id=label_id, url="https://audiophob.bandcamp.com") + label_artist.name = "audiophob" # the band's own (page-owner) name + + discography = [ + { + "item_type": "album", + "band_id": label_id, + "item_id": 1938115920, + "title": "Combined Minds", + "artist_name": "Mortaja", + "band_name": "audiophob", + "art_id": 2825942492, + "release_date": "18 Oct 2018 00:00:00 GMT", + }, + ] + + with ( + patch.object( + provider._client, "get_artist_discography", new_callable=AsyncMock + ) as mock_disco, + patch.object(provider._client, "get_artist", new_callable=AsyncMock) as mock_get_artist, + ): + mock_get_artist.return_value = label_artist + mock_disco.return_value = discography + + result = await provider.get_artist(f"{label_id}:mortaja") + + assert result.item_id == f"{label_id}:mortaja" + assert result.name == "Mortaja" + # Per-page performers don't have their own Bandcamp page, so the + # synthetic carries the hosting band's URL (audiophob), matching + # what the search-emission path passes through. + assert result.uri == "https://audiophob.bandcamp.com" + # Both API calls happen: band-name lookup didn't match the slug, + # so we proceeded to look in the discography. + mock_get_artist.assert_called_once_with(label_id) + mock_disco.assert_called_once_with(label_id) + + +async def test_get_artist_synthetic_band_own_slug_collapses_to_real( + provider: BandcampProvider, +) -> None: + """Synthetic slug equal to the band's own slug resolves to the real band. + + This is the drift-collapse path. Bandcamp's autocomplete sometimes + returns ``a``/``t`` rows for a band-by-itself album without the + matching ``b`` row in the same response; the search-time path mints + a synthetic ``{band_id}:slug-of-band-own-name`` because it can't + disambiguate without the ``b`` row. When the user navigates to that + synthetic, we collapse it back to the real band before MA can + persist a duplicate library entry. + + Critically, the discography has matching items (band-by-itself + entries with ``artist_name=null`` fall through to + ``band_name='Apollo Brown'`` whose slug matches), but we must NOT + build a shadow synthetic from them. + """ + band_id = 3658985110 + api_artist = Mock(id=band_id, url="https://apollobrown360.bandcamp.com") + api_artist.name = "Apollo Brown" + + with ( + patch.object( + provider._client, "get_artist_discography", new_callable=AsyncMock + ) as mock_disco, + patch.object(provider._client, "get_artist", new_callable=AsyncMock) as mock_get_artist, + ): + mock_get_artist.return_value = api_artist + # Discography includes items that WOULD match the slug if we + # consulted it (artist_name=null → falls through to band_name + # which equals "Apollo Brown" → slug "apollo-brown"). The fix + # depends on never reaching this lookup once the band-name + # short-circuit fires. + mock_disco.return_value = [ + { + "item_type": "album", + "band_id": band_id, + "item_id": 999, + "title": "Skilled Trade", + "artist_name": None, + "band_name": "Apollo Brown", + }, + ] + + result = await provider.get_artist(f"{band_id}:apollo-brown") + + assert result.item_id == str(band_id) + assert result.name == "Apollo Brown" + mock_get_artist.assert_called_once_with(band_id) + # Short-circuit: no discography fetch when the band's own name + # already matches the synthetic slug. + mock_disco.assert_not_called() + + +async def test_get_synthetic_artist_rate_limit_at_band_lookup( + provider: BandcampProvider, +) -> None: + """Rate-limit on the initial band lookup converts to ResourceTemporarilyUnavailable. + + Tests ``_get_synthetic_artist`` directly to bypass the public method's + ``@throttle_with_retries`` decorator. The decorator's job is to retry + on this exception; our job is to make sure we *raise* it with the + backoff hint preserved so the decorator (and any other caller) can + use it. + """ + band_id = 441379041 + with patch.object(provider._client, "get_artist", new_callable=AsyncMock) as mock_get_artist: + mock_get_artist.side_effect = BandcampRateLimitError("Rate limited", retry_after=42) + with pytest.raises(ResourceTemporarilyUnavailable) as exc_info: + await provider._get_synthetic_artist(f"{band_id}:mortaja", band_id, "mortaja") + assert exc_info.value.backoff_time == 42 + + +async def test_get_synthetic_artist_rate_limit_at_discography( + provider: BandcampProvider, +) -> None: + """Rate-limit on the discography fetch surfaces with backoff hint. + + Regression guard: an earlier version of this method caught + ``BandcampAPIError`` (the parent of ``BandcampRateLimitError``) in a + single ``except`` clause for the secondary lookup, swallowing the + backoff hint. Both API call sites must now handle rate-limits + explicitly. + """ + band_id = 441379041 + label_artist = Mock(id=band_id) + label_artist.name = "audiophob" + + with ( + patch.object(provider._client, "get_artist", new_callable=AsyncMock) as mock_get_artist, + patch.object( + provider._client, "get_artist_discography", new_callable=AsyncMock + ) as mock_disco, + ): + mock_get_artist.return_value = label_artist + mock_disco.side_effect = BandcampRateLimitError("Rate limited", retry_after=99) + with pytest.raises(ResourceTemporarilyUnavailable) as exc_info: + await provider._get_synthetic_artist(f"{band_id}:mortaja", band_id, "mortaja") + assert exc_info.value.backoff_time == 99 + + +async def test_get_artist_synthetic_no_matching_performer_raises( + provider: BandcampProvider, +) -> None: + """Unresolvable synthetic IDs raise MediaNotFoundError. + + A synthetic slug that matches neither the band's own name nor any + per-item performer credit in the discography is genuinely unknown — + we surface MediaNotFoundError rather than fabricate a phantom. + """ + band_id = 441379041 + label_artist = Mock(id=band_id) + label_artist.name = "audiophob" + + with ( + patch.object(provider._client, "get_artist", new_callable=AsyncMock) as mock_get_artist, + patch.object( + provider._client, "get_artist_discography", new_callable=AsyncMock + ) as mock_disco, + ): + mock_get_artist.return_value = label_artist + mock_disco.return_value = [ + { + "item_type": "album", + "band_id": band_id, + "item_id": 1, + "title": "Some Other Album", + "artist_name": "Some Other Artist", + "band_name": "audiophob", + }, + ] + with pytest.raises(MediaNotFoundError): + await provider.get_artist(f"{band_id}:nonexistent-performer") + + +async def test_get_artist_malformed_id_raises(provider: BandcampProvider) -> None: + """Non-numeric band_id portions surface as InvalidDataError.""" + with pytest.raises(InvalidDataError, match=r"Malformed Bandcamp artist ID"): + await provider.get_artist("not-a-number") + + async def test_get_artist_not_found(provider: BandcampProvider) -> None: """Test artist retrieval when not found.""" with ( @@ -301,6 +599,10 @@ async def test_get_track_standalone(provider: BandcampProvider) -> None: mock_api_track = Mock() mock_api_track.album = mock_album_obj + # Standalone single tracks carry their own performer credit; the + # provider forwards it to the converter so synthetic IDs are emitted + # for label-released singles. + mock_api_track.tralbum_artist = "Test Artist" with ( patch.object(provider._client, "get_track", new_callable=AsyncMock) as mock_get_track, @@ -317,6 +619,7 @@ async def test_get_track_standalone(provider: BandcampProvider) -> None: album_id=456, album_name="Standalone Album", album_image_url="http://example.com/art.jpg", + tralbum_artist="Test Artist", ) assert result is not None @@ -325,6 +628,7 @@ async def test_get_track_standalone_no_album(provider: BandcampProvider) -> None """Test get_track for a standalone track where api_track.album is None.""" mock_api_track = Mock() mock_api_track.album = None + mock_api_track.tralbum_artist = None with ( patch.object(provider._client, "get_track", new_callable=AsyncMock) as mock_get_track, @@ -341,6 +645,7 @@ async def test_get_track_standalone_no_album(provider: BandcampProvider) -> None album_id=None, album_name="", album_image_url="", + tralbum_artist=None, ) assert result is not None @@ -399,7 +704,7 @@ async def test_get_artist_albums_success(provider: BandcampProvider) -> None: result = await provider.get_artist_albums("123") - mock_get_discography.assert_called_once_with("123") + mock_get_discography.assert_called_once_with(123) assert len(result) == 1 assert result[0].item_id == "123-456" assert result[0].name == "Test Album" @@ -432,9 +737,62 @@ async def test_get_artist_albums_label_uses_band_id(provider: BandcampProvider) assert len(result) == 1 assert result[0].item_id == "9999-100" # band_id, not label ID artists = list(result[0].artists) + # artist_name == band_name → real artist ID for the album's own band. assert artists[0].item_id == "9999" +async def test_get_artist_albums_synthetic_id_filters_discography( + provider: BandcampProvider, +) -> None: + """A synthetic artist ID should return only the matching performer's albums.""" + label_id = 441379041 + mock_discography = [ + { + "item_type": "album", + "band_id": label_id, + "item_id": 1938115920, + "title": "Combined Minds", + "artist_name": "Mortaja", + "band_name": "audiophob", + "art_id": 2825942492, + "release_date": "18 Oct 2018 00:00:00 GMT", + }, + { + "item_type": "album", + "band_id": label_id, + "item_id": 4042974093, + "title": "Basalt", + "artist_name": "Spherical Disrupted", + "band_name": "audiophob", + "art_id": 1234, + "release_date": "01 Jan 2024 00:00:00 GMT", + }, + { + "item_type": "album", + "band_id": label_id, + "item_id": 1185688687, + "title": "Bone Chamber", + "artist_name": "Mortaja", + "band_name": "audiophob", + "art_id": 5678, + "release_date": "01 Jan 2017 00:00:00 GMT", + }, + ] + + with patch.object( + provider._client, "get_artist_discography", new_callable=AsyncMock + ) as mock_get_discography: + mock_get_discography.return_value = mock_discography + + result = await provider.get_artist_albums(f"{label_id}:mortaja") + + # Only the two Mortaja albums; the Spherical Disrupted entry is filtered out. + assert {album.name for album in result} == {"Combined Minds", "Bone Chamber"} + for album in result: + artist = next(iter(album.artists)) + assert artist.item_id == f"{label_id}:mortaja" + + async def test_get_stream_details_success(provider: BandcampProvider) -> None: """Test stream details fetches fresh URL and audio format from API.""" mock_api_track = Mock() From 62484d2eb260d5b567913fb0d94866a248b354d7 Mon Sep 17 00:00:00 2001 From: David Bishop Date: Fri, 1 May 2026 20:50:18 -0700 Subject: [PATCH 2/3] Bandcamp: Resolve label-released performers to their real band pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on the synthetic `{band_id}:{slug}` artist ID for albums released on a label's page by a different performer. When the performer has their own Bandcamp band page (Apollo Brown, Mortaja, …), a secondary autocomplete lookup redirects to the real `{band_id}` so direct-search and search-by-album-name surface the same artist. When no own band page exists, resolution falls back to the synthetic ID. Wired into search, get_album, get_track, get_album_tracks, and the artist discography listing. Lookups for a batch resolve in parallel and are cached per performer slug. Refs music-assistant/support#5389. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../providers/bandcamp/__init__.py | 302 ++++++++++++--- .../providers/bandcamp/converters.py | 27 +- tests/providers/bandcamp/test_provider.py | 362 +++++++++++++++++- 3 files changed, 621 insertions(+), 70 deletions(-) diff --git a/music_assistant/providers/bandcamp/__init__.py b/music_assistant/providers/bandcamp/__init__.py index f11fefe4e6..f04adc46d3 100644 --- a/music_assistant/providers/bandcamp/__init__.py +++ b/music_assistant/providers/bandcamp/__init__.py @@ -13,6 +13,7 @@ BandcampRateLimitError, SearchResultAlbum, SearchResultArtist, + SearchResultItem, SearchResultTrack, ) from bandcamp_async_api.models import ( @@ -37,6 +38,7 @@ LoginFailed, MediaNotFoundError, ResourceTemporarilyUnavailable, + RetriesExhausted, ) from music_assistant_models.media_items import ( Album, @@ -216,22 +218,27 @@ async def search( bands_by_id: dict[int, SearchResultArtist] = { item.id: item for item in capped if isinstance(item, SearchResultArtist) } + artist_id_by_item: dict[int, str] = await self._resolve_search_artist_ids( + capped, bands_by_id + ) artist_ids_seen: set[str] = set() synthetic_artists: list[Artist] = [] for item in capped: try: if isinstance(item, SearchResultTrack) and MediaType.TRACK in media_types: - artist_item_id = self._resolve_search_artist_id(item, bands_by_id) results.tracks = [ *results.tracks, - self._converters.track_from_search(item, artist_item_id=artist_item_id), + self._converters.track_from_search( + item, artist_item_id=artist_id_by_item[id(item)] + ), ] elif isinstance(item, SearchResultAlbum) and MediaType.ALBUM in media_types: - artist_item_id = self._resolve_search_artist_id(item, bands_by_id) results.albums = [ *results.albums, - self._converters.album_from_search(item, artist_item_id=artist_item_id), + self._converters.album_from_search( + item, artist_item_id=artist_id_by_item[id(item)] + ), ] elif isinstance(item, SearchResultArtist) and MediaType.ARTIST in media_types: artist_ids_seen.add(str(item.id)) @@ -246,47 +253,149 @@ async def search( continue if not item.artist_name: continue - artist_item_id = self._resolve_search_artist_id(item, bands_by_id) + artist_item_id = artist_id_by_item[id(item)] if artist_item_id in artist_ids_seen: continue artist_ids_seen.add(artist_item_id) - # Plain (non-synthetic) IDs that weren't already surfaced - # mean a `b` result for this band wasn't in the capped - # window. Skip — the user is searching by performer name, - # so synthetics are what they'd expect to see; a band that - # didn't make the cap shouldn't be reintroduced here. - if ":" not in artist_item_id: - continue - synthetic_artists.append( - self._converters.synthetic_artist( - band_id=item.artist_id, - performer_name=item.artist_name, - url=item.artist_url or None, - image_url=item.image_url, + if ":" in artist_item_id: + synthetic_artists.append( + self._converters.synthetic_artist( + band_id=item.artist_id, + performer_name=item.artist_name, + url=item.artist_url or None, + image_url=item.image_url, + ) ) - ) + continue + if int(artist_item_id) == item.artist_id: + # Same band as the row's claimed page — its `b` row just + # didn't make the cap; re-introducing it here would surface + # a band the user wasn't searching for. + continue + with suppress(MediaNotFoundError, ResourceTemporarilyUnavailable, RetriesExhausted): + results.artists = [*results.artists, await self.get_artist(artist_item_id)] if synthetic_artists: results.artists = [*results.artists, *synthetic_artists] return results - def _resolve_search_artist_id( + async def _resolve_search_artist_ids( self, - item: SearchResultAlbum | SearchResultTrack, + capped: Sequence[SearchResultItem], bands_by_id: dict[int, SearchResultArtist], - ) -> str: - """Decide whether an album/track's artist link is real or synthetic. + ) -> dict[int, str]: + """Resolve artist item_ids for every album/track row in a search batch. + + Keyed by ``id(row)`` so the materialization loop is a sync lookup. + """ + rows: list[SearchResultAlbum | SearchResultTrack] = [ + row for row in capped if isinstance(row, (SearchResultAlbum, SearchResultTrack)) + ] + slug_to_name: dict[str, str] = {} + for row in rows: + performer = row.artist_name or "" + band = bands_by_id.get(row.artist_id) + if band and slugify_performer(band.name) == slugify_performer(performer): + continue + slug = slugify_performer(performer) + if slug: + slug_to_name.setdefault(slug, performer) + + slug_to_real_id = await self._lookup_performer_band_ids_parallel(slug_to_name) + + resolved: dict[int, str] = {} + for row in rows: + performer = row.artist_name or "" + band = bands_by_id.get(row.artist_id) + if band and slugify_performer(band.name) == slugify_performer(performer): + resolved[id(row)] = str(row.artist_id) + continue + real_id = slug_to_real_id.get(slugify_performer(performer)) + if real_id is not None: + resolved[id(row)] = str(real_id) + continue + resolved[id(row)] = make_artist_id(row.artist_id, performer) + return resolved + + async def _lookup_performer_band_ids_parallel( + self, names_by_slug: dict[str, str] + ) -> dict[str, int | None]: + """Look up performer→band_id mappings concurrently, slug → band_id|None.""" + if not names_by_slug: + return {} + slugs = list(names_by_slug) + raw_results = await asyncio.gather( + *(self._lookup_performer_band_id(names_by_slug[slug]) for slug in slugs), + return_exceptions=True, + ) + out: dict[str, int | None] = {} + for slug, result in zip(slugs, raw_results, strict=True): + if isinstance(result, BaseException): + self.logger.warning( + "performer band lookup failed for %r: %s", + names_by_slug[slug], + result, + ) + out[slug] = None + else: + out[slug] = result + return out - Compares the per-result performer name (autocomplete's ``band_name``, - exposed as ``item.artist_name``) against any matching ``b`` result - in the same search. Same slug → real ``{band_id}``; different → - synthetic ``{band_id}:{slug}``. + async def _lookup_performer_band_id(self, performer_name: str) -> int | None: + """Find the band_id for a performer who has their own Bandcamp page. + + Negative results are cached as integer ``0`` because the cache layer + treats ``None`` as a miss. """ - band = bands_by_id.get(item.artist_id) - if band and slugify_performer(band.name) == slugify_performer(item.artist_name or ""): - return str(item.artist_id) - return make_artist_id(item.artist_id, item.artist_name) + target_slug = slugify_performer(performer_name) + if not target_slug: + return None + cache_key = f"performer_band_id.{target_slug}" + cached = await self.mass.cache.get(cache_key, provider=self.instance_id) + if cached is not None: + try: + cached_int = int(cached) + except (ValueError, TypeError): + self.logger.warning( + "Discarding corrupt performer_band_id cache for %r: %r", + target_slug, + cached, + ) + else: + return cached_int or None + band_id = await self._fetch_performer_band_id(performer_name, target_slug) + await self.mass.cache.set( + cache_key, + band_id or 0, + expiration=CACHE_METADATA, + provider=self.instance_id, + ) + return band_id + + @throttle_with_retries + async def _fetch_performer_band_id(self, performer_name: str, target_slug: str) -> int | None: + """Autocomplete-search for ``performer_name``, return the first matching band_id. + + ``is_label`` results are skipped so a same-named label doesn't + masquerade as the performer's band page. + """ + try: + results = await self._client.search(performer_name) + except BandcampRateLimitError as error: + raise ResourceTemporarilyUnavailable( + "Bandcamp rate limit reached", backoff_time=error.retry_after + ) from error + except BandcampAPIError: + return None + for item in results: + if ( + isinstance(item, SearchResultArtist) + and not item.is_label + and slugify_performer(item.name) == target_slug + ): + return int(item.id) + return None @throttle_with_retries async def _fetch_collection_page( @@ -495,12 +604,7 @@ async def _get_synthetic_artist( except BandcampAPIError as error: raise MediaNotFoundError(f"Failed to get artist {prov_artist_id}") from error - matching = [ - item - for item in api_discography - if slugify_performer(str(item.get("artist_name") or item.get("band_name") or "")) - == performer_slug - ] + matching = self._filter_discography_by_performer(api_discography, performer_slug) if not matching: raise MediaNotFoundError(f"Artist {prov_artist_id} not found on Bandcamp") @@ -527,10 +631,38 @@ async def _fetch_discography(self, band_id: int) -> list[dict[str, Any]]: (``"{band_id}:{slug}"``) lookups both go through this so the underlying ``mobile/24/band_details`` call hits once per band per cache window, not once per ``prov_artist_id``. + + Return type is ``list[dict[str, Any]]`` rather than + ``list[DiscographyItem]`` because the cache controller falls back to + ``isinstance(value, value_type)`` on deserialization, which TypedDict + does not support. Callers cast at the converter boundary. """ result: list[dict[str, Any]] = await self._client.get_artist_discography(band_id) return result + @staticmethod + def _filter_discography_by_performer( + items: list[dict[str, Any]], performer_slug: str + ) -> list[dict[str, Any]]: + """Filter discography rows down to those credited to a given performer slug.""" + return [ + item + for item in items + if slugify_performer(str(item.get("artist_name") or item.get("band_name") or "")) + == performer_slug + ] + + async def _resolve_artist_item_id( + self, *, band_id: int, performer: str | None, band_name: str + ) -> str: + """Resolve a single album/track's artist item_id (no batch context).""" + if not performer or slugify_performer(performer) == slugify_performer(band_name): + return str(band_id) + real_band_id = await self._lookup_performer_band_id(performer) + if real_band_id is not None: + return str(real_band_id) + return make_artist_id(band_id, performer) + @use_cache(CACHE_METADATA) @throttle_with_retries async def get_album(self, prov_album_id: str) -> Album: @@ -538,7 +670,6 @@ async def get_album(self, prov_album_id: str) -> Album: artist_id, album_id, _ = split_id(prov_album_id) try: api_album = await self._client.get_album(artist_id, album_id) - return self._converters.album_from_api(api_album) except BandcampNotFoundError as error: raise MediaNotFoundError(f"Album {prov_album_id} not found on Bandcamp") from error except BandcampRateLimitError as error: @@ -547,6 +678,12 @@ async def get_album(self, prov_album_id: str) -> Album: ) from error except BandcampAPIError as error: raise MediaNotFoundError(f"Failed to get album {prov_album_id}") from error + artist_item_id = await self._resolve_artist_item_id( + band_id=api_album.artist.id, + performer=api_album.tralbum_artist, + band_name=api_album.artist.name, + ) + return self._converters.album_from_api(api_album, artist_item_id=artist_item_id) @throttle_with_retries async def _fetch_api_track(self, item_id: str) -> tuple[BCTrack, BCAlbum | None]: @@ -585,21 +722,33 @@ async def get_track(self, prov_track_id: str) -> Track: """Get full track details by id.""" api_track, api_album = await self._fetch_api_track(prov_track_id) if api_album: + artist_item_id = await self._resolve_artist_item_id( + band_id=api_album.artist.id, + performer=api_album.tralbum_artist, + band_name=api_album.artist.name, + ) return self._converters.track_from_api( track=api_track, album_id=api_album.id, album_name=api_album.title, album_image_url=api_album.art_url or "", tralbum_artist=api_album.tralbum_artist, + artist_item_id=artist_item_id, ) # Standalone tracks (album_id=0) carry the performer credit on # the track itself when fetched directly from tralbum_details. + artist_item_id = await self._resolve_artist_item_id( + band_id=api_track.artist.id, + performer=api_track.tralbum_artist, + band_name=api_track.artist.name, + ) return self._converters.track_from_api( track=api_track, album_id=api_track.album.id if api_track.album else None, album_name=api_track.album.title if api_track.album else "", album_image_url=(api_track.album.art_url if api_track.album else "") or "", tralbum_artist=api_track.tralbum_artist, + artist_item_id=artist_item_id, ) @use_cache(CACHE_METADATA) @@ -609,21 +758,6 @@ async def get_album_tracks(self, prov_album_id: str) -> list[Track]: artist_id, album_id, _ = split_id(prov_album_id) try: api_album = await self._client.get_album(artist_id, album_id) - if api_album.tracks: - return [ - self._converters.track_from_api( - track=track, - album_id=album_id, - album_name=api_album.title, - album_image_url=api_album.art_url or "", - tralbum_artist=api_album.tralbum_artist, - ) - for track in api_album.tracks - if track.streaming_url # Only include tracks with streaming URLs - ] - - return [] - except BandcampNotFoundError as error: raise MediaNotFoundError( f"Album tracks for {prov_album_id} not found on Bandcamp" @@ -634,6 +768,25 @@ async def get_album_tracks(self, prov_album_id: str) -> list[Track]: ) from error except BandcampAPIError as error: raise MediaNotFoundError(f"Failed to get albums tracks for {prov_album_id}") from error + if not api_album.tracks: + return [] + artist_item_id = await self._resolve_artist_item_id( + band_id=api_album.artist.id, + performer=api_album.tralbum_artist, + band_name=api_album.artist.name, + ) + return [ + self._converters.track_from_api( + track=track, + album_id=album_id, + album_name=api_album.title, + album_image_url=api_album.art_url or "", + tralbum_artist=api_album.tralbum_artist, + artist_item_id=artist_item_id, + ) + for track in api_album.tracks + if track.streaming_url # Only include tracks with streaming URLs + ] @use_cache(CACHE_METADATA) @throttle_with_retries @@ -669,17 +822,46 @@ async def get_artist_albums(self, prov_artist_id: str) -> list[Album]: if item.get("item_type") == "album" and item.get("item_id") ] if performer_slug is not None: - items = [ - item - for item in items - if slugify_performer(str(item.get("artist_name") or item.get("band_name") or "")) - == performer_slug - ] + items = self._filter_discography_by_performer(items, performer_slug) + + # Pre-resolve so this listing's artist links match what `get_album` + # produces on click; otherwise list and detail views diverge for the + # same performer. + names_by_slug: dict[str, str] = {} + for item in items: + performer = str(item.get("artist_name") or "") + band_name = str(item.get("band_name") or "") + if not performer: + continue + slug = slugify_performer(performer) + if not slug or slug == slugify_performer(band_name): + continue + names_by_slug.setdefault(slug, performer) + slug_to_real_id = await self._lookup_performer_band_ids_parallel(names_by_slug) + return [ - self._converters.album_from_discography_item(cast("DiscographyItem", item)) + self._converters.album_from_discography_item( + cast("DiscographyItem", item), + artist_item_id=self._discography_artist_item_id(item, slug_to_real_id), + ) for item in items ] + @staticmethod + def _discography_artist_item_id( + item: dict[str, Any], slug_to_real_id: dict[str, int | None] + ) -> str: + """Sync counterpart of ``_resolve_artist_item_id`` for a discography row.""" + band_id = int(item.get("band_id") or 0) + performer = str(item.get("artist_name") or "") + band_name = str(item.get("band_name") or "") + if not performer or slugify_performer(performer) == slugify_performer(band_name): + return str(band_id) + real_id = slug_to_real_id.get(slugify_performer(performer)) + if real_id is not None: + return str(real_id) + return make_artist_id(band_id, performer) + @use_cache(CACHE_METADATA) @throttle_with_retries async def get_artist_toptracks(self, prov_artist_id: str) -> list[Track]: diff --git a/music_assistant/providers/bandcamp/converters.py b/music_assistant/providers/bandcamp/converters.py index 6d8f8203ac..dba6b36b25 100644 --- a/music_assistant/providers/bandcamp/converters.py +++ b/music_assistant/providers/bandcamp/converters.py @@ -204,21 +204,23 @@ def track_from_api( album_image_url: str = "", *, tralbum_artist: str | None = None, + artist_item_id: str | None = None, ) -> MATrack: """Convert a Track object from the API to MA Track format. :param tralbum_artist: The per-album performer credit (``BCAlbum.tralbum_artist`` for tracks within an album, or ``BCTrack.tralbum_artist`` for standalone tracks). When set - and different from the band's own name, the artist link uses - a synthetic ``{band_id}:{slug}`` ID and the displayed artist + and different from the band's own name, the displayed artist name is the performer rather than the band. + :param artist_item_id: Pre-resolved artist item_id; falls back to + slug-based resolution if omitted. """ album_id = album_id or 0 _, bitrate, content_type = self.streaming_url_from_api(track.streaming_url or {}) band_name = track.artist.name display_name = tralbum_artist or band_name - artist_item_id = _resolve_artist_id( + artist_item_id = artist_item_id or _resolve_artist_id( band_id=track.artist.id, performer=tralbum_artist, band_name=band_name ) output = MATrack( @@ -307,13 +309,18 @@ def artist_from_api(self, artist: APIArtist) -> MAArtist: ) return output - def album_from_discography_item(self, item: DiscographyItem) -> MAAlbum: + def album_from_discography_item( + self, item: DiscographyItem, *, artist_item_id: str | None = None + ) -> MAAlbum: """Convert a raw discography dict to MA Album format. Discography items come from the band_details API and contain summary data (title, art_id, release_date string) without full album details. Fields not available from the discography endpoint (url, description) are omitted and populated later when get_album fetches full details. + + :param artist_item_id: Pre-resolved artist item_id; falls back to + slug-based resolution if omitted. """ band_id = item.get("band_id", 0) item_id = item.get("item_id", 0) @@ -323,7 +330,7 @@ def album_from_discography_item(self, item: DiscographyItem) -> MAAlbum: performer = item.get("artist_name") band_name = item.get("band_name") or "" display_name = performer or band_name - artist_item_id = _resolve_artist_id( + artist_item_id = artist_item_id or _resolve_artist_id( band_id=band_id, performer=performer, band_name=band_name ) @@ -373,12 +380,16 @@ def album_from_discography_item(self, item: DiscographyItem) -> MAAlbum: ) return output - def album_from_api(self, album: APIAlbum) -> MAAlbum: - """Convert an API Album object to MA Album format.""" + def album_from_api(self, album: APIAlbum, *, artist_item_id: str | None = None) -> MAAlbum: + """Convert an API Album object to MA Album format. + + :param artist_item_id: Pre-resolved artist item_id; falls back to + slug-based resolution if omitted. + """ album_id = f"{album.artist.id}-{album.id}" band_name = album.artist.name display_name = album.tralbum_artist or band_name - artist_item_id = _resolve_artist_id( + artist_item_id = artist_item_id or _resolve_artist_id( band_id=album.artist.id, performer=album.tralbum_artist, band_name=band_name ) output = MAAlbum( diff --git a/tests/providers/bandcamp/test_provider.py b/tests/providers/bandcamp/test_provider.py index 3e0bd22505..5d4f5a212b 100644 --- a/tests/providers/bandcamp/test_provider.py +++ b/tests/providers/bandcamp/test_provider.py @@ -211,7 +211,9 @@ def _search_album_mock( return item -def _search_artist_mock(*, artist_id: int = 123, name: str = "Test Artist") -> Mock: +def _search_artist_mock( + *, artist_id: int = 123, name: str = "Test Artist", is_label: bool = False +) -> Mock: """Construct a SearchResultArtist mock with concrete attribute values.""" item = Mock(spec=SearchResultArtist) item.id = artist_id @@ -219,6 +221,7 @@ def _search_artist_mock(*, artist_id: int = 123, name: str = "Test Artist") -> M item.url = "" item.image_url = None item.tags = None + item.is_label = is_label return item @@ -311,6 +314,274 @@ async def test_search_does_not_duplicate_existing_artists( assert [a.item_id for a in results.artists] == [str(band_id)] +async def test_search_unifies_label_release_to_real_performer_band( + provider: BandcampProvider, +) -> None: + """A label-released album whose performer has their own band page links to the real band. + + Regression test for music-assistant/support#5389 / Apollo Brown on + Hip Dozer. When a user searches by album name (e.g. "Night Moves"), + Bandcamp's autocomplete returns the album row with ``band_id`` = + Hip Dozer (the label) and ``artist_name`` = "Apollo Brown" — but no + ``b`` row for Apollo Brown's own page in the same response. The + secondary autocomplete lookup for "Apollo Brown" finds the real + band, and the album's artist link uses that real ``band_id`` + instead of a synthetic ``{label_id}:apollo-brown``. This unifies the + artist with what direct-search-by-artist would find. + """ + label_id = 4119123456 + apollo_band_id = 3658985110 + hipdozer_band = _search_artist_mock(artist_id=label_id, name="Hip Dozer", is_label=True) + night_moves = _search_album_mock(artist_id=label_id, artist_name="Apollo Brown", album_id=1) + + # Primary autocomplete for "Night Moves" returns the album + label. + # Secondary lookup for "Apollo Brown" returns the real artist `b` row. + primary_response = [hipdozer_band, night_moves] + secondary_response = [_search_artist_mock(artist_id=apollo_band_id, name="Apollo Brown")] + + apollo_real_artist = Mock(spec=Artist) + apollo_real_artist.item_id = str(apollo_band_id) + + async def fake_search(query: str) -> list[Mock]: + if query == "Apollo Brown": + return secondary_response + return primary_response + + with ( + patch.object(provider._client, "search", side_effect=fake_search) as mock_search, + patch.object( + provider, "get_artist", new_callable=AsyncMock, return_value=apollo_real_artist + ) as mock_get_artist, + ): + results = await provider.search( + "Night Moves", [MediaType.ALBUM, MediaType.ARTIST], limit=10 + ) + + # The album's artist link points to Apollo Brown's real band_id, + # NOT a synthetic `{label_id}:apollo-brown`. + album_artist_ids = {next(iter(cast("Album", a).artists)).item_id for a in results.albums} + assert album_artist_ids == {str(apollo_band_id)} + + # The artist results include Apollo Brown materialized via get_artist. + artist_ids = {a.item_id for a in results.artists} + assert str(apollo_band_id) in artist_ids + # No synthetic artist for a performer who has their own band page. + assert not any(":" in a.item_id for a in results.artists) + # Hip Dozer (the label) is still surfaced — it was a `b` row in + # the primary response and matches MediaType.ARTIST directly. + assert str(label_id) in artist_ids + + # Two autocomplete calls: the primary search + one secondary + # lookup for "Apollo Brown". (Hip Dozer didn't need a lookup — + # it was already the page owner.) + assert mock_search.call_count == 2 + mock_get_artist.assert_awaited_once_with(str(apollo_band_id)) + + +async def test_lookup_performer_band_id_caches_negative_result( + provider: BandcampProvider, mass_mock: Mock +) -> None: + """A performer with no own band page caches the 'not found' outcome. + + The cache layer treats ``None`` as a miss, so we store integer 0 as + the negative-cache sentinel. A repeated lookup for the same slug + must NOT trigger a second autocomplete call. + """ + cache_data: dict[str, int] = {} + + async def fake_get(key: str, **_: object) -> int | None: + return cache_data.get(key) + + async def fake_set(key: str, value: int, **_: object) -> None: + cache_data[key] = value + + mass_mock.cache.get.side_effect = fake_get + mass_mock.cache.set.side_effect = fake_set + + # Secondary search returns no matching b-row for "Mortaja" — the + # response contains an unrelated label. + unrelated = _search_artist_mock(artist_id=441379041, name="audiophob", is_label=True) + with patch.object( + provider._client, "search", new_callable=AsyncMock, return_value=[unrelated] + ) as mock_search: + first = await provider._lookup_performer_band_id("Mortaja") + second = await provider._lookup_performer_band_id("Mortaja") + + assert first is None + assert second is None + # The negative result is cached; the upstream search runs once. + assert mock_search.call_count == 1 + # Sentinel 0 was written for the slug. + assert cache_data["performer_band_id.mortaja"] == 0 + + +async def test_lookup_performer_band_id_skips_label_results( + provider: BandcampProvider, +) -> None: + """A label that shares a performer's exact name must not be returned as a band. + + Without the ``is_label`` filter, a label called "Apollo Brown" + (rare but possible) would shadow the actual artist's band page. + """ + label_with_same_name = _search_artist_mock(artist_id=999, name="Apollo Brown", is_label=True) + real_artist = _search_artist_mock(artist_id=3658985110, name="Apollo Brown") + + # Order matters: even when the label appears first, we skip it and + # return the non-label match. + with patch.object( + provider._client, + "search", + new_callable=AsyncMock, + return_value=[label_with_same_name, real_artist], + ): + result = await provider._lookup_performer_band_id("Apollo Brown") + assert result == 3658985110 + + +async def test_search_resilient_to_lookup_exception_in_one_slug( + provider: BandcampProvider, +) -> None: + """One slug's lookup raising must not kill the whole batch. + + ``_resolve_search_artist_ids`` runs lookups in parallel via + ``asyncio.gather(..., return_exceptions=True)``; an unexpected + exception in any single lookup must degrade that slug to "no own + band page" (synthetic fallback) rather than abort the entire search. + """ + label_a_id = 1111111 + label_b_id = 2222222 + real_b_band_id = 3658985110 + label_a = _search_artist_mock(artist_id=label_a_id, name="Label A", is_label=True) + label_b = _search_artist_mock(artist_id=label_b_id, name="Label B", is_label=True) + album_a = _search_album_mock(artist_id=label_a_id, artist_name="Performer A", album_id=10) + album_b = _search_album_mock(artist_id=label_b_id, artist_name="Performer B", album_id=20) + + async def fake_lookup(name: str) -> int | None: + if name == "Performer A": + raise RuntimeError("boom") + if name == "Performer B": + return real_b_band_id + return None + + with ( + patch.object( + provider._client, + "search", + new_callable=AsyncMock, + return_value=[label_a, label_b, album_a, album_b], + ), + patch.object(provider, "_lookup_performer_band_id", side_effect=fake_lookup), + patch.object( + provider, "get_artist", new_callable=AsyncMock, return_value=Mock(spec=Artist) + ), + ): + results = await provider.search("test", [MediaType.ALBUM], limit=20) + + # Performer A's lookup blew up → falls through to synthetic. + # Performer B's lookup succeeded → uses the real band_id. + album_artist_ids = {next(iter(cast("Album", a).artists)).item_id for a in results.albums} + assert f"{label_a_id}:performer-a" in album_artist_ids + assert str(real_b_band_id) in album_artist_ids + + +async def test_lookup_performer_band_id_corrupt_cache_falls_through( + provider: BandcampProvider, mass_mock: Mock +) -> None: + """A non-integer cache payload is discarded and a fresh fetch runs. + + Defensive guard: if the cache returns a value that can't be coerced + to int (schema change, manual edit, …), the lookup must NOT raise + ``ValueError`` into the parallel ``asyncio.gather`` — it must log, + discard the entry, and re-fetch from the upstream API. + """ + mass_mock.cache.get.return_value = "not-an-int" + + real_band_id = 3658985110 + real_artist = _search_artist_mock(artist_id=real_band_id, name="Apollo Brown") + with patch.object( + provider._client, "search", new_callable=AsyncMock, return_value=[real_artist] + ) as mock_search: + result = await provider._lookup_performer_band_id("Apollo Brown") + + assert result == real_band_id + mock_search.assert_awaited_once_with("Apollo Brown") + + +async def test_search_suppresses_retries_exhausted_from_get_artist( + provider: BandcampProvider, +) -> None: + """``RetriesExhausted`` from ``get_artist`` must not crash an in-progress search. + + The materialized-artist emission is best-effort — its failure should + leave album/track results intact rather than propagate up. + """ + label_id = 4119123456 + apollo_band_id = 3658985110 + label_band = _search_artist_mock(artist_id=label_id, name="Hip Dozer", is_label=True) + night_moves = _search_album_mock(artist_id=label_id, artist_name="Apollo Brown", album_id=1) + secondary_response = [_search_artist_mock(artist_id=apollo_band_id, name="Apollo Brown")] + + async def fake_search(query: str) -> list[Mock]: + if query == "Apollo Brown": + return secondary_response + return [label_band, night_moves] + + with ( + patch.object(provider._client, "search", side_effect=fake_search), + patch.object( + provider, + "get_artist", + new_callable=AsyncMock, + side_effect=RetriesExhausted("throttle exhausted"), + ), + ): + results = await provider.search("Night Moves", [MediaType.ALBUM, MediaType.ARTIST]) + + # Album result still produced despite the artist-materialization failure. + assert len(results.albums) == 1 + # The artist-materialization branch was suppressed → no Apollo Brown + # in artists, but Hip Dozer (the in-batch `b` row) is still surfaced. + artist_ids = {a.item_id for a in results.artists} + assert str(label_id) in artist_ids + assert str(apollo_band_id) not in artist_ids + + +async def test_get_album_unifies_label_release_to_real_performer_band( + provider: BandcampProvider, +) -> None: + """Fetching a label-released album embeds the performer's real band_id. + + Regression test paralleling + :func:`test_search_unifies_label_release_to_real_performer_band` but + on the album-fetch path. When MA opens or syncs a label-released + album, the artist link must point to the performer's own band page + rather than a synthetic ID — otherwise list view and detail view + diverge. + """ + label_id = 4119123456 + apollo_band_id = 3658985110 + + api_album = Mock() + api_album.artist.id = label_id + api_album.artist.name = "Hip Dozer" + api_album.tralbum_artist = "Apollo Brown" + + secondary_response = [_search_artist_mock(artist_id=apollo_band_id, name="Apollo Brown")] + + with ( + patch.object(provider._client, "get_album", new_callable=AsyncMock, return_value=api_album), + patch.object( + provider._client, "search", new_callable=AsyncMock, return_value=secondary_response + ) as mock_search, + patch.object(provider._converters, "album_from_api") as mock_converter, + ): + mock_converter.return_value = Mock() + await provider.get_album(f"{label_id}-456") + + mock_converter.assert_called_once_with(api_album, artist_item_id=str(apollo_band_id)) + mock_search.assert_awaited_once_with("Apollo Brown") + + async def test_search_without_identity(provider: BandcampProvider) -> None: """Test search returns empty results without identity token.""" provider._client.identity = None @@ -556,6 +827,9 @@ async def test_get_artist_not_found(provider: BandcampProvider) -> None: async def test_get_album_success(provider: BandcampProvider) -> None: """Test successful album retrieval.""" mock_album = Mock() + mock_album.artist.id = 123 + mock_album.artist.name = "Test Band" + mock_album.tralbum_artist = None with ( patch.object(provider._client, "get_album", new_callable=AsyncMock) as mock_get_album, @@ -567,6 +841,8 @@ async def test_get_album_success(provider: BandcampProvider) -> None: result = await provider.get_album("123-456") mock_get_album.assert_called_once_with(123, 456) + # Plain band_id passes through when there's no performer credit. + mock_converter.assert_called_once_with(mock_album, artist_item_id="123") assert result is not None @@ -575,6 +851,9 @@ async def test_get_track_success(provider: BandcampProvider) -> None: mock_album = Mock() mock_track = Mock() mock_album.tracks = [mock_track] + mock_album.artist.id = 123 + mock_album.artist.name = "Test Band" + mock_album.tralbum_artist = None mock_track.id = 789 with ( @@ -601,8 +880,11 @@ async def test_get_track_standalone(provider: BandcampProvider) -> None: mock_api_track.album = mock_album_obj # Standalone single tracks carry their own performer credit; the # provider forwards it to the converter so synthetic IDs are emitted - # for label-released singles. + # for label-released singles. Performer matches band name here so the + # secondary lookup short-circuits to the plain band_id. mock_api_track.tralbum_artist = "Test Artist" + mock_api_track.artist.id = 123 + mock_api_track.artist.name = "Test Artist" with ( patch.object(provider._client, "get_track", new_callable=AsyncMock) as mock_get_track, @@ -620,6 +902,7 @@ async def test_get_track_standalone(provider: BandcampProvider) -> None: album_name="Standalone Album", album_image_url="http://example.com/art.jpg", tralbum_artist="Test Artist", + artist_item_id="123", ) assert result is not None @@ -629,6 +912,8 @@ async def test_get_track_standalone_no_album(provider: BandcampProvider) -> None mock_api_track = Mock() mock_api_track.album = None mock_api_track.tralbum_artist = None + mock_api_track.artist.id = 123 + mock_api_track.artist.name = "Test Band" with ( patch.object(provider._client, "get_track", new_callable=AsyncMock) as mock_get_track, @@ -646,6 +931,7 @@ async def test_get_track_standalone_no_album(provider: BandcampProvider) -> None album_name="", album_image_url="", tralbum_artist=None, + artist_item_id="123", ) assert result is not None @@ -667,6 +953,9 @@ async def test_get_album_tracks_success(provider: BandcampProvider) -> None: mock_album.tracks = [mock_track] mock_album.title = "Test Album" mock_album.art_url = "http://example.com/art.jpg" + mock_album.artist.id = 123 + mock_album.artist.name = "Test Band" + mock_album.tralbum_artist = None with ( patch.object(provider._client, "get_album", new_callable=AsyncMock) as mock_get_album, @@ -741,6 +1030,75 @@ async def test_get_artist_albums_label_uses_band_id(provider: BandcampProvider) assert artists[0].item_id == "9999" +async def test_get_artist_albums_label_unifies_performer_to_real_band( + provider: BandcampProvider, +) -> None: + """Listing a label's discography unifies label-released performers to their real band pages. + + When the user navigates to a label artist (e.g. Hip Dozer), the + discography listing must use real performer band_ids — matching what + ``get_album`` would emit on click. Otherwise the album list shows + synthetic IDs while the album detail shows real IDs, sending the + same artist-name link to two different destinations. + """ + label_id = 4119123456 + apollo_band_id = 3658985110 + mock_discography = [ + { + "item_type": "album", + "band_id": label_id, + "item_id": 686338649, + "title": "Night Moves", + "artist_name": "Apollo Brown", + "band_name": "Hip Dozer", + "art_id": 2560657053, + "release_date": "01 Jan 2020 00:00:00 GMT", + }, + { + "item_type": "album", + "band_id": label_id, + "item_id": 100, + "title": "Label Compilation", + # Band-by-itself row: artist_name == band_name → no lookup. + "artist_name": "Hip Dozer", + "band_name": "Hip Dozer", + "art_id": 1234, + "release_date": "01 Jun 2021 00:00:00 GMT", + }, + ] + secondary_response = [_search_artist_mock(artist_id=apollo_band_id, name="Apollo Brown")] + + with ( + patch.object( + provider._client, + "get_artist_discography", + new_callable=AsyncMock, + return_value=mock_discography, + ), + patch.object( + provider._client, + "search", + new_callable=AsyncMock, + return_value=secondary_response, + ) as mock_search, + ): + result = await provider.get_artist_albums(str(label_id)) + + by_name = {album.name: album for album in result} + apollo_album = by_name["Night Moves"] + label_album = by_name["Label Compilation"] + + # Label-released performer with own band page → real band_id, not synthetic. + assert next(iter(apollo_album.artists)).item_id == str(apollo_band_id) + # Band-by-itself row: artist_name slug == band_name slug → plain band_id, + # no lookup attempted. + assert next(iter(label_album.artists)).item_id == str(label_id) + + # One lookup for the unique unmapped performer; the band-by-itself row + # short-circuited before reaching the lookup. + mock_search.assert_awaited_once_with("Apollo Brown") + + async def test_get_artist_albums_synthetic_id_filters_discography( provider: BandcampProvider, ) -> None: From fde4740ed338e388cd12fae435fba84a3c2c5926 Mon Sep 17 00:00:00 2001 From: David Bishop Date: Sun, 3 May 2026 07:00:56 -0700 Subject: [PATCH 3/3] Bandcamp: Address review feedback on docstrings and error handling - Reformat multi-line docstrings to project standard (description on its own line below the opening triple-quote). - Strip implementation details from docstrings; relocate the still-useful bits to inline comments and add a Sphinx :param: to _fetch_discography. - Log a warning when BandcampAPIError is swallowed in _fetch_performer_band_id so it matches the rest of the file's error-handling convention. --- .../providers/bandcamp/__init__.py | 126 ++++++++---------- music_assistant/providers/bandcamp/_ids.py | 19 ++- .../providers/bandcamp/converters.py | 54 ++++---- 3 files changed, 86 insertions(+), 113 deletions(-) diff --git a/music_assistant/providers/bandcamp/__init__.py b/music_assistant/providers/bandcamp/__init__.py index f04adc46d3..757af284ff 100644 --- a/music_assistant/providers/bandcamp/__init__.py +++ b/music_assistant/providers/bandcamp/__init__.py @@ -118,7 +118,8 @@ async def get_config_entries( def split_id(id_: str) -> tuple[int, int, int]: - """Return (artist_id, album_id, track_id). Missing parts are returned as 0. + """ + Return (artist_id, album_id, track_id). Missing parts are returned as 0. :param id_: Compound ID string, e.g. "123-456-789". :raises InvalidDataError: If the ID contains non-numeric parts. @@ -183,18 +184,7 @@ def is_streaming_provider(self) -> bool: async def search( self, search_query: str, media_types: list[MediaType], limit: int = 50 ) -> SearchResults: - """Perform search on music provider. - - Bandcamp's autocomplete returns three result kinds: bands/labels (b), - albums (a), and tracks (t). For album/track results, ``band_id`` is - the page owner (could be a label) and ``band_name`` is the *performer* - credit — which for label-released albums differs from the page - owner's own name. We map performer-without-a-band-page entries onto - synthetic artists keyed by ``{band_id}:{slug}`` and emit them in - artist results too, so searching for e.g. "Mortaja" surfaces - Mortaja-on-audiophob even though the performer has no band page of - their own. See :mod:`._ids` for ID semantics. - """ + """Perform search on music provider.""" results = SearchResults() if not media_types: return results @@ -285,10 +275,7 @@ async def _resolve_search_artist_ids( capped: Sequence[SearchResultItem], bands_by_id: dict[int, SearchResultArtist], ) -> dict[int, str]: - """Resolve artist item_ids for every album/track row in a search batch. - - Keyed by ``id(row)`` so the materialization loop is a sync lookup. - """ + """Resolve artist item_ids for every album/track row, keyed by ``id(row)``.""" rows: list[SearchResultAlbum | SearchResultTrack] = [ row for row in capped if isinstance(row, (SearchResultAlbum, SearchResultTrack)) ] @@ -343,11 +330,7 @@ async def _lookup_performer_band_ids_parallel( return out async def _lookup_performer_band_id(self, performer_name: str) -> int | None: - """Find the band_id for a performer who has their own Bandcamp page. - - Negative results are cached as integer ``0`` because the cache layer - treats ``None`` as a miss. - """ + """Find the band_id for a performer who has their own Bandcamp page.""" target_slug = slugify_performer(performer_name) if not target_slug: return None @@ -363,6 +346,8 @@ async def _lookup_performer_band_id(self, performer_name: str) -> int | None: cached, ) else: + # Negative results are persisted as 0 since the cache layer + # treats None as a miss. return cached_int or None band_id = await self._fetch_performer_band_id(performer_name, target_slug) await self.mass.cache.set( @@ -375,20 +360,20 @@ async def _lookup_performer_band_id(self, performer_name: str) -> int | None: @throttle_with_retries async def _fetch_performer_band_id(self, performer_name: str, target_slug: str) -> int | None: - """Autocomplete-search for ``performer_name``, return the first matching band_id. - - ``is_label`` results are skipped so a same-named label doesn't - masquerade as the performer's band page. - """ + """Autocomplete-search for ``performer_name``; return the first non-label band match.""" try: results = await self._client.search(performer_name) except BandcampRateLimitError as error: raise ResourceTemporarilyUnavailable( "Bandcamp rate limit reached", backoff_time=error.retry_after ) from error - except BandcampAPIError: + except BandcampAPIError as error: + self.logger.warning( + "Bandcamp autocomplete failed for performer %r: %s", performer_name, error + ) return None for item in results: + # Skip labels so a same-named label doesn't masquerade as the band page. if ( isinstance(item, SearchResultArtist) and not item.is_label @@ -404,7 +389,8 @@ async def _fetch_collection_page( older_than_token: str | None, fan_id: int | None, ) -> CollectionSummary: - """Fetch a single page of collection items with throttling and retry. + """ + Fetch a single page of collection items with throttling and retry. :param collection_type: The type of collection to fetch. :param older_than_token: Pagination cursor from the previous page. @@ -426,7 +412,8 @@ async def _get_all_collection_items( collection_type: CollectionType, fan_id: int | None = None, ) -> list[CollectionItem | FollowingItem | FanItem]: - """Fetch all pages of a collection endpoint. + """ + Fetch all pages of a collection endpoint. :param collection_type: The type of collection to fetch. :param fan_id: Fan ID to query. None = authenticated user. @@ -525,7 +512,8 @@ async def get_library_tracks(self) -> AsyncGenerator[Track, None]: @use_cache(CACHE_METADATA) @throttle_with_retries async def get_artist(self, prov_artist_id: str) -> Artist: - """Get full artist details by id. + """ + Get full artist details by id. Accepts both forms: ``"{band_id}"`` (a real band/label page) and ``"{band_id}:{slug}"`` (a synthetic per-page performer that has @@ -560,22 +548,7 @@ async def get_artist(self, prov_artist_id: str) -> Artist: async def _get_synthetic_artist( self, prov_artist_id: str, band_id: int, performer_slug: str ) -> Artist: - """Resolve a synthetic ID to either the real band or a per-page performer. - - Order matters. Bandcamp's autocomplete sometimes returns ``a``/``t`` - rows for a band without the matching ``b`` row in the same response - (typical for queries that are album/track titles rather than the - band's name). The search-time path mints a synthetic - ``{band_id}:slug-of-band-own-name`` in that case because it can't - tell band-by-itself from label-release without the ``b`` row. We - collapse that drift here, on the navigation/persistence path, by - resolving the synthetic to the real band whenever the slug equals - the band's own slug — BEFORE consulting the discography. Otherwise - an album whose performer-name happens to match the band's own name - (band-by-itself items with ``artist_name=null``) would build a - shadow synthetic that lives parallel to the real band in MA's - library and produces duplicate artist entries. - """ + """Resolve a synthetic ID to either the real band or a per-page performer.""" try: api_artist = await self._client.get_artist(band_id) except BandcampNotFoundError as error: @@ -587,6 +560,10 @@ async def _get_synthetic_artist( except BandcampAPIError as error: raise MediaNotFoundError(f"Failed to get artist {prov_artist_id}") from error + # Collapse synthetics whose slug matches the band's own name back to + # the real band BEFORE consulting the discography. Otherwise a + # band-by-itself item with `artist_name=null` would mint a shadow + # synthetic parallel to the real band, producing duplicates. if slugify_performer(api_artist.name) == performer_slug: return self._converters.artist_from_api(api_artist) @@ -625,18 +602,16 @@ async def _get_synthetic_artist( @use_cache(CACHE_METADATA) @throttle_with_retries async def _fetch_discography(self, band_id: int) -> list[dict[str, Any]]: - """Fetch a band's discography keyed by band_id (cached). - - Real artist (``"{band_id}"``) and synthetic performer - (``"{band_id}:{slug}"``) lookups both go through this so the - underlying ``mobile/24/band_details`` call hits once per band per - cache window, not once per ``prov_artist_id``. + """ + Fetch a band's discography (cached). - Return type is ``list[dict[str, Any]]`` rather than - ``list[DiscographyItem]`` because the cache controller falls back to - ``isinstance(value, value_type)`` on deserialization, which TypedDict - does not support. Callers cast at the converter boundary. + :param band_id: The Bandcamp ``band_id`` (page owner) whose + discography to fetch. """ + # Return type is `list[dict[str, Any]]` rather than + # `list[DiscographyItem]`: the cache controller's deserializer + # uses `isinstance` checks which TypedDict does not support. + # Callers cast at the converter boundary. result: list[dict[str, Any]] = await self._client.get_artist_discography(band_id) return result @@ -687,7 +662,8 @@ async def get_album(self, prov_album_id: str) -> Album: @throttle_with_retries async def _fetch_api_track(self, item_id: str) -> tuple[BCTrack, BCAlbum | None]: - """Fetch a raw API track and its parent album by compound item ID. + """ + Fetch a raw API track and its parent album by compound item ID. Uses get_album when album_id is present (most tracks), falling back to get_track for standalone tracks (album_id=0). @@ -791,7 +767,8 @@ async def get_album_tracks(self, prov_album_id: str) -> list[Track]: @use_cache(CACHE_METADATA) @throttle_with_retries async def get_artist_albums(self, prov_artist_id: str) -> list[Album]: - """Get albums by an artist. + """ + Get albums by an artist. For real artist IDs this returns the band's full discography (the original behavior). For synthetic IDs (``{band_id}:{slug}``) this @@ -878,7 +855,8 @@ async def get_artist_toptracks(self, prov_artist_id: str) -> list[Track]: return tracks[: self.top_tracks_limit] async def browse(self, path: str) -> Sequence[MediaItemType | ItemMapping | BrowseFolder]: - """Browse this provider's items. + """ + Browse this provider's items. :param path: The path to browse, (e.g. provider_id://artists). """ @@ -927,7 +905,8 @@ async def _browse_person( path_parts: list[str], base: str, ) -> Sequence[MediaItemType | ItemMapping | BrowseFolder]: - """Route person browse paths: fans/followers and their sub-categories. + """ + Route person browse paths: fans/followers and their sub-categories. Pattern: (fans|followers)[/{id}[/(collection|wishlist|following|fans|followers)]*] """ @@ -975,12 +954,12 @@ async def _browse_person( # --- Person browse helpers (fans, followers, and social graph traversal) --- async def _resolve_person_segment(self, segment: str) -> int | None: - """Resolve a path segment to a fan_id. + """ + Resolve a path segment to a fan_id. - Checks the slug→fan_id cache first, then tries numeric parse. - For unknown slugs, rebuilds the cache from fan/follower lists and retries. - Returns None if the segment is neither a known slug nor a valid int, - or if it is a known sub-route name (e.g. "collection", "wishlist"). + Returns None if the segment is neither a known slug nor a valid + int, or if it is a known sub-route name (e.g. "collection", + "wishlist"). """ if segment in self._slug_to_fan_id: return self._slug_to_fan_id[segment] @@ -1007,7 +986,8 @@ async def _rebuild_slug_cache(self) -> None: @staticmethod def _fan_slug(person: FanItem) -> str | None: - """Extract the URL slug from a FanItem's url. + """ + Extract the URL slug from a FanItem's url. e.g. "https://bandcamp.com/teancom" → "teancom" """ @@ -1082,7 +1062,8 @@ def _deserialize_content_item(item: dict[str, object]) -> Album | Track: async def _browse_person_content( self, person_id: int | None, collection_type: CollectionType ) -> list[Album | Track]: - """Fetch a person's collection or wishlist items. + """ + Fetch a person's collection or wishlist items. :param person_id: Person to query. None = authenticated user. """ @@ -1113,7 +1094,8 @@ async def _browse_person_content( @throttle_with_retries async def _browse_person_following(self, person_id: int | None) -> list[Artist]: - """Fetch a person's followed artists. + """ + Fetch a person's followed artists. :param person_id: Person to query. None = authenticated user. """ @@ -1148,7 +1130,8 @@ async def _browse_person_people( base_path: str, person_id: int | None = None, ) -> list[BrowseFolder]: - """Fetch a person's fans or followers as browsable folders. + """ + Fetch a person's fans or followers as browsable folders. :param collection_type: FOLLOWING_FANS or FOLLOWERS. :param base_path: Browse path prefix for the resulting folder links. @@ -1179,7 +1162,8 @@ async def _browse_person_people( return folders async def get_stream_details(self, item_id: str, media_type: MediaType) -> StreamDetails: - """Return the content details for the given track. + """ + Return the content details for the given track. Fetches fresh from the Bandcamp API since streaming URLs may expire. """ diff --git a/music_assistant/providers/bandcamp/_ids.py b/music_assistant/providers/bandcamp/_ids.py index dbe46ccaa6..f6dfd7978c 100644 --- a/music_assistant/providers/bandcamp/_ids.py +++ b/music_assistant/providers/bandcamp/_ids.py @@ -1,4 +1,5 @@ -"""Composite artist ID utilities for the Bandcamp provider. +""" +Composite artist ID utilities for the Bandcamp provider. Bandcamp's data model treats labels as bands: an album published on a label's page (e.g. ``audiophob``) reports ``band_id`` = the label, while the @@ -31,14 +32,8 @@ def slugify_performer(name: str) -> str: - """Reduce a performer name to a stable lowercase slug. - - Two performer names compare equal as artists when their slugs match. - The transform is intentionally lossy — punctuation and case are - discarded — so minor variations like ``"&"`` vs ``"and"`` collapse - together. Bandcamp's per-album performer field is consistent within a - band page in practice, so within-band collisions are rare; cross-band - collisions are not a concern because ``band_id`` is part of the key. + """ + Reduce a performer name to a stable lowercase slug. :param name: Raw performer name from the Bandcamp API. :returns: A slug containing only ``[a-z0-9-]`` with no leading or @@ -49,7 +44,8 @@ def slugify_performer(name: str) -> str: def make_artist_id(band_id: int | str, performer: str | None = None) -> str: - """Build the artist ``item_id`` used on Music Assistant media items. + """ + Build the artist ``item_id`` used on Music Assistant media items. :param band_id: The Bandcamp ``band_id`` (page owner). :param performer: Optional performer name. If provided and produces a @@ -66,7 +62,8 @@ def make_artist_id(band_id: int | str, performer: str | None = None) -> str: def parse_artist_id(artist_id: str) -> tuple[int, str | None]: - """Split an artist ``item_id`` into ``(band_id, performer_slug)``. + """ + Split an artist ``item_id`` into ``(band_id, performer_slug)``. :returns: A tuple where the second element is ``None`` for plain ``"{band_id}"`` IDs and a non-empty slug for synthetic ones. diff --git a/music_assistant/providers/bandcamp/converters.py b/music_assistant/providers/bandcamp/converters.py index dba6b36b25..e92a5884dd 100644 --- a/music_assistant/providers/bandcamp/converters.py +++ b/music_assistant/providers/bandcamp/converters.py @@ -53,7 +53,8 @@ def __init__(self, domain: str, instance_id: str): def streaming_url_from_api( streaming_info: dict[str, str], ) -> tuple[str | None, int | None, ContentType]: - """Parse streaming URL info. + """ + Parse streaming URL info. :param streaming_info: Dict of format keys to URLs from the Bandcamp API. """ @@ -77,13 +78,11 @@ def streaming_url_from_api( def track_from_search( self, item: SearchResultTrack, *, artist_item_id: str | None = None ) -> MATrack: - """Create a Track from new API SearchResultTrack. + """ + Create a Track from new API SearchResultTrack. - :param artist_item_id: Resolved artist item_id chosen by the provider's - cross-result dedup. If omitted, defaults to the synthetic form - ``"{artist_id}:{slug(artist_name)}"`` so callers without - disambiguation context still get correct labels-vs-performers - behavior. + :param artist_item_id: Pre-resolved artist item_id; falls back to + slug-based resolution if omitted. """ track_id = f"{item.artist_id}-{item.album_id or 0}-{item.id}" artist_item_id = artist_item_id or make_artist_id(item.artist_id, item.artist_name) @@ -124,11 +123,11 @@ def track_from_search( def album_from_search( self, item: SearchResultAlbum, *, artist_item_id: str | None = None ) -> MAAlbum: - """Create an Album from new API SearchResultAlbum. + """ + Create an Album from new API SearchResultAlbum. - :param artist_item_id: Resolved artist item_id chosen by the provider's - cross-result dedup. If omitted, defaults to the synthetic form - ``"{artist_id}:{slug(artist_name)}"``. + :param artist_item_id: Pre-resolved artist item_id; falls back to + slug-based resolution if omitted. """ album_id = f"{item.artist_id}-{item.id}" artist_item_id = artist_item_id or make_artist_id(item.artist_id, item.artist_name) @@ -206,12 +205,11 @@ def track_from_api( tralbum_artist: str | None = None, artist_item_id: str | None = None, ) -> MATrack: - """Convert a Track object from the API to MA Track format. + """ + Convert a Track object from the API to MA Track format. - :param tralbum_artist: The per-album performer credit - (``BCAlbum.tralbum_artist`` for tracks within an album, or - ``BCTrack.tralbum_artist`` for standalone tracks). When set - and different from the band's own name, the displayed artist + :param tralbum_artist: Per-album performer credit. When set and + different from the band's own name, the displayed artist name is the performer rather than the band. :param artist_item_id: Pre-resolved artist item_id; falls back to slug-based resolution if omitted. @@ -312,12 +310,8 @@ def artist_from_api(self, artist: APIArtist) -> MAArtist: def album_from_discography_item( self, item: DiscographyItem, *, artist_item_id: str | None = None ) -> MAAlbum: - """Convert a raw discography dict to MA Album format. - - Discography items come from the band_details API and contain summary - data (title, art_id, release_date string) without full album details. - Fields not available from the discography endpoint (url, description) - are omitted and populated later when get_album fetches full details. + """ + Convert a raw discography dict to MA Album format. :param artist_item_id: Pre-resolved artist item_id; falls back to slug-based resolution if omitted. @@ -381,7 +375,8 @@ def album_from_discography_item( return output def album_from_api(self, album: APIAlbum, *, artist_item_id: str | None = None) -> MAAlbum: - """Convert an API Album object to MA Album format. + """ + Convert an API Album object to MA Album format. :param artist_item_id: Pre-resolved artist item_id; falls back to slug-based resolution if omitted. @@ -441,11 +436,8 @@ def synthetic_artist( url: str | None = None, image_url: str | None = None, ) -> MAArtist: - """Build an Artist for a per-album performer that has no band page. - - These performers exist on Bandcamp only as a credit on a band's - page (typically a label), so we synthesize an Artist scoped to the - ``(band_id, performer)`` pair. See :mod:`._ids` for the ID format. + """ + Build an Artist for a per-album performer that has no band page. :param band_id: The hosting Bandcamp band's id. :param performer_name: The performer credit (display name). @@ -489,12 +481,12 @@ def _resolve_artist_id( performer: str | None, band_name: str | None, ) -> str: - """Choose between a real or synthetic artist item_id. + """ + Choose between a real or synthetic artist item_id. Returns the synthetic ``{band_id}:{slug(performer)}`` form only when the performer is set AND its slug differs from the band's own slug; - otherwise returns the plain ``{band_id}``. Missing inputs collapse - to the plain form rather than fabricating a meaningless synthetic. + otherwise returns the plain ``{band_id}``. """ if ( not performer