{tagline}
} diff --git a/front/src/components/items/item-grid.tsx b/front/src/components/items/item-grid.tsx index ff744a76..41037465 100644 --- a/front/src/components/items/item-grid.tsx +++ b/front/src/components/items/item-grid.tsx @@ -39,6 +39,7 @@ export const ItemGrid = ({ watchPercent, availableCount, seenCount, + videoSlug, horizontal = false, className, ...props @@ -53,6 +54,7 @@ export const ItemGrid = ({ kind: "movie" | "serie" | "collection"; availableCount?: number | null; seenCount?: number | null; + videoSlug: string | null; horizontal?: boolean; className?: string; }) => { @@ -89,6 +91,7 @@ export const ItemGrid = ({{t("show.links")}:
- {Object.entries(externalIds) - .filter(([_, data]) => data.link) - .map(([name, data]) => ( -{t("admin.unmatched.empty")}
-{t("admin.unmatched.empty")}
} /> ); }; diff --git a/scanner/pyproject.toml b/scanner/pyproject.toml index 2964bd29..5b9fcf31 100644 --- a/scanner/pyproject.toml +++ b/scanner/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "opentelemetry-instrumentation-fastapi>=0.59b0", "opentelemetry-sdk>=1.38.0", "pydantic>=2.11.4", + "pydantic-xml>=2.14.0", "pyjwt[crypto]>=2.10.1", "python-slugify>=8.0.4", "watchfiles>=1.0.5", diff --git a/scanner/scanner/client.py b/scanner/scanner/client.py index 3738957f..67e89108 100644 --- a/scanner/scanner/client.py +++ b/scanner/scanner/client.py @@ -54,6 +54,8 @@ class KyooClient(metaclass=Singleton): return VideoInfo(**await r.json()) async def create_videos(self, videos: list[Video]) -> list[VideoCreated]: + if len(videos) == 0: + return [] async with self._client.post( "videos", data=TypeAdapter(list[Video]).dump_json(videos, by_alias=True), diff --git a/scanner/scanner/identifiers/anilist.py b/scanner/scanner/identifiers/anilist.py new file mode 100644 index 00000000..248d0123 --- /dev/null +++ b/scanner/scanner/identifiers/anilist.py @@ -0,0 +1,371 @@ +from __future__ import annotations + +import re +import unicodedata +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from functools import cached_property +from logging import getLogger +from typing import Literal + +from aiohttp import ClientSession +from pydantic import field_validator +from pydantic_xml import BaseXmlModel, attr, element, wrapped + +from ..cache import cache +from ..models.metadataid import EpisodeId, MetadataId, SeasonId +from ..models.serie import Serie +from ..models.videos import Guess +from ..providers.names import ProviderName + +logger = getLogger(__name__) + + +class AnimeTitlesDb(BaseXmlModel, tag="animetitles"): + animes: list[AnimeTitlesEntry] = element(default=[]) + + @classmethod + def get_url(cls): + return "https://raw.githubusercontent.com/Anime-Lists/anime-lists/master/animetitles.xml" + + class 
AnimeTitlesEntry(BaseXmlModel, tag="anime"): + aid: str = attr() + titles: list[AnimeTitle] = element(default=[]) + + class AnimeTitle( + BaseXmlModel, + tag="title", + nsmap={"xml": "http://www.w3.org/XML/1998/namespace"}, + ): + type: str = attr() + lang: str = attr(ns="xml") + text: str + + +class AnimeListDb(BaseXmlModel, tag="anime-list"): + animes: list[AnimeEntry] = element(default=[]) + + @classmethod + def get_url(cls): + return "https://raw.githubusercontent.com/Anime-Lists/anime-lists/refs/heads/master/anime-list.xml" + + class AnimeEntry(BaseXmlModel, tag="anime"): + anidbid: str = attr() + tvdbid: str | None = attr(default=None) + defaulttvdbseason: int | Literal["a"] | None = attr(default=None) + episodeoffset: int = attr(default=0) + tmdbtv: str | None = attr(default=None) + tmdbid: str | None = attr(default=None) + imdbid: str | None = attr(default=None) + name: str | None = element(default=None) + mappings: list[EpisodeMapping] = wrapped( + "mapping-list", element(default=[], tag="mapping") + ) + + @field_validator("tvdbid", "tmdbtv", "tmdbid", "imdbid", "defaulttvdbseason") + @classmethod + def _empty_to_none(cls, v: str | None) -> str | None: + # pornographic titles have this id. 
+ if v == "hentai" or v == "": + return None + return v + + class EpisodeMapping(BaseXmlModel): + anidbseason: int = attr() + tvdbseason: int | None = attr(default=None) + start: int | None = attr(default=None) + end: int | None = attr(default=None) + offset: int = attr(default=0) + text: str | None = None + + @cached_property + def tvdb_mappings(self) -> dict[int, list[int]]: + if self.tvdbseason is None or not self.text: + return {} + ret = {} + for map in self.text.split(";"): + map = map.strip() + if not map or "-" not in map: + continue + [aid, tvdbids] = map.split("-", 1) + try: + ret[int(aid.strip())] = [ + int(x.strip()) for x in tvdbids.split("+") + ] + except ValueError: + continue + return ret + + +@dataclass +class AnimeListData: + fetched_at: datetime + # normalized title -> anidbid + titles: dict[str, str] = field(default_factory=dict) + # anidbid -> AnimeEntry + animes: dict[str, AnimeListDb.AnimeEntry] = field(default_factory=dict) + # tvdbid -> anidbid + tvdb_anidb: dict[str, list[str]] = field(default_factory=dict) + + +@cache(ttl=timedelta(days=30)) +async def get_anilist_data() -> AnimeListData: + logger.info("Fetching anime-lists XML databases...") + ret = AnimeListData(fetched_at=datetime.now()) + async with ClientSession( + headers={ + "User-Agent": "kyoo scanner v5", + }, + ) as session: + async with session.get(AnimeTitlesDb.get_url()) as resp: + resp.raise_for_status() + titles = AnimeTitlesDb.from_xml(await resp.read()) + ret.titles = { + normalize_title(title.text): x.aid + for x in titles.animes + for title in x.titles + } + async with session.get(AnimeListDb.get_url()) as resp: + resp.raise_for_status() + db = AnimeListDb.from_xml(await resp.read()) + ret.animes = {entry.anidbid: entry for entry in db.animes} + ret.tvdb_anidb = defaultdict(list) + for entry in db.animes: + if not entry.tvdbid: + continue + ret.tvdb_anidb[entry.tvdbid].append(entry.anidbid) + + logger.info( + "Loaded %d anime titles from animelist-xml.", + 
len(ret.titles), + ) + return ret + + +def normalize_title(title: str) -> str: + title = unicodedata.normalize("NFD", title) + title = "".join(c for c in title if unicodedata.category(c) != "Mn") + title = title.lower() + title = re.sub(r"[^\w\s]", "", title) + title = re.sub(r"\s+", " ", title).strip() + return title + + +def anidb_to_tvdb( + anime: AnimeListDb.AnimeEntry, + anidb_ep: int, +) -> tuple[int | None, list[int]]: + for map in anime.mappings: + if map.anidbseason != 1 or map.tvdbseason is None: + continue + + # Handle mapping overrides (;anidb-tvdb; format) + if anidb_ep in map.tvdb_mappings: + tvdb_eps = map.tvdb_mappings[anidb_ep] + # Mapped to 0 means no TVDB equivalent + if tvdb_eps[0] == 0: + return (None, []) + return (map.tvdbseason, tvdb_eps) + + # Check start/end range with offset + if ( + map.start is not None + and map.end is not None + and map.start <= anidb_ep <= map.end + ): + return (map.tvdbseason, [anidb_ep + map.offset]) + + if anime.defaulttvdbseason == "a": + return (None, [anidb_ep]) + return (anime.defaulttvdbseason, [anidb_ep + anime.episodeoffset]) + + +def tvdb_to_anidb( + animes: list[AnimeListDb.AnimeEntry], + tvdb_season: int, + tvdb_ep: int, +) -> list[tuple[AnimeListDb.AnimeEntry, int, int]]: + for anime in animes: + for map in anime.mappings: + if map.tvdbseason != tvdb_season: + continue + + # Handle mapping overrides (;anidb-tvdb; format) + overrides = [ + anidb_num + for anidb_num, tvdb_nums in map.tvdb_mappings.items() + if tvdb_ep in tvdb_nums + ] + if len(overrides): + return [(anime, map.anidbseason, ep) for ep in overrides] + + if map.start is not None and map.end is not None: + candidate = tvdb_ep - map.offset + if map.start <= candidate <= map.end: + return [(anime, map.anidbseason, candidate)] + + seasons = sorted( + ( + x + for x in animes + if x.defaulttvdbseason == tvdb_season and x.episodeoffset < tvdb_ep + ), + key=lambda x: x.episodeoffset, + reverse=True, + ) + + fallback = next( + iter(seasons), + next( 
+ (x for x in animes if x.defaulttvdbseason == "a"), + animes[0], + ), + ) + + return [(fallback, 1, tvdb_ep - fallback.episodeoffset)] + + +async def identify_anilist(_path: str, guess: Guess) -> Guess: + data = await get_anilist_data() + + aid = data.titles.get(normalize_title(guess.title)) + if aid is None: + return guess + anime = data.animes.get(aid) + if anime is None: + return guess + + new_external_id = dict(guess.external_id) + new_external_id[ProviderName.ANIDB] = aid + if anime.tvdbid: + new_external_id[ProviderName.TVDB] = anime.tvdbid + # tmdbtv is for TV series, tmdbid is for standalone movies + if anime.tmdbtv: + new_external_id[ProviderName.TMDB] = anime.tmdbtv + elif anime.tmdbid and "," not in anime.tmdbid: + new_external_id[ProviderName.TMDB] = anime.tmdbid + if anime.imdbid and "," not in anime.imdbid: + new_external_id[ProviderName.IMDB] = anime.imdbid + + # if we don't have a single external id, skip it and use the normal flow + if len(new_external_id) == 1: + return guess + + logger.info( + "Matched '%s' to AniDB id %s (tvdb=%s, tmdbid=%s)", + guess.title, + aid, + anime.tvdbid, + anime.tmdbid, + ) + + animes = ( + [data.animes[id] for id in data.tvdb_anidb.get(anime.tvdbid, [])] + if anime.tvdbid + else [] + ) + new_title = next( + (x.name for x in animes if x.defaulttvdbseason == 1), + next( + (x.name for x in animes if x.defaulttvdbseason == "a"), + anime.name, + ), + ) + + new_episodes: list[Guess.Episode] = [] + for ep in guess.episodes: + if ( + anime.tvdbid is None + or anime.defaulttvdbseason is None + or anime.defaulttvdbseason == 1 + ): + new_episodes.append( + Guess.Episode( + season=ep.season or (1 if anime.defaulttvdbseason else None), + episode=ep.episode, + ) + ) + continue + + # guess numbers are anidb-relative if defaulttvdbseason != 1 because + # the title already contains season information. 
+ tvdb_season, tvdb_eps = anidb_to_tvdb(anime, ep.episode) + new_episodes += [ + Guess.Episode( + season=tvdb_season, + episode=tvdb_ep, + ) + for tvdb_ep in tvdb_eps + ] + + kind = guess.kind + if ( + guess.kind == "movie" + and anime.tvdbid + and isinstance(anime.defaulttvdbseason, int) + ): + kind = "episode" + new_episodes.append( + Guess.Episode( + season=anime.defaulttvdbseason, + episode=1 + anime.episodeoffset, + ) + ) + + return Guess( + title=new_title or guess.title, + kind=kind, + extra_kind=guess.extra_kind, + years=guess.years, + episodes=new_episodes, + external_id=new_external_id, + raw=guess.raw, + from_="anilist", + history=[*guess.history, guess], + ) + + +async def anilist_enrich_ids(serie: Serie): + data = await get_anilist_data() + animes = [ + data.animes[aid] + for tvdb_id in serie.external_id[ProviderName.TVDB] + for aid in data.tvdb_anidb.get(tvdb_id.data_id, []) + ] + if not animes: + return serie + + serie.external_id[ProviderName.ANIDB] = [ + MetadataId( + data_id=anime.anidbid, + link=f"https://anidb.net/anime/{anime.anidbid}", + label=anime.name, + ) + for anime in animes + ] + + for season in serie.seasons: + season.external_id[ProviderName.ANIDB] = [ + SeasonId( + serie_id=anime.anidbid, + season=1, + link=f"https://anidb.net/anime/{anime.anidbid}", + label=anime.name, + ) + for anime in animes + if anime.defaulttvdbseason == season.season_number + or anime.defaulttvdbseason == "a" + ] + + for entry in serie.entries: + season = entry.season_number or 0 + episode = entry.episode_number or entry.number + if episode is None: + continue + entry.external_id[ProviderName.ANIDB] = [ + EpisodeId(serie_id=anime.anidbid, season=season, episode=ep) + for anime, season, ep in tvdb_to_anidb(animes, season, episode) + ] + + return serie diff --git a/scanner/scanner/identifiers/guess/guess.py b/scanner/scanner/identifiers/guess/guess.py index c1a2b32f..2b86ba63 100644 --- a/scanner/scanner/identifiers/guess/guess.py +++ 
b/scanner/scanner/identifiers/guess/guess.py @@ -21,7 +21,7 @@ def guessit( { "episode_prefer_number": True, "excludes": "language", - "expected_title": expected_titles, + "expected_titles": expected_titles, "enforce_list": True, "advanced": True, } diff --git a/scanner/scanner/identifiers/guess/rules.py b/scanner/scanner/identifiers/guess/rules.py index 9c38c0b9..7a55a655 100644 --- a/scanner/scanner/identifiers/guess/rules.py +++ b/scanner/scanner/identifiers/guess/rules.py @@ -76,6 +76,95 @@ class UnlistTitles(Rule): return [titles, [title]] +class ExpectedTitles(Rule): + """Fix both alternate names and seasons that are known titles but parsed differently by guessit + + Example: "JoJo's Bizarre Adventure - Diamond is Unbreakable - 12.mkv" + Default: + ```json + { + "title": "JoJo's Bizarre Adventure", + "alternative_title": "Diamond is Unbreakable", + "episode": 12, + } + ``` + Expected: + ```json + { + "title": "JoJo's Bizarre Adventure - Diamond is Unbreakable", + "episode": 12, + } + ``` + + Or + Example: 'Owarimonogatari S2 E15.mkv' + Default: + ```json + { + "title": "Owarimonogatari", + "season": 2, + "episode": 15 + } + ``` + Expected: + ```json + { + "title": "Owarimonogatari S2", + "episode": 15 + } + ``` + """ + + priority = POST_PROCESS + consequence = [RemoveMatch, AppendMatch] + + @override + def when(self, matches: Matches, context) -> Any: + from ..anilist import normalize_title + + titles: list[Match] = matches.named("title", lambda m: m.tagged("title")) # type: ignore + + if not titles or not context["expected_titles"]: + return + title = titles[0] + + # Greedily collect all adjacent matches that could be part of the title + absorbed: list[Match] = [] + current = title + while True: + nmatch: list[Match] = matches.next(current) + if not nmatch or not ( + nmatch[0].tagged("title") + or nmatch[0].named("season") + or nmatch[0].named("part") + ): + break + absorbed.append(nmatch[0]) + current = nmatch[0] + if not absorbed: + return + + # Try 
longest combined title first, then progressively shorter ones + for end in range(len(absorbed), 0, -1): + candidate_matches = absorbed[:end] + + mtitle = f"{title.value}" + prev = title + for m in candidate_matches: + holes: list[Match] = matches.holes(prev.end, m.start) # type: ignore + hole = "".join( + f" {h.value}" if h.value != "-" else " - " for h in holes + ) + mtitle = f"{mtitle}{hole}{m.value}" + prev = m + + if normalize_title(mtitle) in context["expected_titles"]: + new_title = copy(title) + new_title.end = candidate_matches[-1].end + new_title.value = mtitle + return [[title] + candidate_matches, [new_title]] + + class MultipleSeasonRule(Rule): """Understand `abcd Season 2 - 5.mkv` as S2E5 diff --git a/scanner/scanner/identifiers/identify.py b/scanner/scanner/identifiers/identify.py index 0c3187f4..41cc7db5 100644 --- a/scanner/scanner/identifiers/identify.py +++ b/scanner/scanner/identifiers/identify.py @@ -7,19 +7,23 @@ from typing import Callable, Literal, cast from rebulk.match import Match from ..models.videos import Guess, Video +from .anilist import get_anilist_data, identify_anilist from .guess.guess import guessit logger = getLogger(__name__) pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [ + identify_anilist, # TODO: add nfo scanner # TODO: add thexem - # TODO: add anilist ] async def identify(path: str) -> Video: - raw = guessit(path, expected_titles=[]) + raw = guessit( + path, + expected_titles=list((await get_anilist_data()).titles.keys()), + ) # guessit should only return one (according to the doc) title = raw.get("title", [])[0] diff --git a/scanner/scanner/models/collection.py b/scanner/scanner/models/collection.py index 1229357b..8e602b5d 100644 --- a/scanner/scanner/models/collection.py +++ b/scanner/scanner/models/collection.py @@ -10,7 +10,7 @@ class Collection(Model): original_language: Language | None genres: list[Genre] rating: int | None - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] 
translations: dict[Language, CollectionTranslation] = {} diff --git a/scanner/scanner/models/entry.py b/scanner/scanner/models/entry.py index cbc0bcd9..b8f4071f 100644 --- a/scanner/scanner/models/entry.py +++ b/scanner/scanner/models/entry.py @@ -26,7 +26,7 @@ class Entry(Model): # Special-specific fields number: int | None - external_id: dict[str, MetadataId | EpisodeId] + external_id: dict[str, list[MetadataId | EpisodeId]] translations: dict[Language, EntryTranslation] = {} videos: list[str] = [] extra: dict[str, Any] = Field(exclude=True) diff --git a/scanner/scanner/models/metadataid.py b/scanner/scanner/models/metadataid.py index 00d1a16a..8cf09b2f 100644 --- a/scanner/scanner/models/metadataid.py +++ b/scanner/scanner/models/metadataid.py @@ -6,19 +6,26 @@ from ..utils import Model class MetadataId(Model): data_id: str link: str | None = None + label: str | None = None @classmethod - def map_dict(cls, self: dict[str, MetadataId]): - return {k: v.data_id for k, v in self.items()} + def map_dict(cls, self: dict[str, list[MetadataId]]): + return {k: v[0].data_id for k, v in self.items()} @classmethod def merge( - cls, self: dict[str, MetadataId], other: dict[str, MetadataId] - ) -> dict[str, MetadataId]: + cls, + self: dict[str, list[MetadataId]], + other: dict[str, list[MetadataId]], + ) -> dict[str, list[MetadataId]]: ret = other | self for k in set(self.keys()) & set(other.keys()): - if ret[k].data_id == other[k].data_id and ret[k].link is None: - ret[k].link = other[k].link + for x in ret[k]: + if x.link is not None: + continue + o = next((ox for ox in other[k] if ox.data_id == x.data_id), None) + if o: + x.link = o.link return ret @@ -26,6 +33,7 @@ class SeasonId(Model): serie_id: str season: int link: str | None = None + label: str | None = None class EpisodeId(Model): diff --git a/scanner/scanner/models/movie.py b/scanner/scanner/models/movie.py index ca50f205..ddf4d3fd 100644 --- a/scanner/scanner/models/movie.py +++ b/scanner/scanner/models/movie.py @@ 
-26,7 +26,7 @@ class Movie(Model): runtime: int | None air_date: date | None - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] translations: dict[Language, MovieTranslation] = {} collection: Collection | None = None studios: list[Studio] = [] @@ -56,4 +56,4 @@ class SearchMovie(Model): air_date: date | None poster: str | None original_language: Language | None - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] diff --git a/scanner/scanner/models/season.py b/scanner/scanner/models/season.py index 0de0c0f1..4dd29dc3 100644 --- a/scanner/scanner/models/season.py +++ b/scanner/scanner/models/season.py @@ -13,7 +13,7 @@ class Season(Model): season_number: int start_air: date | None end_air: date | None - external_id: dict[str, SeasonId] + external_id: dict[str, list[SeasonId]] translations: dict[Language, SeasonTranslation] = {} extra: dict[str, Any] = Field(exclude=True) diff --git a/scanner/scanner/models/serie.py b/scanner/scanner/models/serie.py index cdf0402c..ea58646f 100644 --- a/scanner/scanner/models/serie.py +++ b/scanner/scanner/models/serie.py @@ -31,7 +31,7 @@ class Serie(Model): start_air: date | None end_air: date | None - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] translations: dict[Language, SerieTranslation] = {} seasons: list[Season] = [] entries: list[Entry] = [] @@ -64,4 +64,4 @@ class SearchSerie(Model): end_air: date | None poster: str | None original_language: Language | None - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] diff --git a/scanner/scanner/models/staff.py b/scanner/scanner/models/staff.py index dfe82bc3..5901e413 100644 --- a/scanner/scanner/models/staff.py +++ b/scanner/scanner/models/staff.py @@ -33,4 +33,4 @@ class Person(Model): name: str latin_name: str | None image: str | None - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] diff --git a/scanner/scanner/models/studio.py 
b/scanner/scanner/models/studio.py index eff7840b..6cdd1a60 100644 --- a/scanner/scanner/models/studio.py +++ b/scanner/scanner/models/studio.py @@ -6,7 +6,7 @@ from .metadataid import MetadataId class Studio(Model): slug: str - external_id: dict[str, MetadataId] + external_id: dict[str, list[MetadataId]] translations: dict[str, StudioTranslation] = {} diff --git a/scanner/scanner/providers/composite.py b/scanner/scanner/providers/composite.py index 9b44c031..cb16da1d 100644 --- a/scanner/scanner/providers/composite.py +++ b/scanner/scanner/providers/composite.py @@ -2,7 +2,9 @@ from typing import override from langcodes import Language +from scanner.identifiers.anilist import anilist_enrich_ids from scanner.models.metadataid import MetadataId +from scanner.providers.names import ProviderName from scanner.utils import uniq_by from ..models.movie import Movie, SearchMovie @@ -59,6 +61,8 @@ class CompositeProvider(Provider): ret.entries, lambda x: (x.season_number, x.episode_number, x.number, x.slug) ) + ret = await anilist_enrich_ids(ret) + # themoviedb has better global info than tvdb but tvdb has better entries info info = await self._themoviedb.get_serie( MetadataId.map_dict(ret.external_id), skip_entries=True diff --git a/scanner/scanner/providers/names.py b/scanner/scanner/providers/names.py index 704ccf27..a50cda53 100644 --- a/scanner/scanner/providers/names.py +++ b/scanner/scanner/providers/names.py @@ -2,3 +2,4 @@ class ProviderName: TMDB = "themoviedatabase" TVDB = "tvdb" IMDB = "imdb" + ANIDB = "anidb" diff --git a/scanner/scanner/providers/themoviedatabase.py b/scanner/scanner/providers/themoviedatabase.py index cadce35c..a861ccee 100644 --- a/scanner/scanner/providers/themoviedatabase.py +++ b/scanner/scanner/providers/themoviedatabase.py @@ -127,10 +127,12 @@ class TheMovieDatabase(Provider): poster=self._map_image(x["poster_path"]), original_language=Language.get(x["original_language"]), external_id={ - self.name: MetadataId( - data_id=str(x["id"]), 
- link=f"https://www.themoviedb.org/movie/{x['id']}", - ) + self.name: [ + MetadataId( + data_id=str(x["id"]), + link=f"https://www.themoviedb.org/movie/{x['id']}", + ) + ] }, ) for x in search @@ -163,17 +165,21 @@ class TheMovieDatabase(Provider): else None, external_id=( { - self.name: MetadataId( - data_id=str(movie["id"]), - link=f"https://www.themoviedb.org/movie/{movie['id']}", - ) + self.name: [ + MetadataId( + data_id=str(movie["id"]), + link=f"https://www.themoviedb.org/movie/{movie['id']}", + ) + ] } | ( { - ProviderName.IMDB: MetadataId( - data_id=str(movie["imdb_id"]), - link=f"https://www.imdb.com/title/{movie['imdb_id']}", - ) + ProviderName.IMDB: [ + MetadataId( + data_id=str(movie["imdb_id"]), + link=f"https://www.imdb.com/title/{movie['imdb_id']}", + ) + ] } if movie["imdb_id"] else {} @@ -256,10 +262,12 @@ class TheMovieDatabase(Provider): poster=self._map_image(x["poster_path"]), original_language=Language.get(x["original_language"]), external_id={ - self.name: MetadataId( - data_id=str(x["id"]), - link=f"https://www.themoviedb.org/tv/{x['id']}", - ) + self.name: [ + MetadataId( + data_id=str(x["id"]), + link=f"https://www.themoviedb.org/tv/{x['id']}", + ) + ] }, ) for x in search @@ -317,27 +325,33 @@ class TheMovieDatabase(Provider): if serie["last_air_date"] else None, external_id={ - self.name: MetadataId( - data_id=str((serie["id"])), - link=f"https://www.themoviedb.org/tv/{serie['id']}", - ), + self.name: [ + MetadataId( + data_id=str((serie["id"])), + link=f"https://www.themoviedb.org/tv/{serie['id']}", + ) + ], } | ( { - ProviderName.IMDB: MetadataId( - data_id=str(serie["external_ids"]["imdb_id"]), - link=f"https://www.imdb.com/title/{serie['external_ids']['imdb_id']}", - ) + ProviderName.IMDB: [ + MetadataId( + data_id=str(serie["external_ids"]["imdb_id"]), + link=f"https://www.imdb.com/title/{serie['external_ids']['imdb_id']}", + ) + ] } if serie["external_ids"]["imdb_id"] else {} ) | ( { - ProviderName.TVDB: MetadataId( - 
data_id=str(serie["external_ids"]["tvdb_id"]), - link=None, - ) + ProviderName.TVDB: [ + MetadataId( + data_id=str(serie["external_ids"]["tvdb_id"]), + link=None, + ) + ] } if serie["external_ids"]["tvdb_id"] else {} @@ -408,11 +422,13 @@ class TheMovieDatabase(Provider): else None, end_air=None, external_id={ - self.name: SeasonId( - serie_id=str(serie_id), - season=season["season_number"], - link=f"https://www.themoviedb.org/tv/{serie_id}/season/{season['season_number']}", - ) + self.name: [ + SeasonId( + serie_id=str(serie_id), + season=season["season_number"], + link=f"https://www.themoviedb.org/tv/{serie_id}/season/{season['season_number']}", + ) + ] }, translations={ Language.get( @@ -543,12 +559,14 @@ class TheMovieDatabase(Provider): episode_number=episode["episode_number"], number=episode["episode_number"], external_id={ - self.name: EpisodeId( - serie_id=str(serie_id), - season=episode["season_number"], - episode=episode["episode_number"], - link=f"https://www.themoviedb.org/tv/{serie_id}/season/{episode['season_number']}/episode/{episode['episode_number']}", - ), + self.name: [ + EpisodeId( + serie_id=str(serie_id), + season=episode["season_number"], + episode=episode["episode_number"], + link=f"https://www.themoviedb.org/tv/{serie_id}/season/{episode['season_number']}/episode/{episode['episode_number']}", + ) + ], }, translations={ Language.get( @@ -583,10 +601,12 @@ class TheMovieDatabase(Provider): mean(float(x["vote_average"]) * 10 for x in collection["parts"]) ), external_id={ - self.name: MetadataId( - data_id=str(collection["id"]), - link=f"https://www.themoviedb.org/collection/{collection['id']}", - ) + self.name: [ + MetadataId( + data_id=str(collection["id"]), + link=f"https://www.themoviedb.org/collection/{collection['id']}", + ) + ] }, translations={ Language.get( @@ -685,10 +705,12 @@ class TheMovieDatabase(Provider): return Studio( slug=to_slug(company["name"]), external_id={ - self.name: MetadataId( - data_id=str(company["id"]), - 
link=f"https://www.themoviedb.org/company/{company['id']}", - ) + self.name: [ + MetadataId( + data_id=str(company["id"]), + link=f"https://www.themoviedb.org/company/{company['id']}", + ) + ] }, translations={ "en": StudioTranslation( @@ -714,10 +736,12 @@ class TheMovieDatabase(Provider): latin_name=person["name"], image=self._map_image(person["profile_path"]), external_id={ - self.name: MetadataId( - data_id=str(person["id"]), - link=f"https://www.themoviedb.org/person/{person['id']}", - ) + self.name: [ + MetadataId( + data_id=str(person["id"]), + link=f"https://www.themoviedb.org/person/{person['id']}", + ) + ] }, ), ) diff --git a/scanner/scanner/providers/thetvdb.py b/scanner/scanner/providers/thetvdb.py index e9032d01..e075dff0 100644 --- a/scanner/scanner/providers/thetvdb.py +++ b/scanner/scanner/providers/thetvdb.py @@ -181,10 +181,12 @@ class TVDB(Provider): poster=x["image_url"], original_language=Language.get(x["primary_language"]), external_id={ - self.name: MetadataId( - data_id=str(x["tvdb_id"]), - link=f"https://thetvdb.com/series/{x['slug']}", - ), + self.name: [ + MetadataId( + data_id=str(x["tvdb_id"]), + link=f"https://thetvdb.com/series/{x['slug']}", + ) + ], }, ) for x in ret["data"] @@ -234,10 +236,12 @@ class TVDB(Provider): start_air=datetime.strptime(ret["firstAired"], "%Y-%m-%d").date(), end_air=datetime.strptime(ret["lastAired"], "%Y-%m-%d").date(), external_id={ - self.name: MetadataId( - data_id=ret["id"], - link=f"https://thetvdb.com/series/{ret['slug']}", - ), + self.name: [ + MetadataId( + data_id=ret["id"], + link=f"https://thetvdb.com/series/{ret['slug']}", + ) + ], **self._process_remote_id(ret["remoteIds"]), }, translations={ @@ -332,7 +336,7 @@ class TVDB(Provider): def _process_remote_id( self, ids: list[dict[str, Any]] | None - ) -> dict[str, MetadataId]: + ) -> dict[str, list[MetadataId]]: # sometimes `remoteIds` is not even part of the response. 
if ids is None: return {} @@ -341,11 +345,11 @@ class TVDB(Provider): imdb = next((x["id"] for x in ids if x["sourceName"] == "IMDB"), None) if imdb is not None: - ret[ProviderName.IMDB] = MetadataId(data_id=imdb) + ret[ProviderName.IMDB] = [MetadataId(data_id=imdb)] tmdb = next((x["id"] for x in ids if x["sourceName"] == "TheMovieDB.com"), None) if tmdb is not None: - ret[ProviderName.TMDB] = MetadataId(data_id=tmdb) + ret[ProviderName.TMDB] = [MetadataId(data_id=tmdb)] return ret @@ -421,10 +425,12 @@ class TVDB(Provider): ], rating=None, external_id={ - self.name: MetadataId( - data_id=data["id"], - link=f"https://thetvdb.com/lists/{data['url']}", - ) + self.name: [ + MetadataId( + data_id=data["id"], + link=f"https://thetvdb.com/lists/{data['url']}", + ) + ] }, translations={ Language.get(lang): tl @@ -472,10 +478,12 @@ class TVDB(Provider): default=None, ), external_id={ - self.name: SeasonId( - serie_id=info["seriesId"], - season=info["number"], - ), + self.name: [ + SeasonId( + serie_id=info["seriesId"], + season=info["number"], + ) + ], }, translations={Language.get(lang): tl for lang, tl in zip(languages, trans)}, extra={}, @@ -520,12 +528,14 @@ class TVDB(Provider): episode_number=entry["number"], number=entry["number"], external_id={ - self.name: EpisodeId( - serie_id=str(serie_id), - season=entry["seasonNumber"], - episode=entry["number"], - link=f"https://thetvdb.com/series/{serie_id}/episodes/{entry['id']}", - ), + self.name: [ + EpisodeId( + serie_id=str(serie_id), + season=entry["seasonNumber"], + episode=entry["number"], + link=f"https://thetvdb.com/series/{serie_id}/episodes/{entry['id']}", + ) + ], }, translations={ Language.get(lang): EntryTranslation( @@ -672,11 +682,13 @@ class TVDB(Provider): for trans in ret["translations"]["nameTranslations"] if trans.get("isAlias") is None or False } - entry.external_id = { - self.name: MetadataId( - data_id=ret["id"], - link=f"https://thetvdb.com/movies/{ret['slug']}", - ), + entry.external_id = { # 
pyright: ignore[reportAttributeAccessIssue] + self.name: [ + MetadataId( + data_id=ret["id"], + link=f"https://thetvdb.com/movies/{ret['slug']}", + ) + ], **self._process_remote_id(ret["remoteIds"]), } @@ -731,10 +743,12 @@ class TVDB(Provider): if ret.get("first_release") and ret["first_release"].get("date") else None, external_id={ - self.name: MetadataId( - data_id=ret["id"], - link=f"https://thetvdb.com/series/{ret['slug']}", - ), + self.name: [ + MetadataId( + data_id=ret["id"], + link=f"https://thetvdb.com/series/{ret['slug']}", + ) + ], **self._process_remote_id(ret["remoteIds"]), }, translations={ diff --git a/scanner/uv.lock b/scanner/uv.lock index 47446e7e..92784083 100644 --- a/scanner/uv.lock +++ b/scanner/uv.lock @@ -1259,6 +1259,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, ] +[[package]] +name = "pydantic-xml" +version = "2.19.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "pydantic-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b8/cb/5f80b61d73a8d6171ee4611bfd2b944c036c6f6e5f6e01d9fb02f29d7bfc/pydantic_xml-2.19.0.tar.gz", hash = "sha256:b7acba5a0966cbbbc9bf88d0d870b2bc875da063fe1bbe62d83939b549224730", size = 26228, upload-time = "2026-02-14T17:33:53.368Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/2d/dce0dc471fade04829c2948462d79c9bc4991305b0f73889f70c9645e540/pydantic_xml-2.19.0-py3-none-any.whl", hash = "sha256:42854bf962758bec338c112c2de984723708262793e108416f33aa4d6c11b3b4", size = 42536, upload-time = "2026-02-14T17:33:54.206Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -1482,6 +1495,7 @@ dependencies = [ { name = "opentelemetry-instrumentation-fastapi" 
}, { name = "opentelemetry-sdk" }, { name = "pydantic" }, + { name = "pydantic-xml" }, { name = "pyjwt", extra = ["crypto"] }, { name = "python-slugify" }, { name = "watchfiles" }, @@ -1502,6 +1516,7 @@ requires-dist = [ { name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.59b0" }, { name = "opentelemetry-sdk", specifier = ">=1.38.0" }, { name = "pydantic", specifier = ">=2.11.4" }, + { name = "pydantic-xml", specifier = ">=2.14.0" }, { name = "pyjwt", extras = ["crypto"], specifier = ">=2.10.1" }, { name = "python-slugify", specifier = ">=8.0.4" }, { name = "watchfiles", specifier = ">=1.0.5" },