mirror of
https://github.com/zoriya/Kyoo.git
synced 2026-03-22 17:37:49 -04:00
Parse animelist.xml
This commit is contained in:
parent
1d0c8a81ed
commit
3202c01767
@ -18,6 +18,7 @@ dependencies = [
|
||||
"opentelemetry-instrumentation-fastapi>=0.59b0",
|
||||
"opentelemetry-sdk>=1.38.0",
|
||||
"pydantic>=2.11.4",
|
||||
"pydantic-xml>=2.14.0",
|
||||
"pyjwt[crypto]>=2.10.1",
|
||||
"python-slugify>=8.0.4",
|
||||
"watchfiles>=1.0.5",
|
||||
|
||||
182
scanner/scanner/identifiers/anilist.py
Normal file
182
scanner/scanner/identifiers/anilist.py
Normal file
@ -0,0 +1,182 @@
|
||||
from __future__ import annotations

import re
import unicodedata
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from functools import cached_property
from logging import getLogger
from typing import Literal

from aiohttp import ClientSession
from pydantic import field_validator
from pydantic_xml import BaseXmlModel, attr, element

from ..cache import cache
from ..models.metadataid import EpisodeId, MetadataId
from ..models.videos import Guess
from ..providers.names import ProviderName
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class AnimeTitlesDb(BaseXmlModel, tag="animetitles"):
    """Model for the ``<animetitles>`` root element of animetitles.xml."""

    # NOTE(review): the helper models are nested here on the assumption that
    # they mirror the ``AnimeListDb.AnimeEntry`` layout -- confirm.
    animes: list[AnimeTitlesEntry] = element(default=[])

    @classmethod
    def get_url(cls):
        """Return the URL the titles database is downloaded from."""
        return "https://raw.githubusercontent.com/Anime-Lists/anime-lists/master/animetitles.xml"

    class AnimeTitlesEntry(BaseXmlModel, tag="anime"):
        """One ``<anime>`` element: an ``aid`` attribute plus its titles."""

        aid: str = attr()
        titles: list[AnimeTitle] = element(default=[])

        class AnimeTitle(
            BaseXmlModel,
            tag="title",
            nsmap={"xml": "http://www.w3.org/XML/1998/namespace"},
        ):
            """One ``<title>`` element of an anime entry."""

            type: str = attr()
            # Bound to the namespaced ``xml:lang`` attribute via the nsmap
            # declared on this model.
            lang: str = attr(ns="xml")
            # A primitive field without attr()/element() receives the text
            # content of the element.
            text: str
|
||||
|
||||
class AnimeListDb(BaseXmlModel, tag="anime-list"):
    """Model for the ``<anime-list>`` root element of anime-list.xml."""

    # NOTE(review): the helper models are nested because other code refers
    # to ``AnimeListDb.AnimeEntry``; the deeper nesting is assumed -- confirm.
    animes: list[AnimeEntry] = element(default=[])

    @classmethod
    def get_url(cls):
        """Return the URL the anime-list database is downloaded from."""
        return "https://raw.githubusercontent.com/Anime-Lists/anime-lists/refs/heads/master/anime-list.xml"

    class AnimeEntry(BaseXmlModel, tag="anime"):
        """One ``<anime>`` element linking an AniDB id to other providers."""

        anidbid: str = attr()
        tvdbid: str | None = attr(default=None)
        defaulttvdbseason: int | Literal["a"] | None = attr(default=None)
        episodeoffset: int = attr(default=0)
        tmdbid: str | None = attr(default=None)
        imdbid: str | None = attr(default=None)
        name: str | None = element(default=None)
        # The default must agree with the declared type: a missing
        # <mapping-list> is represented by None, not by an empty list.
        mapping_list: MappingList | None = element(default=None)

        @field_validator("tmdbid", "imdbid")
        @classmethod
        def _empty_to_none(cls, v: str | None) -> str | None:
            """Turn an empty attribute value into ``None``."""
            return v or None

        class MappingList(BaseXmlModel, tag="mapping-list"):
            """The ``<mapping-list>`` child of an anime entry."""

            mappings: list[EpisodeMapping] = element(default=[])

            # An explicit tag is required: without one the elements would be
            # looked up under the field name ("mappings") and the <mapping>
            # children of anime-list.xml would never be matched.
            class EpisodeMapping(BaseXmlModel, tag="mapping"):
                """One ``<mapping>`` element of a mapping list."""

                anidbseason: int = attr()
                tvdbseason: int | None = attr(default=None)
                start: int | None = attr(default=None)
                end: int | None = attr(default=None)
                offset: int = attr(default=0)
                # Text content of the element, e.g. ";1-5;2-6+7;".
                text: str | None = None

                @cached_property
                def tvdb_mappings(self) -> dict[int, list[int]]:
                    """Parse the element text into per-episode mappings.

                    The text is read as a ``;``-separated list of
                    ``<left>-<right>[+<right>...]`` items.

                    Returns:
                        ``{left: [right, ...]}`` with every number converted
                        to ``int``. Empty when ``tvdbseason`` is ``None`` or
                        the element has no text. Items without a ``-`` or
                        with non-numeric parts are skipped.
                    """
                    if self.tvdbseason is None or not self.text:
                        return {}
                    ret: dict[int, list[int]] = {}
                    for item in self.text.split(";"):
                        item = item.strip()
                        if not item or "-" not in item:
                            continue
                        aid, tvdbids = item.split("-", 1)
                        try:
                            ret[int(aid.strip())] = [
                                int(x.strip()) for x in tvdbids.split("+")
                            ]
                        except ValueError:
                            # Best effort: ignore an item that is not numeric.
                            continue
                    return ret
|
||||
|
||||
|
||||
@dataclass
class AnimeListData:
    """Lookup tables built from the downloaded anime-lists XML files."""

    # When the tables were built.
    fetched_at: datetime
    # normalized title -> anidbid
    # (default_factory is required: dataclasses reject a bare ``{}`` default
    # with a ValueError at class creation, and each instance needs its own
    # dict anyway.)
    titles: dict[str, str] = field(default_factory=dict)
    # anidbid -> AnimeEntry
    animes: dict[str, "AnimeListDb.AnimeEntry"] = field(default_factory=dict)
|
||||
|
||||
|
||||
@cache(ttl=timedelta(days=30))
async def get_data() -> AnimeListData:
    """Download and index the two anime-lists XML databases.

    Returns:
        An ``AnimeListData`` whose ``titles`` maps every normalized title to
        the ``aid`` of its entry and whose ``animes`` maps every ``anidbid``
        to its entry. When several titles normalize to the same key, the one
        seen last wins.

    Raises:
        aiohttp.ClientResponseError: if either download answers with an
            error status.
    """
    logger.info("Fetching anime-lists XML databases...")
    # Taken before the downloads start, not after they finish.
    started = datetime.now()

    title_index: dict[str, str] = {}
    anime_index: dict[str, AnimeListDb.AnimeEntry] = {}

    async with ClientSession() as session:
        async with session.get(AnimeTitlesDb.get_url()) as titles_resp:
            titles_resp.raise_for_status()
            titles_db = AnimeTitlesDb.from_xml(await titles_resp.read())
            for anime in titles_db.animes:
                for title in anime.titles:
                    title_index[normalize_title(title.text)] = anime.aid

        async with session.get(AnimeListDb.get_url()) as list_resp:
            list_resp.raise_for_status()
            list_db = AnimeListDb.from_xml(await list_resp.read())
            for entry in list_db.animes:
                anime_index[entry.anidbid] = entry

    logger.info(
        "Loaded %d anime titles from animelist-xml.",
        len(title_index),
    )
    return AnimeListData(
        fetched_at=started,
        titles=title_index,
        animes=anime_index,
    )
|
||||
|
||||
|
||||
def normalize_title(title: str) -> str:
    """Reduce *title* to a canonical form suitable for use as a lookup key.

    Steps, in order: decompose to NFD, drop combining marks (category
    ``Mn``), lowercase, remove every character that is neither a word
    character nor whitespace, then collapse whitespace runs to one space
    and trim both ends.
    """
    decomposed = unicodedata.normalize("NFD", title)
    without_marks = "".join(
        filter(lambda ch: unicodedata.category(ch) != "Mn", decomposed)
    )
    no_punctuation = re.sub(r"[^\w\s]", "", without_marks.lower())
    return re.sub(r"\s+", " ", no_punctuation).strip()
|
||||
|
||||
|
||||
async def anilist(_path: str, guess: Guess) -> Guess:
    """Enrich *guess* with external ids found in the anime-lists databases.

    Args:
        _path: Unused by this step.
        guess: The current guess; its title is used for the lookup.

    Returns:
        *guess* itself when the title is unknown or has no entry in the
        anime list. Otherwise a new ``Guess`` carrying the AniDB id plus any
        TVDB/TMDB/IMDB ids of the matched entry, with *guess* appended to
        its history.
    """
    data = await get_data()

    # The title index is keyed by normalized titles, so the guessed title
    # must go through the same normalization or the lookup almost never hits.
    aid = data.titles.get(normalize_title(guess.title))
    if aid is None:
        return guess
    anime = data.animes.get(aid)
    if anime is None:
        logger.warning("AniDB id %s found in titles but not in anime-list.xml", aid)
        return guess

    logger.info(
        "Matched '%s' to AniDB id %s (tvdb=%s, tmdbid=%s)",
        guess.title,
        aid,
        anime.tvdbid,
        anime.tmdbid,
    )

    # Work on a copy so the incoming guess (kept in the history) is untouched.
    new_external_id = dict(guess.external_id)
    new_external_id[ProviderName.ANIDB] = aid
    if anime.tvdbid:
        new_external_id[ProviderName.TVDB] = anime.tvdbid
    if anime.tmdbid:
        new_external_id[ProviderName.TMDB] = anime.tmdbid
    if anime.imdbid:
        new_external_id[ProviderName.IMDB] = anime.imdbid

    new_episodes: list[Guess.Episode] = []
    for ep in guess.episodes:
        # TODO: implement this
        # Until the per-episode handling exists, keep every episode as-is;
        # returning an empty list would silently drop them from the guess.
        new_episodes.append(ep)

    return Guess(
        title=guess.title,
        kind=guess.kind,
        extra_kind=guess.extra_kind,
        years=guess.years,
        episodes=new_episodes,
        external_id=new_external_id,
        raw=guess.raw,
        from_="anilist",
        history=[*guess.history, guess],
    )
|
||||
@ -7,14 +7,15 @@ from typing import Callable, Literal, cast
|
||||
from rebulk.match import Match
|
||||
|
||||
from ..models.videos import Guess, Video
|
||||
from .anilist import anilist
|
||||
from .guess.guess import guessit
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
# Identification steps. Each one takes a path string and a guess and returns
# an awaitable guess.
pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
    anilist,
    # TODO: add nfo scanner
    # TODO: add thexem
]
|
||||
|
||||
|
||||
|
||||
@ -33,6 +33,7 @@ class Guess(Model, extra="allow"):
|
||||
class Episode(Model):
|
||||
season: int | None
|
||||
episode: int
|
||||
external_id: dict[str, MetadataId | EpisodeId] = {}
|
||||
|
||||
@override
|
||||
def __hash__(self) -> int:
|
||||
|
||||
@ -2,3 +2,4 @@ class ProviderName:
|
||||
TMDB = "themoviedatabase"
|
||||
TVDB = "tvdb"
|
||||
IMDB = "imdb"
|
||||
ANIDB = "anidb"
|
||||
|
||||
15
scanner/uv.lock
generated
15
scanner/uv.lock
generated
@ -1259,6 +1259,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydantic-xml"
|
||||
version = "2.19.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pydantic" },
|
||||
{ name = "pydantic-core" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b8/cb/5f80b61d73a8d6171ee4611bfd2b944c036c6f6e5f6e01d9fb02f29d7bfc/pydantic_xml-2.19.0.tar.gz", hash = "sha256:b7acba5a0966cbbbc9bf88d0d870b2bc875da063fe1bbe62d83939b549224730", size = 26228, upload-time = "2026-02-14T17:33:53.368Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/27/2d/dce0dc471fade04829c2948462d79c9bc4991305b0f73889f70c9645e540/pydantic_xml-2.19.0-py3-none-any.whl", hash = "sha256:42854bf962758bec338c112c2de984723708262793e108416f33aa4d6c11b3b4", size = 42536, upload-time = "2026-02-14T17:33:54.206Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.19.2"
|
||||
@ -1482,6 +1495,7 @@ dependencies = [
|
||||
{ name = "opentelemetry-instrumentation-fastapi" },
|
||||
{ name = "opentelemetry-sdk" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pydantic-xml" },
|
||||
{ name = "pyjwt", extra = ["crypto"] },
|
||||
{ name = "python-slugify" },
|
||||
{ name = "watchfiles" },
|
||||
@ -1502,6 +1516,7 @@ requires-dist = [
|
||||
{ name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.59b0" },
|
||||
{ name = "opentelemetry-sdk", specifier = ">=1.38.0" },
|
||||
{ name = "pydantic", specifier = ">=2.11.4" },
|
||||
{ name = "pydantic-xml", specifier = ">=2.14.0" },
|
||||
{ name = "pyjwt", extras = ["crypto"], specifier = ">=2.10.1" },
|
||||
{ name = "python-slugify", specifier = ">=8.0.4" },
|
||||
{ name = "watchfiles", specifier = ">=1.0.5" },
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user