diff --git a/scanner/providers/implementations/thexem.py b/scanner/providers/implementations/thexem.py index 63b21a60..b0a971de 100644 --- a/scanner/providers/implementations/thexem.py +++ b/scanner/providers/implementations/thexem.py @@ -124,3 +124,21 @@ class TheXem: # Only tvdb has a proper absolute handling so we always use this one. return (ep[provider]["season"], ep[provider]["episode"], ep["tvdb"]["absolute"]) + + @cache(ttl=timedelta(days=1)) + async def get_expected_titles( + self, provider: Literal["tvdb"] | Literal["anidb"] = "tvdb" + ) -> list[str]: + map = await self.get_map(provider) + titles = [] + + def clean(s: str): + return s.lower().replace(" ", "") + + for x in map.values(): + # Only the first element is a string (the show name) so we need to ignore the type hint + master_show_name: str = x[0] # type: ignore + titles.append(clean(master_show_name)) + for y in x[1:]: + titles.extend(clean(name) for name in y.keys()) + return titles diff --git a/scanner/providers/provider.py b/scanner/providers/provider.py index 31e5d244..aef107d0 100644 --- a/scanner/providers/provider.py +++ b/scanner/providers/provider.py @@ -3,6 +3,7 @@ from aiohttp import ClientSession from abc import abstractmethod, abstractproperty from typing import Optional, TypeVar +from providers.implementations.thexem import TheXem from providers.utils import ProviderError from .types.show import Show @@ -19,15 +20,12 @@ class Provider: @classmethod def get_all( cls: type[Self], client: ClientSession, languages: list[str] - ) -> list[Self]: + ) -> tuple[list[Self], TheXem]: providers = [] from providers.idmapper import IdMapper idmapper = IdMapper() - - from providers.implementations.thexem import TheXem - xem = TheXem(client) from providers.implementations.themoviedatabase import TheMovieDatabase @@ -46,7 +44,7 @@ class Provider: idmapper.init(tmdb=tmdb, language=languages[0]) - return providers + return providers, xem @abstractproperty def name(self) -> str: diff --git a/scanner/scanner/__init__.py b/scanner/scanner/__init__.py index f4eb657b..6e3a7dd2 100644 --- a/scanner/scanner/__init__.py +++ b/scanner/scanner/__init__.py @@ -1,8 +1,3 @@ -from providers.utils import ProviderError -from .scanner import Scanner -from .monitor import monitor - - async def main(): import os import logging @@ -11,7 +6,9 @@ async def main(): from datetime import date from typing import Optional from aiohttp import ClientSession - from providers.utils import format_date + from providers.utils import format_date, ProviderError + from .scanner import Scanner + from .monitor import monitor path = os.environ.get("SCANNER_LIBRARY_ROOT", "/video") languages = os.environ.get("LIBRARY_LANGUAGES") diff --git a/scanner/scanner/cache.py b/scanner/scanner/cache.py index f9efbc24..5148a282 100644 --- a/scanner/scanner/cache.py +++ b/scanner/scanner/cache.py @@ -1,7 +1,7 @@ import asyncio from datetime import datetime, timedelta from functools import wraps -from typing import Any, Optional, Tuple, Final, Literal +from typing import Any, Optional, Tuple, Final, Literal, TypeVar from enum import Enum @@ -14,6 +14,7 @@ none: Final = Sentinel.NoneSentinel type Cache = dict[ Any, Tuple[asyncio.Event | Literal[none], datetime | Literal[none], Any] ] +# Cache = TypeVar("Cache") # type: ignore def cache(ttl: timedelta, cache: Optional[Cache] = None, typed=False): diff --git a/scanner/scanner/parser/guess.py b/scanner/scanner/parser/guess.py index d594bc08..7f36cf4e 100644 --- a/scanner/scanner/parser/guess.py +++ b/scanner/scanner/parser/guess.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from guessit.api import default_api -from typing import cast +from typing import cast, List from rebulk import Rebulk try: @@ -14,9 +14,14 @@ rblk = cast(Rebulk, default_api.rebulk) rblk.rules(rules) -def guessit(name: str): +def guessit(name: str, *, xem_titles: List[str] = []): return default_api.guessit( - name, {"episode_prefer_number": True, "excludes": "language"} + name, + { + "episode_prefer_number": True, + "excludes": "language", + "xem_titles": xem_titles, + }, ) @@ -24,7 +29,19 @@ def guessit(name: str): if __name__ == "__main__": import sys import json + from pathlib import Path from guessit.jsonutils import GuessitEncoder + from aiohttp import ClientSession + import asyncio - ret = guessit(sys.argv[1]) - print(json.dumps(ret, cls=GuessitEncoder, indent=4)) + sys.path.append(str(Path(f"{__file__}/../../..").resolve())) + from providers.implementations.thexem import TheXem + + async def main(): + async with ClientSession() as client: + xem = TheXem(client) + + ret = guessit(sys.argv[1], xem_titles=await xem.get_expected_titles()) + print(json.dumps(ret, cls=GuessitEncoder, indent=4)) + + asyncio.run(main()) diff --git a/scanner/scanner/parser/rules.py b/scanner/scanner/parser/rules.py index b707e28f..32ed4243 100644 --- a/scanner/scanner/parser/rules.py +++ b/scanner/scanner/parser/rules.py @@ -234,7 +234,7 @@ class XemFixup(Rule): def when(self, matches: Matches, context) -> Any: titles: List[Match] = matches.named("title", lambda m: m.tagged("title")) # type: ignore - if not titles: + if not titles or not context["xem_titles"]: return title = titles[0] @@ -249,6 +249,5 @@ class XemFixup(Rule): new_title.end = nmatch[0].end new_title.value = f"{title.value}{hole}{nmatch[0].value}" - # TODO: check if new_title exists on thexem, if not early return - - return [[title, nmatch[0]], [new_title]] + if new_title.value.lower().replace(" ", "") in context["xem_titles"]: + return [[title, nmatch[0]], [new_title]] diff --git a/scanner/scanner/scanner.py b/scanner/scanner/scanner.py index 863ae58b..ce96a3f9 100644 --- a/scanner/scanner/scanner.py +++ b/scanner/scanner/scanner.py @@ -31,7 +31,7 @@ class Scanner: except Exception as e: self._ignore_pattern = re.compile("") logging.error(f"Invalid ignore pattern. Ignoring. Error: {e}") - self.provider = Provider.get_all(client, languages)[0] + [self.provider, *_], self._xem = Provider.get_all(client, languages) self.languages = languages self._collection_cache = {} @@ -80,7 +80,7 @@ class Scanner: if path in self.registered or self._ignore_pattern.match(path): return - raw = guessit(path) + raw = guessit(path, xem_titles=await self._xem.get_expected_titles()) if "mimetype" not in raw or not raw["mimetype"].startswith("video"): return @@ -90,7 +90,7 @@ class Scanner: if isinstance(raw.get("season"), List): raise ProviderError( - f"An episode can't have multiple seasons (found {raw.get("season")} for {path})" + f"An episode can't have multiple seasons (found {raw.get('season')} for {path})" ) if isinstance(raw.get("episode"), List): raise ProviderError(