Finish xem fixup rule

This commit is contained in:
Zoe Roux 2024-02-05 17:00:34 +01:00
parent ed9c4ebb68
commit e772a798f7
7 changed files with 54 additions and 24 deletions

View File

@ -124,3 +124,21 @@ class TheXem:
# Only tvdb has a proper absolute handling so we always use this one. # Only tvdb has a proper absolute handling so we always use this one.
return (ep[provider]["season"], ep[provider]["episode"], ep["tvdb"]["absolute"]) return (ep[provider]["season"], ep[provider]["episode"], ep["tvdb"]["absolute"])
@cache(ttl=timedelta(days=1))
async def get_expected_titles(
    self, provider: Literal["tvdb"] | Literal["anidb"] = "tvdb"
) -> list[str]:
    """Return every show title found in the map of ``provider``, normalized.

    Each title is lowercased and stripped of spaces so that callers can
    compare it against a title normalized the same way. The result holds
    the master show name of every entry followed by the alternative names
    of that entry; duplicates are not removed.

    Args:
        provider: Which provider's map to read titles from.

    Returns:
        A flat list of normalized titles.
    """

    def normalize(title: str) -> str:
        return title.lower().replace(" ", "")

    # Named `show_map` rather than `map` to avoid shadowing the builtin.
    show_map = await self.get_map(provider)
    titles: list[str] = []
    for entry in show_map.values():
        # Only the first element is a string (the show name) so we need to ignore the type hint
        master_show_name: str = entry[0]  # type: ignore
        titles.append(normalize(master_show_name))
        # The remaining elements are assumed to be mappings keyed by
        # alternative title (iterating a mapping yields its keys).
        for alt_names in entry[1:]:
            titles.extend(normalize(name) for name in alt_names)
    return titles

View File

@ -3,6 +3,7 @@ from aiohttp import ClientSession
from abc import abstractmethod, abstractproperty from abc import abstractmethod, abstractproperty
from typing import Optional, TypeVar from typing import Optional, TypeVar
from providers.implementations.thexem import TheXem
from providers.utils import ProviderError from providers.utils import ProviderError
from .types.show import Show from .types.show import Show
@ -19,15 +20,12 @@ class Provider:
@classmethod @classmethod
def get_all( def get_all(
cls: type[Self], client: ClientSession, languages: list[str] cls: type[Self], client: ClientSession, languages: list[str]
) -> list[Self]: ) -> tuple[list[Self], TheXem]:
providers = [] providers = []
from providers.idmapper import IdMapper from providers.idmapper import IdMapper
idmapper = IdMapper() idmapper = IdMapper()
from providers.implementations.thexem import TheXem
xem = TheXem(client) xem = TheXem(client)
from providers.implementations.themoviedatabase import TheMovieDatabase from providers.implementations.themoviedatabase import TheMovieDatabase
@ -46,7 +44,7 @@ class Provider:
idmapper.init(tmdb=tmdb, language=languages[0]) idmapper.init(tmdb=tmdb, language=languages[0])
return providers return providers, xem
@abstractproperty @abstractproperty
def name(self) -> str: def name(self) -> str:

View File

@ -1,8 +1,3 @@
from providers.utils import ProviderError
from .scanner import Scanner
from .monitor import monitor
async def main(): async def main():
import os import os
import logging import logging
@ -11,7 +6,9 @@ async def main():
from datetime import date from datetime import date
from typing import Optional from typing import Optional
from aiohttp import ClientSession from aiohttp import ClientSession
from providers.utils import format_date from providers.utils import format_date, ProviderError
from .scanner import Scanner
from .monitor import monitor
path = os.environ.get("SCANNER_LIBRARY_ROOT", "/video") path = os.environ.get("SCANNER_LIBRARY_ROOT", "/video")
languages = os.environ.get("LIBRARY_LANGUAGES") languages = os.environ.get("LIBRARY_LANGUAGES")

View File

@ -1,7 +1,7 @@
import asyncio import asyncio
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import wraps from functools import wraps
from typing import Any, Optional, Tuple, Final, Literal from typing import Any, Optional, Tuple, Final, Literal, TypeVar
from enum import Enum from enum import Enum
@ -14,6 +14,7 @@ none: Final = Sentinel.NoneSentinel
type Cache = dict[ type Cache = dict[
Any, Tuple[asyncio.Event | Literal[none], datetime | Literal[none], Any] Any, Tuple[asyncio.Event | Literal[none], datetime | Literal[none], Any]
] ]
# Cache = TypeVar("Cache") # type: ignore
def cache(ttl: timedelta, cache: Optional[Cache] = None, typed=False): def cache(ttl: timedelta, cache: Optional[Cache] = None, typed=False):

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from guessit.api import default_api from guessit.api import default_api
from typing import cast from typing import cast, List
from rebulk import Rebulk from rebulk import Rebulk
try: try:
@ -14,9 +14,14 @@ rblk = cast(Rebulk, default_api.rebulk)
rblk.rules(rules) rblk.rules(rules)
def guessit(name: str): def guessit(name: str, *, xem_titles: List[str] = []):
return default_api.guessit( return default_api.guessit(
name, {"episode_prefer_number": True, "excludes": "language"} name,
{
"episode_prefer_number": True,
"excludes": "language",
"xem_titles": xem_titles,
},
) )
@ -24,7 +29,19 @@ def guessit(name: str):
if __name__ == "__main__": if __name__ == "__main__":
import sys import sys
import json import json
from pathlib import Path
from guessit.jsonutils import GuessitEncoder from guessit.jsonutils import GuessitEncoder
from aiohttp import ClientSession
import asyncio
ret = guessit(sys.argv[1]) sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
from providers.implementations.thexem import TheXem
async def main():
async with ClientSession() as client:
xem = TheXem(client)
ret = guessit(sys.argv[1], xem_titles=await xem.get_expected_titles())
print(json.dumps(ret, cls=GuessitEncoder, indent=4)) print(json.dumps(ret, cls=GuessitEncoder, indent=4))
asyncio.run(main())

View File

@ -234,7 +234,7 @@ class XemFixup(Rule):
def when(self, matches: Matches, context) -> Any: def when(self, matches: Matches, context) -> Any:
titles: List[Match] = matches.named("title", lambda m: m.tagged("title")) # type: ignore titles: List[Match] = matches.named("title", lambda m: m.tagged("title")) # type: ignore
if not titles: if not titles or not context["xem_titles"]:
return return
title = titles[0] title = titles[0]
@ -249,6 +249,5 @@ class XemFixup(Rule):
new_title.end = nmatch[0].end new_title.end = nmatch[0].end
new_title.value = f"{title.value}{hole}{nmatch[0].value}" new_title.value = f"{title.value}{hole}{nmatch[0].value}"
# TODO: check if new_title exists on thexem, if not early return if new_title.value.lower().replace(" ", "") in context["xem_titles"]:
return [[title, nmatch[0]], [new_title]] return [[title, nmatch[0]], [new_title]]

View File

@ -31,7 +31,7 @@ class Scanner:
except Exception as e: except Exception as e:
self._ignore_pattern = re.compile("") self._ignore_pattern = re.compile("")
logging.error(f"Invalid ignore pattern. Ignoring. Error: {e}") logging.error(f"Invalid ignore pattern. Ignoring. Error: {e}")
self.provider = Provider.get_all(client, languages)[0] [self.provider, *_], self._xem = Provider.get_all(client, languages)
self.languages = languages self.languages = languages
self._collection_cache = {} self._collection_cache = {}
@ -80,7 +80,7 @@ class Scanner:
if path in self.registered or self._ignore_pattern.match(path): if path in self.registered or self._ignore_pattern.match(path):
return return
raw = guessit(path) raw = guessit(path, xem_titles=await self._xem.get_expected_titles())
if "mimetype" not in raw or not raw["mimetype"].startswith("video"): if "mimetype" not in raw or not raw["mimetype"].startswith("video"):
return return
@ -90,7 +90,7 @@ class Scanner:
if isinstance(raw.get("season"), List): if isinstance(raw.get("season"), List):
raise ProviderError( raise ProviderError(
f"An episode can't have multiple seasons (found {raw.get("season")} for {path})" f"An episode can't have multiple seasons (found {raw.get('season')} for {path})"
) )
if isinstance(raw.get("episode"), List): if isinstance(raw.get("episode"), List):
raise ProviderError( raise ProviderError(