Finish xem fixup rule

This commit is contained in:
Zoe Roux 2024-02-05 17:00:34 +01:00
parent ed9c4ebb68
commit e772a798f7
7 changed files with 54 additions and 24 deletions

View File

@ -124,3 +124,21 @@ class TheXem:
# Only tvdb has a proper absolute handling so we always use this one.
return (ep[provider]["season"], ep[provider]["episode"], ep["tvdb"]["absolute"])
@cache(ttl=timedelta(days=1))
async def get_expected_titles(
    self, provider: Literal["tvdb"] | Literal["anidb"] = "tvdb"
) -> list[str]:
    """Return the normalized show names found in the map of *provider*.

    Every name is lowercased and stripped of its spaces so callers can
    compare it against a title normalized the same way. The list may
    contain duplicates; names are emitted in the order the map yields them.

    The method is wrapped by ``cache`` with a one-day ``ttl``.

    Args:
        provider: Which id namespace to request from ``get_map``.

    Returns:
        For each map entry, the main show name followed by the keys of
        every remaining element (presumably per-season alternative names;
        verify against ``get_map``).
    """
    shows = await self.get_map(provider)
    expected: list[str] = []

    def normalize(raw: str) -> str:
        # Same normalization the consumer applies before a lookup.
        return raw.lower().replace(" ", "")

    for entry in shows.values():
        # Only the first element is a string (the show name), the declared
        # type of the map does not express that, hence the ignore.
        primary: str = entry[0]  # type: ignore
        expected.append(normalize(primary))
        for aliases in entry[1:]:
            expected += [normalize(alias) for alias in aliases.keys()]
    return expected

View File

@ -3,6 +3,7 @@ from aiohttp import ClientSession
from abc import abstractmethod, abstractproperty
from typing import Optional, TypeVar
from providers.implementations.thexem import TheXem
from providers.utils import ProviderError
from .types.show import Show
@ -19,15 +20,12 @@ class Provider:
@classmethod
def get_all(
cls: type[Self], client: ClientSession, languages: list[str]
) -> list[Self]:
) -> tuple[list[Self], TheXem]:
providers = []
from providers.idmapper import IdMapper
idmapper = IdMapper()
from providers.implementations.thexem import TheXem
xem = TheXem(client)
from providers.implementations.themoviedatabase import TheMovieDatabase
@ -46,7 +44,7 @@ class Provider:
idmapper.init(tmdb=tmdb, language=languages[0])
return providers
return providers, xem
@abstractproperty
def name(self) -> str:

View File

@ -1,8 +1,3 @@
from providers.utils import ProviderError
from .scanner import Scanner
from .monitor import monitor
async def main():
import os
import logging
@ -11,7 +6,9 @@ async def main():
from datetime import date
from typing import Optional
from aiohttp import ClientSession
from providers.utils import format_date
from providers.utils import format_date, ProviderError
from .scanner import Scanner
from .monitor import monitor
path = os.environ.get("SCANNER_LIBRARY_ROOT", "/video")
languages = os.environ.get("LIBRARY_LANGUAGES")

View File

@ -1,7 +1,7 @@
import asyncio
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Optional, Tuple, Final, Literal
from typing import Any, Optional, Tuple, Final, Literal, TypeVar
from enum import Enum
@ -14,6 +14,7 @@ none: Final = Sentinel.NoneSentinel
type Cache = dict[
Any, Tuple[asyncio.Event | Literal[none], datetime | Literal[none], Any]
]
# Cache = TypeVar("Cache") # type: ignore
def cache(ttl: timedelta, cache: Optional[Cache] = None, typed=False):

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
from guessit.api import default_api
from typing import cast
from typing import cast, List
from rebulk import Rebulk
try:
@ -14,9 +14,14 @@ rblk = cast(Rebulk, default_api.rebulk)
rblk.rules(rules)
def guessit(name: str, *, xem_titles: List[str] | None = None):
    """Parse *name* with guessit using this project's options.

    Args:
        name: The file name or path to analyse.
        xem_titles: Known show titles forwarded to the rules through the
            ``"xem_titles"`` option. The XemFixup rule looks titles up in
            lowercase with spaces removed, so entries should be normalized
            that way. ``None`` (the default) is sent as an empty list, which
            makes that rule a no-op.

    Returns:
        Whatever ``default_api.guessit`` returns for the given name.
    """
    # A ``None`` default instead of ``[]``: a list literal in the signature
    # would be one shared object reused (and possibly mutated) across calls.
    titles = [] if xem_titles is None else xem_titles
    return default_api.guessit(
        name,
        {
            "episode_prefer_number": True,
            "excludes": "language",
            "xem_titles": titles,
        },
    )
@ -24,7 +29,19 @@ def guessit(name: str):
if __name__ == "__main__":
import sys
import json
from pathlib import Path
from guessit.jsonutils import GuessitEncoder
from aiohttp import ClientSession
import asyncio
ret = guessit(sys.argv[1])
print(json.dumps(ret, cls=GuessitEncoder, indent=4))
sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
from providers.implementations.thexem import TheXem
async def main():
async with ClientSession() as client:
xem = TheXem(client)
ret = guessit(sys.argv[1], xem_titles=await xem.get_expected_titles())
print(json.dumps(ret, cls=GuessitEncoder, indent=4))
asyncio.run(main())

View File

@ -234,7 +234,7 @@ class XemFixup(Rule):
def when(self, matches: Matches, context) -> Any:
titles: List[Match] = matches.named("title", lambda m: m.tagged("title")) # type: ignore
if not titles:
if not titles or not context["xem_titles"]:
return
title = titles[0]
@ -249,6 +249,5 @@ class XemFixup(Rule):
new_title.end = nmatch[0].end
new_title.value = f"{title.value}{hole}{nmatch[0].value}"
# TODO: check if new_title exists on thexem, if not early return
return [[title, nmatch[0]], [new_title]]
if new_title.value.lower().replace(" ", "") in context["xem_titles"]:
return [[title, nmatch[0]], [new_title]]

View File

@ -31,7 +31,7 @@ class Scanner:
except Exception as e:
self._ignore_pattern = re.compile("")
logging.error(f"Invalid ignore pattern. Ignoring. Error: {e}")
self.provider = Provider.get_all(client, languages)[0]
[self.provider, *_], self._xem = Provider.get_all(client, languages)
self.languages = languages
self._collection_cache = {}
@ -80,7 +80,7 @@ class Scanner:
if path in self.registered or self._ignore_pattern.match(path):
return
raw = guessit(path)
raw = guessit(path, xem_titles=await self._xem.get_expected_titles())
if "mimetype" not in raw or not raw["mimetype"].startswith("video"):
return
@ -90,7 +90,7 @@ class Scanner:
if isinstance(raw.get("season"), List):
raise ProviderError(
f"An episode can't have multiple seasons (found {raw.get("season")} for {path})"
f"An episode can't have multiple seasons (found {raw.get('season')} for {path})"
)
if isinstance(raw.get("episode"), List):
raise ProviderError(