From eb70f8f802a109971956fd2dcdb947fe0d240448 Mon Sep 17 00:00:00 2001
From: Zoe Roux
Date: Mon, 5 May 2025 12:37:29 +0200
Subject: [PATCH] Write identify function

---
NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Blank lines were placed so that every hunk
matches its @@ line counts. Indentation is assumed to be 4 spaces, and the
blank line after "xem = TheXemClient(client)" could equally sit after
"import json" -- confirm both against the tree before applying. identify()
was amended during review (safe zip_longest fill value, optional
version/part), so the blob id on its "index" line is stale.

 .../parser => scanner/guess}/guess.py         | 19 +++++---
 .../parser => scanner/guess}/rules.py         |  0
 scanner/scanner/identify.py                   | 59 +++++++++++++++++++
 scanner/scanner/models/videos.py              |  9 ++--
 4 files changed, 77 insertions(+), 10 deletions(-)
 rename scanner/{matcher/parser => scanner/guess}/guess.py (72%)
 rename scanner/{matcher/parser => scanner/guess}/rules.py (100%)
 create mode 100644 scanner/scanner/identify.py

diff --git a/scanner/matcher/parser/guess.py b/scanner/scanner/guess/guess.py
similarity index 72%
rename from scanner/matcher/parser/guess.py
rename to scanner/scanner/guess/guess.py
index a13347c4..b6ffcc43 100644
--- a/scanner/matcher/parser/guess.py
+++ b/scanner/scanner/guess/guess.py
@@ -9,6 +9,7 @@ if __name__ == "__main__":
 from guessit.api import default_api
 from typing import cast, List, Any
 from rebulk import Rebulk
+from rebulk.match import MatchesDict
 
 try:
     from . import rules
@@ -20,35 +21,41 @@ rblk = cast(Rebulk, default_api.rebulk)
 rblk.rules(rules)
 
 
-def guessit(name: str, *, xem_titles: List[str] = [], extra_flags: dict[str, Any] = {}):
+def guessit(
+    name: str,
+    *,
+    expected_titles: List[str] = [],
+    extra_flags: dict[str, Any] = {},
+) -> MatchesDict:
     return default_api.guessit(
         name,
         {
             "episode_prefer_number": True,
             "excludes": "language",
-            "expected_title": xem_titles,
+            "expected_title": expected_titles,
+            "enforce_list": True
         }
         | extra_flags,
     )
 
 
-# Only used to test localy
+# Only used to test locally
 if __name__ == "__main__":
     import sys
     import json
-    from providers.implementations.thexem import TheXemClient
+    # from providers.implementations.thexem import TheXemClient
     from guessit.jsonutils import GuessitEncoder
     from aiohttp import ClientSession
     import asyncio
 
     async def main():
         async with ClientSession() as client:
-            xem = TheXemClient(client)
+            # xem = TheXemClient(client)
 
             advanced = any(x == "-a" for x in sys.argv)
             ret = guessit(
                 sys.argv[1],
-                xem_titles=await xem.get_expected_titles(),
+                expected_titles=[],
                 extra_flags={"advanced": advanced},
             )
             print(json.dumps(ret, cls=GuessitEncoder, indent=4))
diff --git a/scanner/matcher/parser/rules.py b/scanner/scanner/guess/rules.py
similarity index 100%
rename from scanner/matcher/parser/rules.py
rename to scanner/scanner/guess/rules.py
diff --git a/scanner/scanner/identify.py b/scanner/scanner/identify.py
new file mode 100644
index 00000000..71a81d7f
--- /dev/null
+++ b/scanner/scanner/identify.py
@@ -0,0 +1,59 @@
+from .models.videos import Video, Guess
+from .guess.guess import guessit
+from typing import Literal
+from itertools import zip_longest
+
+
+async def identify(path: str) -> Video:
+    """Build a Video (and its Guess) from what guessit finds in `path`.
+
+    guessit is called with `enforce_list` enabled (see guess.py), so the
+    properties read from `raw` below are treated as lists.
+
+    Raises IndexError when guessit reports no title or no type.
+    """
+    raw = guessit(path, expected_titles=[])
+
+    # guessit should only return one (according to the doc)
+    title: str = raw.get("title", [])[0]
+    kind: Literal["movie"] | Literal["episode"] = raw.get("type", [])[0]
+    # version and part are read with a fallback so that a name without them
+    # does not raise IndexError on an empty list.
+    # NOTE(review): None assumes Video.version / Video.part accept a missing
+    # value -- confirm against the model.
+    version: int | None = next(iter(raw.get("version", [])), None)
+    # apparently guessit can return multiples but tbh idk what to do with
+    # multiples part. we'll just ignore them for now
+    part: int | None = next(iter(raw.get("part", [])), None)
+
+    years: list[int] = raw.get("year", [])
+    seasons: list[int] = raw.get("season", [])
+    episodes: list[int] = raw.get("episode", [])
+
+    # zip_longest pads the shorter list: repeat its last value, or use None
+    # when that list is empty (indexing [-1] on it would raise IndexError).
+    shorter = seasons if len(seasons) < len(episodes) else episodes
+    fill = shorter[-1] if shorter else None
+
+    guess = Guess(
+        title=title,
+        kind=kind,
+        extraKind=None,
+        years=years,
+        episodes=[
+            Guess.Episode(season=s, episode=e)
+            for s, e in zip_longest(seasons, episodes, fillvalue=fill)
+        ],
+        # TODO: add external ids parsing in guessit
+        external_id={},
+        from_="guessit",
+        raw=raw,
+    )
+
+    return Video(
+        path=path,
+        rendering="",
+        part=part,
+        version=version,
+        guess=guess,
+    )
diff --git a/scanner/scanner/models/videos.py b/scanner/scanner/models/videos.py
index 628bf5d8..8ed7f405 100644
--- a/scanner/scanner/models/videos.py
+++ b/scanner/scanner/models/videos.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 from ..utils import Model
 from .extra import ExtraKind
 from .metadataid import MetadataId, EpisodeId
-from typing import Optional, Literal
+from typing import Optional, Literal, Any
 
 
 class Resource(Model):
@@ -17,16 +17,17 @@ class VideoInfo(Model):
     guesses: dict[str, dict[str, Resource]]
 
 
-class Guess(Model):
+class Guess(Model, extra="allow"):
     title: str
     kind: Literal["episode"] | Literal["movie"] | Literal["extra"]
     extraKind: Optional[ExtraKind]
     years: list[int]
     episodes: list[Guess.Episode]
     external_id: dict[str, MetadataId | EpisodeId]
+    raw: dict[str, Any]
 
     from_: str
-    history: list[Guess]
+    history: list[Guess] = []
 
     class Episode(Model):
         season: Optional[int]
@@ -68,4 +69,4 @@ class Video(Model):
     guess: Guess
     for_: Optional[
         For.Slug | For.ExternalId | For.Movie | For.Episode | For.Order | For.Special
-    ]
+    ] = None