diff --git a/scanner/scanner/parser/guess.py b/scanner/scanner/parser/guess.py
new file mode 100644
--- /dev/null
+++ b/scanner/scanner/parser/guess.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Shared guessit entry point with the scanner's custom rebulk rules loaded."""
+
+from guessit.api import default_api
+from typing import cast
+from rebulk import Rebulk
+
+try:
+    # Package import: this is what `from .parser.guess import guessit` goes through.
+    from . import rules
+except ImportError:
+    # No parent package when the file is run directly as a script (local testing).
+    import rules
+
+# configure() is called before reading default_api.rebulk so the custom rules
+# can be attached to it (assumes rebulk is only populated once configured).
+default_api.configure({})
+rblk = cast(Rebulk, default_api.rebulk)
+rblk.rules(rules)
+
+
+def guessit(name: str):
+    """Parse ``name`` with guessit using the scanner's fixed options.
+
+    The options passed are ``episode_prefer_number=True`` and
+    ``excludes="language"``.
+
+    Args:
+        name: File name or path to analyse.
+
+    Returns:
+        The result of ``default_api.guessit`` for ``name``.
+    """
+    return default_api.guessit(
+        name, {"episode_prefer_number": True, "excludes": "language"}
+    )
+
+
+# Only used to test locally
+if __name__ == "__main__":
+    import sys
+    import json
+    from guessit.jsonutils import GuessitEncoder
+
+    ret = guessit(sys.argv[1])
+    print(json.dumps(ret, cls=GuessitEncoder, indent=4))
diff --git a/scanner/scanner/parser/rules.py b/scanner/scanner/parser/rules.py
new file mode 100644
--- /dev/null
+++ b/scanner/scanner/parser/rules.py
@@ -0,0 +1,83 @@
+# Read that for examples/rules: https://github.com/pymedusa/Medusa/blob/master/medusa/name_parser/rules/rules.py
+
+import logging
+from typing import Any
+from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
+from rebulk.match import Matches
+
+logger = logging.getLogger(__name__)
+
+
+class MultipleSeasonRule(Rule):
+    """Understand `abcd Season 2 - 5.mkv` as S2E5
+
+    Example: '[Erai-raws] Spy x Family Season 2 - 08 [1080p][Multiple Subtitle][00C44E2F].mkv'
+
+    Default:
+    ```json
+    {
+        "title": "Spy x Family",
+        "season": [
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8
+        ],
+        "screen_size": "1080p",
+        "release_group": "Multiple Subtitle",
+        "crc32": "00C44E2F",
+        "container": "mkv",
+        "mimetype": "video/x-matroska",
+        "type": "episode"
+    }
+    ```
+
+    Expected:
+    ```json
+    {
+        "title": "Spy x Family Season 2",
+        "season": null,
+        "episode": 8,
+        "screen_size": "1080p",
+        "release_group": "Multiple Subtitle",
+        "crc32": "00C44E2F",
+        "container": "mkv",
+        "mimetype": "video/x-matroska",
+        "type": "episode"
+    }
+    ```
+
+    We want `Season 2 ` to be parsed as part of the title since this format is
+    often used for animes (where season often does not match, we use thexem for that)
+
+    NOTE: the rewrite itself is not implemented yet; `when` currently never
+    triggers the consequences and only logs what guessit matched.
+    """
+
+    priority = POST_PROCESS
+    consequence = [RemoveMatch, AppendMatch]
+
+    def when(self, matches: Matches, context) -> Any:
+        """Inspect the `season` matches; never triggers the consequences yet.
+
+        Args:
+            matches: Matches collected so far for the parsed name.
+            context: Rule context supplied by the caller (unused).
+
+        Returns:
+            Always None, so neither RemoveMatch nor AppendMatch runs.
+        """
+        seasons = matches.named("season")
+        if not seasons:
+            # A name without any season match gives an empty result; indexing
+            # it would raise IndexError out of the rule.
+            return None
+
+        # Diagnostics go through logging rather than print() so they do not
+        # end up on stdout, mixed into the JSON printed by the guess.py CLI.
+        logger.debug("season matches: %s", seasons)
+        logger.debug("first season match initiator: %s", seasons[0].initiator)
+        return None
diff --git a/scanner/scanner/scanner.py b/scanner/scanner/scanner.py
index 54e169a7..5f061e92 100644
--- a/scanner/scanner/scanner.py
+++ b/scanner/scanner/scanner.py
@@ -6,13 +6,13 @@ import jsons
 import re
 from aiohttp import ClientSession
 from pathlib import Path
-from guessit import guessit
 from typing import List, Literal, Any
 from providers.provider import Provider
 from providers.types.collection import Collection
 from providers.types.show import Show
 from providers.types.episode import Episode, PartialShow
 from providers.types.season import Season
+from .parser.guess import guessit
 from .utils import batch, log_errors
 from .cache import cache, exec_as_cache, make_key
 
@@ -80,7 +80,7 @@ class Scanner:
         if path in self.registered or self._ignore_pattern.match(path):
             return
 
-        raw = guessit(path, {"episode_prefer_number": True, "excludes": "language"})
+        raw = guessit(path)
 
         if "mimetype" not in raw or not raw["mimetype"].startswith("video"):
             return