Try stuff with rendering & guessit rules

This commit is contained in:
Zoe Roux 2025-05-06 02:14:35 +02:00
parent 461333a90f
commit 01d2dc25d6
No known key found for this signature in database
4 changed files with 79 additions and 28 deletions

View File

@ -4,7 +4,7 @@ app = FastAPI(
title="Scanner", title="Scanner",
description="API to control the long running scanner or interacting with external databases (themoviedb, tvdb...)\n\n" description="API to control the long running scanner or interacting with external databases (themoviedb, tvdb...)\n\n"
+ "Most of those APIs are for admins only.", + "Most of those APIs are for admins only.",
openapi_prefix="/scanner", root_path="/scanner",
# lifetime=smth # lifetime=smth
) )

View File

@ -9,7 +9,7 @@ if __name__ == "__main__":
from guessit.api import default_api from guessit.api import default_api
from typing import cast, List, Any from typing import cast, List, Any
from rebulk import Rebulk from rebulk import Rebulk
from rebulk.match import MatchesDict from rebulk.match import Match
try: try:
from . import rules from . import rules
@ -26,17 +26,22 @@ def guessit(
*, *,
expected_titles: List[str] = [], expected_titles: List[str] = [],
extra_flags: dict[str, Any] = {}, extra_flags: dict[str, Any] = {},
) -> MatchesDict: ) -> dict[str, list[Match]]:
return default_api.guessit( rendering = []
ret = default_api.guessit(
name, name,
{ {
"episode_prefer_number": True, "episode_prefer_number": True,
"excludes": "language", "excludes": "language",
"expected_title": expected_titles, "expected_title": expected_titles,
"enforce_list": True, "enforce_list": True,
"advanced": True,
"rendering": rendering,
} }
| extra_flags, | extra_flags,
) )
print(rendering)
return ret
# Only used to test locally # Only used to test locally

View File

@ -1,10 +1,12 @@
# Read that for examples/rules: https://github.com/pymedusa/Medusa/blob/master/medusa/name_parser/rules/rules.py # Read that for examples/rules: https://github.com/pymedusa/Medusa/blob/master/medusa/name_parser/rules/rules.py
import hashlib
from copy import copy
from logging import getLogger from logging import getLogger
from typing import Any, List, Optional, cast from typing import Any, List, Optional, cast
from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
from rebulk.match import Matches, Match from rebulk import POST_PROCESS, AppendMatch, RemoveMatch, Rule
from copy import copy from rebulk.match import Match, Matches
logger = getLogger(__name__) logger = getLogger(__name__)
@ -67,7 +69,7 @@ class UnlistTitles(Rule):
# Check if titles are next to each other, if they are not ignore it. # Check if titles are next to each other, if they are not ignore it.
next: List[Match] = matches.next(title) # type: ignore next: List[Match] = matches.next(title) # type: ignore
if not next or next[0] != nmatch: if not next or next[0] != nmatch:
logger.warn(f"Ignoring potential part of title: {nmatch.value}") logger.warning(f"Ignoring potential part of title: {nmatch.value}")
continue continue
title.end = nmatch.end title.end = nmatch.end
@ -176,7 +178,7 @@ class SeasonYearDedup(Rule):
""" """
# This rule does the opposite of the YearSeason rule of guessit (with POST_PROCESS priority) # This rule does the opposite of the YearSeason rule of guessit (with POST_PROCESS priority)
# To overide it, we need the -1. (rule: https://github.com/guessit-io/guessit/blob/develop/guessit/rules/processors.py#L195) # To override it, we need the -1. (rule: https://github.com/guessit-io/guessit/blob/develop/guessit/rules/processors.py#L195)
priority = POST_PROCESS - 1 priority = POST_PROCESS - 1
consequence = RemoveMatch consequence = RemoveMatch
@ -185,3 +187,29 @@ class SeasonYearDedup(Rule):
year: List[Match] = matches.named("year") # type: ignore year: List[Match] = matches.named("year") # type: ignore
if len(season) == 1 and len(year) == 1 and season[0].value == year[0].value: if len(season) == 1 and len(year) == 1 and season[0].value == year[0].value:
return season return season
# class RenderingDedup(Rule):
# """Compute rendering (sha of path - version/part)
#
# Example: "One Piece (1999) v2 152 part2.mkv"
# Computes: sha256("One Piece (1999) 152.mkv")
# """
#
# priority = POST_PROCESS + 100000
# consequence = AppendMatch
#
# def when(self, matches: Matches, context: dict[str, list[str]]) -> Any:
# ret = hashlib.new("sha256")
#
# value: list[Match] = sorted(
# list(matches) + matches.holes(), # type: ignore
# key=lambda m: m.start,
# )
# for m in value:
# if m.name == "part" or m.name == "version":
# continue
# ret.update(cast(str, m.raw).encode("utf-8"))
# context["rendering"] = [ret.hexdigest()]
# return [Match(start=0, end=1, value=ret.hexdigest(), raw="", name="rendering")]

View File

@ -1,6 +1,7 @@
from hashlib import sha256
from itertools import zip_longest from itertools import zip_longest
from logging import getLogger from logging import getLogger
from typing import Awaitable, Callable, Literal from typing import Awaitable, Callable, Literal, Optional, cast
from .guess.guess import guessit from .guess.guess import guessit
from .models.videos import Guess, Video from .models.videos import Guess, Video
@ -15,37 +16,43 @@ pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
async def identify(path: str) -> Video: async def identify(path: str) -> Video:
raw = guessit(path, expected_titles=[]) raw = guessit(
path,
expected_titles=[],
extra_flags={"advanced": True},
)
# guessit should only return one (according to the doc) # guessit should only return one (according to the doc)
title: str = raw.get("title", [])[0] title = raw.get("title", [])[0]
kind: Literal["movie"] | Literal["episode"] = raw.get("type", [])[0] kind = raw.get("type", [])[0]
version: int = raw.get("version", [])[0] version = next(iter(raw.get("version", [])), None)
# guessit can apparently return multiple parts, but it is unclear what to do # guessit can apparently return multiple parts, but it is unclear what to do
# with several of them, so only the first one is used for now. # with several of them, so only the first one is used for now.
part: int = raw.get("part", [])[0] part = next(iter(raw.get("part", [])), None)
years: list[int] = raw.get("year", []) years = raw.get("year", [])
seasons: list[int] = raw.get("season", []) seasons = raw.get("season", [])
episodes: list[int] = raw.get("episode", []) episodes = raw.get("episode", [])
rendering = path[:version.start] + path[version.end:]
print(raw)
guess = Guess( guess = Guess(
title=title, title=cast(str, title.value),
kind=kind, kind=cast(Literal["episode"] | Literal["movie"], kind.value),
extraKind=None, extra_kind=None,
years=years, years=[cast(int, y.value) for y in years],
episodes=[ episodes=[
Guess.Episode(season=s, episode=e) Guess.Episode(season=cast(int, s.value), episode=cast(int, e.value))
for s, e in zip_longest( for s, e in zip_longest(
seasons, seasons,
episodes, episodes,
fillvalue=seasons[-1] if len(seasons) < len(episodes) else episodes[-1], fillvalue=seasons[-1] if len(seasons) < len(episodes) else episodes[-1],
) )
], ],
# TODO: add external ids parsing in guessit
external_id={}, external_id={},
from_="guessit", from_="guessit",
raw=raw, raw={k: [x.value for x in v] for k, v in raw.items()},
) )
for step in pipeline: for step in pipeline:
@ -56,8 +63,19 @@ async def identify(path: str) -> Video:
return Video( return Video(
path=path, path=path,
rendering="", rendering=sha256(path.encode()).hexdigest(),
part=part, part=cast(int, part.value) if part else None,
version=version, version=cast(int, version.value) if version else 1,
guess=guess, guess=guess,
) )
if __name__ == "__main__":
import asyncio
import sys
async def main():
ret = await identify(sys.argv[1])
print(ret.model_dump_json(indent=4, by_alias=True))
asyncio.run(main())