Better handle titles list (#445)

This commit is contained in:
Zoe Roux 2024-04-28 13:04:50 +02:00 committed by GitHub
commit 8b1524c155
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 154 additions and 52 deletions

View File

@ -51,15 +51,10 @@ class Matcher:
if "mimetype" not in raw or not raw["mimetype"].startswith("video"): if "mimetype" not in raw or not raw["mimetype"].startswith("video"):
return return
# Remove seasons in "One Piece (1999) 152.mkv" for example
if raw.get("season") == raw.get("year") and "season" in raw:
del raw["season"]
logger.info("Identied %s: %s", path, raw) logger.info("Identied %s: %s", path, raw)
title = raw.get("title") title = raw.get("title")
if isinstance(title, list):
title = title[0]
if not isinstance(title, str): if not isinstance(title, str):
raise ProviderError(f"Could not guess title, found: {title}") raise ProviderError(f"Could not guess title, found: {title}")

View File

@ -7,7 +7,7 @@ if __name__ == "__main__":
sys.path.append(str(Path(f"{__file__}/../../..").resolve())) sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
from guessit.api import default_api from guessit.api import default_api
from typing import cast, List from typing import cast, List, Any
from rebulk import Rebulk from rebulk import Rebulk
try: try:
@ -20,14 +20,15 @@ rblk = cast(Rebulk, default_api.rebulk)
rblk.rules(rules) rblk.rules(rules)
def guessit(
    name: str,
    *,
    xem_titles: List[str] | None = None,
    extra_flags: dict[str, Any] | None = None,
):
    """Run guessit's default API on ``name`` with this project's options.

    Args:
        name: The string handed to ``default_api.guessit`` as the input to parse.
        xem_titles: Value forwarded under the ``"xem_titles"`` option.
            Defaults to an empty list.
        extra_flags: Additional options merged on top of the base options;
            keys present here override the base values.

    Returns:
        Whatever ``default_api.guessit`` returns for ``name`` and the merged
        options.
    """
    # ``None`` sentinels replace the former ``[]`` / ``{}`` defaults so that no
    # mutable object is shared between calls (or with the callee).
    options: dict[str, Any] = {
        "episode_prefer_number": True,
        "excludes": "language",
        "xem_titles": [] if xem_titles is None else xem_titles,
    }
    if extra_flags:
        # Right-hand side wins, same as the previous ``base | extra_flags``.
        options = options | extra_flags
    return default_api.guessit(name, options)
@ -44,7 +45,11 @@ if __name__ == "__main__":
async with ClientSession() as client: async with ClientSession() as client:
xem = TheXemClient(client) xem = TheXemClient(client)
ret = guessit(sys.argv[1], xem_titles=await xem.get_expected_titles()) ret = guessit(
sys.argv[1],
xem_titles=await xem.get_expected_titles(),
# extra_flags={"advanced": True},
)
print(json.dumps(ret, cls=GuessitEncoder, indent=4)) print(json.dumps(ret, cls=GuessitEncoder, indent=4))
asyncio.run(main()) asyncio.run(main())

View File

@ -1,5 +1,6 @@
# Read that for examples/rules: https://github.com/pymedusa/Medusa/blob/master/medusa/name_parser/rules/rules.py # Read that for examples/rules: https://github.com/pymedusa/Medusa/blob/master/medusa/name_parser/rules/rules.py
from logging import getLogger
from typing import Any, List, Optional, cast from typing import Any, List, Optional, cast
from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
from rebulk.match import Matches, Match from rebulk.match import Matches, Match
@ -7,6 +8,68 @@ from copy import copy
from providers.implementations.thexem import clean from providers.implementations.thexem import clean
logger = getLogger(__name__)
class UnlistTitles(Rule):
    """Join titles to a single string instead of a list.

    Example: '/media/series/Demon Slayer - Kimetsu no Yaiba/Season 4/Demon Slayer - Kimetsu no Yaiba - S04E10 - Love Hashira Mitsuri Kanroji WEBDL-1080p.mkv'

    Default:
    ```json
    {
        "title": [
            "Demon Slayer",
            "Kimetsu no Yaiba"
        ],
        "season": 4,
        "episode_title": "Demon Slayer",
        "alternative_title": "Kimetsu no Yaiba",
        "episode": 10,
        "source": "Web",
        "screen_size": "1080p",
        "container": "mkv",
        "mimetype": "video/x-matroska",
        "type": "episode"
    }
    ```

    Expected:
    ```json
    {
        "title": "Demon Slayer - Kimetsu no Yaiba",
        "season": 4,
        "episode_title": "Demon Slayer",
        "alternative_title": "Kimetsu no Yaiba",
        "episode": 10,
        "source": "Web",
        "screen_size": "1080p",
        "container": "mkv",
        "mimetype": "video/x-matroska",
        "type": "episode"
    }
    ```
    """

    priority = POST_PROCESS
    # One entry of the ``when`` result per consequence, in this order:
    # matches to remove first, matches to append second.
    consequence = [RemoveMatch, AppendMatch]

    def when(self, matches: Matches, context) -> Any:
        """Build the remove/append lists that collapse several titles into one.

        Returns:
            ``None`` when there are fewer than two "title" matches. Otherwise
            ``[titles, [merged]]``: every original "title" match (including
            any that were not merged) paired with a single copy of the first
            title whose ``end`` was extended over each adjacent title.
        """
        titles: List[Match] = matches.named("title")  # type: ignore
        if not titles or len(titles) <= 1:
            return None

        # Work on a copy so the original first match is left untouched.
        merged = copy(titles[0])
        for candidate in titles[1:]:
            # Check that the titles are next to each other; if they are not,
            # ignore this one.
            following: List[Match] = matches.next(merged)  # type: ignore
            if not following or following[0] != candidate:
                logger.warning(
                    "Ignoring potential part of title: %s", candidate.value
                )
                continue
            merged.end = candidate.end
        return [titles, [merged]]
class EpisodeTitlePromotion(Rule): class EpisodeTitlePromotion(Rule):
"""Promote "episode_title" to "episode" when the title is in fact the episode number """Promote "episode_title" to "episode" when the title is in fact the episode number
@ -253,3 +316,42 @@ class XemFixup(Rule):
if clean(new_title.value) in context["xem_titles"]: if clean(new_title.value) in context["xem_titles"]:
return [[title, nmatch[0]], [new_title]] return [[title, nmatch[0]], [new_title]]
class SeasonYearDedup(Rule):
    """Drop the "season" match when its value duplicates the "year" match.

    Example: "One Piece (1999) 152.mkv"

    Default:
    ```json
    {
        "title": "One Piece",
        "year": 1999,
        "season": 1999,
        "episode": 152,
        "container": "mkv",
        "mimetype": "video/x-matroska",
        "type": "episode"
    }
    ```

    Expected:
    ```json
    {
        "title": "One Piece",
        "year": 1999,
        "episode": 152,
        "container": "mkv",
        "mimetype": "video/x-matroska",
        "type": "episode"
    }
    ```
    """

    priority = POST_PROCESS
    consequence = [RemoveMatch]

    def when(self, matches: Matches, context) -> Any:
        """Return ``[season_matches]`` for removal, or ``None`` to do nothing.

        The rule only fires when there is exactly one "season" match and
        exactly one "year" match and both carry the same value.
        """
        season_matches: List[Match] = matches.named("season")  # type: ignore
        year_matches: List[Match] = matches.named("year")  # type: ignore

        # Anything other than a single season facing a single year is left alone.
        if len(season_matches) != 1 or len(year_matches) != 1:
            return None
        if season_matches[0].value != year_matches[0].value:
            return None
        return [season_matches]

View File

@ -1,5 +1,5 @@
{pkgs ? import <nixpkgs> {}}: let {pkgs ? import <nixpkgs> {}}: let
python = pkgs.python311.withPackages (ps: python = pkgs.python312.withPackages (ps:
with ps; [ with ps; [
guessit guessit
aiohttp aiohttp