mirror of
https://github.com/zoriya/Kyoo.git
synced 2025-07-09 03:04:20 -04:00
Improve xem titles sanitizing
This commit is contained in:
parent
fbd76594ea
commit
8b2c0f732f
@ -463,7 +463,9 @@ class TheMovieDatabase(Provider):
|
||||
else None
|
||||
)
|
||||
if tvdb_id is None:
|
||||
logging.info("Tvdb could not be found, trying xem name lookup for %s", name)
|
||||
logging.info(
|
||||
"Tvdb could not be found, trying xem name lookup for %s", name
|
||||
)
|
||||
_, tvdb_id = await self._xem.get_show_override("tvdb", old_name)
|
||||
if tvdb_id is not None:
|
||||
(
|
||||
|
@ -8,6 +8,17 @@ from providers.utils import ProviderError
|
||||
from scanner.cache import cache
|
||||
|
||||
|
||||
def clean(s: str) -> str:
    """Normalize a show title so that equivalent spellings compare equal.

    The title is lowercased, any parenthesized content is dropped,
    separator characters are replaced by spaces, and whitespace is
    collapsed to single spaces with both ends trimmed.

    Args:
        s: The raw title to normalize.

    Returns:
        The normalized title; an empty string if nothing but separators,
        parenthesized content or whitespace was given.
    """
    s = s.lower()
    # remove content of () (guessit does not allow them as part of a name)
    s = re.sub(r"\([^)]*\)", "", s)
    # replace separators with a space so the words around them stay distinct
    s = re.sub(r"[:\-_/\\&|,;.=\"'+~~@`ー]+", " ", s)
    # Collapse whitespace runs (that may be introduced above) and trim both
    # ends: without the trim, "name (2005)" would become "name " and never
    # compare equal to "name".
    return " ".join(s.split())
|
||||
|
||||
|
||||
class TheXem:
|
||||
def __init__(self, client: ClientSession) -> None:
|
||||
self._client = client
|
||||
@ -61,12 +72,13 @@ class TheXem:
|
||||
self, provider: Literal["tvdb"] | Literal["anidb"], show_name: str
|
||||
):
|
||||
map = await self.get_map(provider)
|
||||
show_name = clean(show_name)
|
||||
for [id, v] in map.items():
|
||||
# Only the first element is a string (the show name) so we need to ignore the type hint
|
||||
master_show_name: str = v[0] # type: ignore
|
||||
for x in v[1:]:
|
||||
[(name, season)] = x.items()
|
||||
if show_name.lower() == name.lower():
|
||||
if show_name == clean(name):
|
||||
return master_show_name, id
|
||||
return None, None
|
||||
|
||||
@ -76,11 +88,12 @@ class TheXem:
|
||||
map = await self.get_map(provider)
|
||||
if id not in map:
|
||||
return None
|
||||
show_name = clean(show_name)
|
||||
# Ignore the first element, this is the show name as a string
|
||||
for x in map[id][1:]:
|
||||
[(name, season)] = x.items()
|
||||
# TODO: replace .lower() with something a bit smarter
|
||||
if show_name.lower() == name.lower():
|
||||
if show_name == clean(name):
|
||||
return season
|
||||
return None
|
||||
|
||||
@ -133,11 +146,6 @@ class TheXem:
|
||||
map = await self.get_map(provider)
|
||||
titles = []
|
||||
|
||||
def clean(s: str):
|
||||
s = s.lower()
|
||||
s = re.sub(r"\([^)]*\)", "", s) # remove content of () (guessit does not allow them as part of a name)
|
||||
return re.sub(r"[\W_]+", "", s) # remove non alphanum content (it does keep non us chars like kanjis or accents)
|
||||
|
||||
for x in map.values():
|
||||
# Only the first element is a string (the show name) so we need to ignore the type hint
|
||||
master_show_name: str = x[0] # type: ignore
|
||||
|
@ -1,5 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
|
||||
|
||||
from guessit.api import default_api
|
||||
from typing import cast, List
|
||||
from rebulk import Rebulk
|
||||
@ -29,14 +35,11 @@ def guessit(name: str, *, xem_titles: List[str] = []):
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from providers.implementations.thexem import TheXem
|
||||
from guessit.jsonutils import GuessitEncoder
|
||||
from aiohttp import ClientSession
|
||||
import asyncio
|
||||
|
||||
sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
|
||||
from providers.implementations.thexem import TheXem
|
||||
|
||||
async def main():
|
||||
async with ClientSession() as client:
|
||||
xem = TheXem(client)
|
||||
|
@ -3,9 +3,10 @@
|
||||
from typing import Any, List, Optional, cast
|
||||
from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
|
||||
from rebulk.match import Matches, Match
|
||||
import re
|
||||
from copy import copy
|
||||
|
||||
from providers.implementations.thexem import clean
|
||||
|
||||
|
||||
class EpisodeTitlePromotion(Rule):
|
||||
"""Promote "episode_title" to "episode" when the title is in fact the episode number
|
||||
@ -250,8 +251,5 @@ class XemFixup(Rule):
|
||||
new_title.end = nmatch[0].end
|
||||
new_title.value = f"{title.value}{hole}{nmatch[0].value}"
|
||||
|
||||
def clean(s: str):
|
||||
return re.sub(r"[\W_]+", "", s.lower())
|
||||
|
||||
if clean(new_title.value) in context["xem_titles"]:
|
||||
return [[title, nmatch[0]], [new_title]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user