mirror of
https://github.com/zoriya/Kyoo.git
synced 2025-07-09 03:04:20 -04:00
Improve xem titles sanitizing
This commit is contained in:
parent
fbd76594ea
commit
8b2c0f732f
@ -463,7 +463,9 @@ class TheMovieDatabase(Provider):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
if tvdb_id is None:
|
if tvdb_id is None:
|
||||||
logging.info("Tvdb could not be found, trying xem name lookup for %s", name)
|
logging.info(
|
||||||
|
"Tvdb could not be found, trying xem name lookup for %s", name
|
||||||
|
)
|
||||||
_, tvdb_id = await self._xem.get_show_override("tvdb", old_name)
|
_, tvdb_id = await self._xem.get_show_override("tvdb", old_name)
|
||||||
if tvdb_id is not None:
|
if tvdb_id is not None:
|
||||||
(
|
(
|
||||||
|
@ -8,6 +8,17 @@ from providers.utils import ProviderError
|
|||||||
from scanner.cache import cache
|
from scanner.cache import cache
|
||||||
|
|
||||||
|
|
||||||
|
# Patterns are compiled once at import time: clean() is called for every
# alias of every show when matching against the xem map.
_PARENTHESIZED = re.compile(r"\([^)]*\)")
_SEPARATORS = re.compile(r"[:\-_/\\&|,;.=\"'+~@`ー]+")
_SPACE_RUN = re.compile(r" +")


def clean(s: str) -> str:
    """Normalize a show title so that two spellings of it compare equal.

    The title is lowercased, any parenthesized part is dropped, separator
    characters are turned into spaces, runs of spaces are collapsed and
    the result is stripped of surrounding whitespace.

    Args:
        s: The raw title (from a file name or from a xem alias).

    Returns:
        The normalized title, suitable for equality comparison with
        another title normalized by this function.
    """
    s = s.lower()
    # remove content of () (guessit does not allow them as part of a name)
    s = _PARENTHESIZED.sub("", s)
    # replace separators by a space so words stay apart
    s = _SEPARATORS.sub(" ", s)
    # collapse subsequent spaces (that may be introduced above)
    s = _SPACE_RUN.sub(" ", s)
    # Removing a trailing "(2020)" or a leading "." leaves a space at the
    # edge; without stripping, "Show (2020)" would not match "Show".
    return s.strip()
|
||||||
|
|
||||||
|
|
||||||
class TheXem:
|
class TheXem:
|
||||||
def __init__(self, client: ClientSession) -> None:
|
def __init__(self, client: ClientSession) -> None:
|
||||||
self._client = client
|
self._client = client
|
||||||
@ -61,12 +72,13 @@ class TheXem:
|
|||||||
self, provider: Literal["tvdb"] | Literal["anidb"], show_name: str
|
self, provider: Literal["tvdb"] | Literal["anidb"], show_name: str
|
||||||
):
|
):
|
||||||
map = await self.get_map(provider)
|
map = await self.get_map(provider)
|
||||||
|
show_name = clean(show_name)
|
||||||
for [id, v] in map.items():
|
for [id, v] in map.items():
|
||||||
# Only the first element is a string (the show name) so we need to ignore the type hint
|
# Only the first element is a string (the show name) so we need to ignore the type hint
|
||||||
master_show_name: str = v[0] # type: ignore
|
master_show_name: str = v[0] # type: ignore
|
||||||
for x in v[1:]:
|
for x in v[1:]:
|
||||||
[(name, season)] = x.items()
|
[(name, season)] = x.items()
|
||||||
if show_name.lower() == name.lower():
|
if show_name == clean(name):
|
||||||
return master_show_name, id
|
return master_show_name, id
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
@ -76,11 +88,12 @@ class TheXem:
|
|||||||
map = await self.get_map(provider)
|
map = await self.get_map(provider)
|
||||||
if id not in map:
|
if id not in map:
|
||||||
return None
|
return None
|
||||||
|
show_name = clean(show_name)
|
||||||
# Ignore the first element, this is the show name as a string
|
# Ignore the first element, this is the show name as a string
|
||||||
for x in map[id][1:]:
|
for x in map[id][1:]:
|
||||||
[(name, season)] = x.items()
|
[(name, season)] = x.items()
|
||||||
# TODO: replace .lower() with something a bit smarter
|
# TODO: replace .lower() with something a bit smarter
|
||||||
if show_name.lower() == name.lower():
|
if show_name == clean(name):
|
||||||
return season
|
return season
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -133,11 +146,6 @@ class TheXem:
|
|||||||
map = await self.get_map(provider)
|
map = await self.get_map(provider)
|
||||||
titles = []
|
titles = []
|
||||||
|
|
||||||
def clean(s: str):
|
|
||||||
s = s.lower()
|
|
||||||
s = re.sub(r"\([^)]*\)", "", s) # remove content of () (guessit does not allow them as part of a name)
|
|
||||||
return re.sub(r"[\W_]+", "", s) # remove non alphanum content (it does keep non us chars like kanjis or accents)
|
|
||||||
|
|
||||||
for x in map.values():
|
for x in map.values():
|
||||||
# Only the first element is a string (the show name) so we need to ignore the type hint
|
# Only the first element is a string (the show name) so we need to ignore the type hint
|
||||||
master_show_name: str = x[0] # type: ignore
|
master_show_name: str = x[0] # type: ignore
|
||||||
|
@ -1,5 +1,11 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
|
||||||
|
|
||||||
from guessit.api import default_api
|
from guessit.api import default_api
|
||||||
from typing import cast, List
|
from typing import cast, List
|
||||||
from rebulk import Rebulk
|
from rebulk import Rebulk
|
||||||
@ -29,14 +35,11 @@ def guessit(name: str, *, xem_titles: List[str] = []):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from providers.implementations.thexem import TheXem
|
||||||
from guessit.jsonutils import GuessitEncoder
|
from guessit.jsonutils import GuessitEncoder
|
||||||
from aiohttp import ClientSession
|
from aiohttp import ClientSession
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
sys.path.append(str(Path(f"{__file__}/../../..").resolve()))
|
|
||||||
from providers.implementations.thexem import TheXem
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
async with ClientSession() as client:
|
async with ClientSession() as client:
|
||||||
xem = TheXem(client)
|
xem = TheXem(client)
|
||||||
|
@ -3,9 +3,10 @@
|
|||||||
from typing import Any, List, Optional, cast
|
from typing import Any, List, Optional, cast
|
||||||
from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
|
from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
|
||||||
from rebulk.match import Matches, Match
|
from rebulk.match import Matches, Match
|
||||||
import re
|
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
|
||||||
|
from providers.implementations.thexem import clean
|
||||||
|
|
||||||
|
|
||||||
class EpisodeTitlePromotion(Rule):
|
class EpisodeTitlePromotion(Rule):
|
||||||
"""Promote "episode_title" to "episode" when the title is in fact the episode number
|
"""Promote "episode_title" to "episode" when the title is in fact the episode number
|
||||||
@ -250,8 +251,5 @@ class XemFixup(Rule):
|
|||||||
new_title.end = nmatch[0].end
|
new_title.end = nmatch[0].end
|
||||||
new_title.value = f"{title.value}{hole}{nmatch[0].value}"
|
new_title.value = f"{title.value}{hole}{nmatch[0].value}"
|
||||||
|
|
||||||
def clean(s: str):
|
|
||||||
return re.sub(r"[\W_]+", "", s.lower())
|
|
||||||
|
|
||||||
if clean(new_title.value) in context["xem_titles"]:
|
if clean(new_title.value) in context["xem_titles"]:
|
||||||
return [[title, nmatch[0]], [new_title]]
|
return [[title, nmatch[0]], [new_title]]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user