Remove non-alphanumeric characters from titles for xem lookup

This commit is contained in:
Zoe Roux 2024-02-05 22:36:57 +01:00
parent e772a798f7
commit a055dfac5b
2 changed files with 9 additions and 2 deletions

View File

@ -1,3 +1,4 @@
import re
import logging
from typing import Dict, List, Literal
from aiohttp import ClientSession
@ -133,7 +134,9 @@ class TheXem:
titles = []
# Normalize a show title into the compact key used for the xem title lookup:
# lowercase, parenthesized parts dropped, then all separators/punctuation removed.
def clean(s: str):
# NOTE(review): the next line looks like the pre-change (removed) line of this diff hunk;
# the +/- markers were lost in this view. Confirm it is superseded by the three lines after it.
return s.lower().replace(" ", "")
s = s.lower()
s = re.sub(r"\([^)]*\)", "", s) # drop parenthesized text such as "(2019)" (guessit does not allow parentheses as part of a name)
return re.sub(r"[\W_]+", "", s) # drop runs of non-word characters and underscores; letters and digits of any script (kanji, accented letters) are kept
for x in map.values():
# Only the first element is a string (the show name) so we need to ignore the type hint

View File

@ -3,6 +3,7 @@
from typing import Any, List, Optional, cast
from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS
from rebulk.match import Matches, Match
import re
from copy import copy
@ -249,5 +250,8 @@ class XemFixup(Rule):
new_title.end = nmatch[0].end
new_title.value = f"{title.value}{hole}{nmatch[0].value}"
if new_title.value.lower().replace(" ", "") in context["xem_titles"]:
# Normalize a candidate title before testing membership in context["xem_titles"]:
# lowercase it, then remove every run of non-word characters and underscores.
# NOTE(review): the xem-side clean() in the other changed file also strips parenthesized
# text first; this one does not, presumably because guessit titles never contain
# parentheses. Confirm both sides produce the same key.
def clean(s: str):
return re.sub(r"[\W_]+", "", s.lower())
if clean(new_title.value) in context["xem_titles"]:
return [[title, nmatch[0]], [new_title]]