Tag mapper: Ensure tag rules are unicode normalized before matching

This commit is contained in:
Kovid Goyal 2023-03-21 15:46:27 +05:30
parent 5ab7c66363
commit b55dd98bce
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -5,6 +5,7 @@
from collections import deque from collections import deque
from calibre.utils.icu import lower as icu_lower, upper as icu_upper from calibre.utils.icu import lower as icu_lower, upper as icu_upper
from polyglot.builtins import as_unicode
def compile_pat(pat): def compile_pat(pat):
@ -14,25 +15,29 @@ def compile_pat(pat):
def matcher(rule):
    """Build a predicate that tests a single tag against ``rule``.

    ``rule`` is a mapping with a ``'match_type'`` key and, for every
    recognised match type, a ``'query'`` key. The query text is normalized
    to Unicode NFC before use, so composed and decomposed spellings of the
    same rule text produce the same matcher.

    Recognised match types:

    * ``'one_of'`` / ``'not_one_of'``: the query is split on commas; each
      item is stripped, normalized and lower-cased, and the predicate tests
      whether the tag is (or is not) a member of that set.
    * ``'matches'`` / ``'not_matches'``: the normalized query is compiled
      with ``compile_pat`` and the predicate reports whether
      ``pat.match(tag)`` succeeds (or fails).
    * ``'has'``: the predicate tests whether the normalized, lower-cased
      query is a substring of the tag.

    Any other match type yields a predicate that always returns ``False``.

    NOTE(review): the returned predicates use the tag exactly as passed in;
    presumably callers supply tags that are already lower-cased and
    normalized -- confirm against the call sites.
    """
    import unicodedata

    def n(x):
        # Coerce to text (a falsy value becomes '') and normalize to NFC.
        return unicodedata.normalize('NFC', as_unicode(x or '', errors='replace'))

    mt = rule['match_type']
    if mt == 'one_of':
        tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')}
        return lambda x: x in tags
    if mt == 'not_one_of':
        tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')}
        return lambda x: x not in tags
    if mt == 'matches':
        pat = compile_pat(n(rule['query']))
        return lambda x: pat.match(x) is not None
    if mt == 'not_matches':
        pat = compile_pat(n(rule['query']))
        return lambda x: pat.match(x) is None
    if mt == 'has':
        s = icu_lower(n(rule['query']))
        return lambda x: s in x
    # Unknown match type: never matches.
    return lambda x: False