Metadata download: Publisher/series transform rules: Fix values with commas in them not working. Fixes #2098620 [series mapping rules cut off replacement at ","](https://bugs.launchpad.net/calibre/+bug/2098620)

This commit is contained in:
Kovid Goyal 2025-02-17 20:43:44 +05:30
parent 8492dcc53a
commit f9f593cc3b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 23 additions and 19 deletions

View File

@ -15,19 +15,23 @@ def compile_pat(pat):
return regex.compile(pat, flags=REGEX_FLAGS) return regex.compile(pat, flags=REGEX_FLAGS)
def matcher(rule): def matcher(rule, separator=','):
import unicodedata import unicodedata
def n(x): def n(x):
return unicodedata.normalize('NFC', as_unicode(x or '', errors='replace')) return unicodedata.normalize('NFC', as_unicode(x or '', errors='replace'))
mt = rule['match_type'] mt = rule['match_type']
if mt == 'one_of': if mt == 'one_of':
tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')} if separator:
return lambda x: x in tags tags = {icu_lower(n(x.strip())) for x in rule['query'].split(separator)}
return tags.__contains__
return icu_lower(n(rule['query'].strip())).__eq__
if mt == 'not_one_of': if mt == 'not_one_of':
tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')} if separator:
return lambda x: x not in tags tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')}
return lambda x: x not in tags
return icu_lower(n(rule['query'].strip())).__ne__
if mt == 'matches': if mt == 'matches':
pat = compile_pat(n(rule['query'])) pat = compile_pat(n(rule['query']))
@ -44,7 +48,7 @@ def matcher(rule):
return lambda x: False return lambda x: False
def apply_rules(tag, rules): def apply_rules(tag, rules, separator=','):
ans = [] ans = []
tags = deque() tags = deque()
tags.append(tag) tags.append(tag)
@ -66,7 +70,7 @@ def apply_rules(tag, rules):
tag = compile_pat(rule['query']).sub(rule['replace'], tag) tag = compile_pat(rule['query']).sub(rule['replace'], tag)
else: else:
tag = rule['replace'] tag = rule['replace']
if ',' in tag: if separator and separator in tag:
replacement_tags = [] replacement_tags = []
self_added = False self_added = False
for rtag in (x.strip() for x in tag.split(',')): for rtag in (x.strip() for x in tag.split(',')):
@ -122,15 +126,15 @@ def uniq(vals, kmap=icu_lower):
return [x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k)] return [x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k)]
def map_tags(tags, rules=(), separator=','):
    '''
    Run every value in ``tags`` through the mapping ``rules``.

    :param tags: Values to transform. A falsy value (``None``, empty
        sequence) short-circuits to an empty list.
    :param rules: Rule dicts. When empty, a plain list copy of ``tags`` is
        returned and no matching is done.
    :param separator: Forwarded unchanged to matcher() and apply_rules().
        Callers handling single-valued fields pass ``''`` so that values
        containing commas are not split.
    :return: The non-empty results, passed through uniq().
    '''
    if not tags:
        return []
    if not rules:
        return list(tags)
    # Build each rule's matcher once instead of once per tag.
    compiled = [(rule, matcher(rule, separator)) for rule in rules]
    mapped = []
    for tag in tags:
        mapped.extend(apply_rules(tag, compiled, separator))
    # filter(None, ...) drops empty results before de-duplication.
    return uniq(list(filter(None, mapped)))

View File

@ -420,7 +420,7 @@ class MyBlockingBusy(QDialog): # {{{
publishers_map = cache.all_field_for('publisher', self.ids) publishers_map = cache.all_field_for('publisher', self.ids)
changed = {} changed = {}
for book_id, publisher in publishers_map.items(): for book_id, publisher in publishers_map.items():
new_publishers = map_tags([publisher], args.publisher_map_rules) new_publishers = map_tags([publisher], args.publisher_map_rules, separator='')
new_publisher = new_publishers[0] if new_publishers else '' new_publisher = new_publishers[0] if new_publishers else ''
if new_publisher != publisher: if new_publisher != publisher:
changed[book_id] = new_publisher changed[book_id] = new_publisher
@ -433,7 +433,7 @@ class MyBlockingBusy(QDialog): # {{{
series_map = cache.all_field_for('series', self.ids) series_map = cache.all_field_for('series', self.ids)
changed = {} changed = {}
for book_id, series in series_map.items(): for book_id, series in series_map.items():
new_series = map_tags([series], args.series_map_rules) new_series = map_tags([series], args.series_map_rules, separator='')
new_series = new_series[0] if new_series else '' new_series = new_series[0] if new_series else ''
if new_series != series: if new_series != series:
changed[book_id] = new_series changed[book_id] = new_series

View File

@ -28,8 +28,8 @@ class RuleEdit(RuleEditBase):
)) ))
MATCH_TYPE_MAP = OrderedDict(( MATCH_TYPE_MAP = OrderedDict((
('one_of', _('is one of')), ('one_of', _('is')),
('not_one_of', _('is not one of')), ('not_one_of', _('is not')),
('has', _('contains')), ('has', _('contains')),
('matches', _('matches regex pattern')), ('matches', _('matches regex pattern')),
('not_matches', _('does not match regex pattern')), ('not_matches', _('does not match regex pattern')),
@ -113,7 +113,7 @@ class Tester(TesterBase):
def do_test(self):
    # Show the result of applying the current rules to the entered publisher.
    publisher = self.value.strip()
    # A publisher is a single value that may itself contain commas, so an
    # empty separator is passed to keep map_tags() from splitting on ','.
    mapped = map_tags([publisher], self.rules, separator='')
    # map_tags() can return an empty list; display an empty string then.
    self.result.setText((mapped or ('',))[0])

View File

@ -28,8 +28,8 @@ class RuleEdit(RuleEditBase):
)) ))
MATCH_TYPE_MAP = OrderedDict(( MATCH_TYPE_MAP = OrderedDict((
('one_of', _('is one of')), ('one_of', _('is')),
('not_one_of', _('is not one of')), ('not_one_of', _('is not')),
('has', _('contains')), ('has', _('contains')),
('matches', _('matches regex pattern')), ('matches', _('matches regex pattern')),
('not_matches', _('does not match regex pattern')), ('not_matches', _('does not match regex pattern')),
@ -113,7 +113,7 @@ class Tester(TesterBase):
def do_test(self):
    # Show the result of applying the current rules to the entered series.
    series = self.value.strip()
    # A series name is a single value that may itself contain commas, so an
    # empty separator is passed to keep map_tags() from splitting on ','.
    mapped = map_tags([series], self.rules, separator='')
    # map_tags() can return an empty list; display an empty string then.
    self.result.setText((mapped or ('',))[0])
@ -130,7 +130,7 @@ if __name__ == '__main__':
app = Application([]) app = Application([])
d = RulesDialog() d = RulesDialog()
d.rules = [ d.rules = [
{'action':'replace', 'query':'alice Bob', 'match_type':'one_of', 'replace':'Alice Bob'}, {'action':'replace', 'query':'alice Bob, moose', 'match_type':'one_of', 'replace':'Alice Bob, Moose'},
] ]
d.exec() d.exec()
from pprint import pprint from pprint import pprint