From f9f593cc3b6319f5505046e45f10a2e1f4812e62 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Feb 2025 20:43:44 +0530 Subject: [PATCH] Metadata download: Publisher/series transform rules: Fix values with commas in them not working. Fixes #2098620 [series mapping rules cut off replacement at ","](https://bugs.launchpad.net/calibre/+bug/2098620) --- src/calibre/ebooks/metadata/tag_mapper.py | 24 +++++++++++++---------- src/calibre/gui2/dialogs/metadata_bulk.py | 4 ++-- src/calibre/gui2/publisher_mapper.py | 6 +++--- src/calibre/gui2/series_mapper.py | 8 ++++---- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/calibre/ebooks/metadata/tag_mapper.py b/src/calibre/ebooks/metadata/tag_mapper.py index 84e3997e55..bbe4e455bd 100644 --- a/src/calibre/ebooks/metadata/tag_mapper.py +++ b/src/calibre/ebooks/metadata/tag_mapper.py @@ -15,19 +15,23 @@ def compile_pat(pat): return regex.compile(pat, flags=REGEX_FLAGS) -def matcher(rule): +def matcher(rule, separator=','): import unicodedata def n(x): return unicodedata.normalize('NFC', as_unicode(x or '', errors='replace')) mt = rule['match_type'] if mt == 'one_of': - tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')} - return lambda x: x in tags + if separator: + tags = {icu_lower(n(x.strip())) for x in rule['query'].split(separator)} + return tags.__contains__ + return icu_lower(n(rule['query'].strip())).__eq__ if mt == 'not_one_of': - tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')} - return lambda x: x not in tags + if separator: + tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')} + return lambda x: x not in tags + return icu_lower(n(rule['query'].strip())).__ne__ if mt == 'matches': pat = compile_pat(n(rule['query'])) @@ -44,7 +48,7 @@ def matcher(rule): return lambda x: False -def apply_rules(tag, rules): +def apply_rules(tag, rules, separator=','): ans = [] tags = deque() tags.append(tag) @@ -66,7 +70,7 @@ def apply_rules(tag, rules): tag = compile_pat(rule['query']).sub(rule['replace'], tag) else: tag = rule['replace'] - if ',' in tag: + if separator and separator in tag: replacement_tags = [] self_added = False for rtag in (x.strip() for x in tag.split(',')): @@ -122,15 +126,15 @@ def uniq(vals, kmap=icu_lower): return [x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k)] -def map_tags(tags, rules=()): +def map_tags(tags, rules=(), separator=','): if not tags: return [] if not rules: return list(tags) - rules = [(r, matcher(r)) for r in rules] + rules = [(r, matcher(r, separator)) for r in rules] ans = [] for t in tags: - ans.extend(apply_rules(t, rules)) + ans.extend(apply_rules(t, rules, separator)) return uniq(list(filter(None, ans))) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 0e5735b692..9a84be92d5 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -420,7 +420,7 @@ class MyBlockingBusy(QDialog): # {{{ publishers_map = cache.all_field_for('publisher', self.ids) changed = {} for book_id, publisher in publishers_map.items(): - new_publishers = map_tags([publisher], args.publisher_map_rules) + new_publishers = map_tags([publisher], args.publisher_map_rules, separator='') new_publisher = new_publishers[0] if new_publishers else '' if new_publisher != publisher: changed[book_id] = new_publisher @@ -433,7 +433,7 @@ class MyBlockingBusy(QDialog): # {{{ series_map = cache.all_field_for('series', self.ids) changed = {} for book_id, series in series_map.items(): - new_series = map_tags([series], args.series_map_rules) + new_series = map_tags([series], args.series_map_rules, separator='') new_series = new_series[0] if new_series else '' if new_series != series: changed[book_id] = new_series diff --git a/src/calibre/gui2/publisher_mapper.py b/src/calibre/gui2/publisher_mapper.py index d0403894cf..a9dd28848a 100644 --- a/src/calibre/gui2/publisher_mapper.py +++ b/src/calibre/gui2/publisher_mapper.py @@ -28,8 +28,8 @@ class RuleEdit(RuleEditBase): )) MATCH_TYPE_MAP = OrderedDict(( - ('one_of', _('is one of')), - ('not_one_of', _('is not one of')), + ('one_of', _('is')), + ('not_one_of', _('is not')), ('has', _('contains')), ('matches', _('matches regex pattern')), ('not_matches', _('does not match regex pattern')), @@ -113,7 +113,7 @@ class Tester(TesterBase): def do_test(self): publisher = self.value.strip() - ans = map_tags([publisher], self.rules) + ans = map_tags([publisher], self.rules, separator='') self.result.setText((ans or ('',))[0]) diff --git a/src/calibre/gui2/series_mapper.py b/src/calibre/gui2/series_mapper.py index 39dc0ae61c..dc576af42f 100644 --- a/src/calibre/gui2/series_mapper.py +++ b/src/calibre/gui2/series_mapper.py @@ -28,8 +28,8 @@ class RuleEdit(RuleEditBase): )) MATCH_TYPE_MAP = OrderedDict(( - ('one_of', _('is one of')), - ('not_one_of', _('is not one of')), + ('one_of', _('is')), + ('not_one_of', _('is not')), ('has', _('contains')), ('matches', _('matches regex pattern')), ('not_matches', _('does not match regex pattern')), @@ -113,7 +113,7 @@ class Tester(TesterBase): def do_test(self): series = self.value.strip() - ans = map_tags([series], self.rules) + ans = map_tags([series], self.rules, separator='') self.result.setText((ans or ('',))[0]) @@ -130,7 +130,7 @@ if __name__ == '__main__': app = Application([]) d = RulesDialog() d.rules = [ - {'action':'replace', 'query':'alice Bob', 'match_type':'one_of', 'replace':'Alice Bob'}, + {'action':'replace', 'query':'alice Bob, moose', 'match_type':'one_of', 'replace':'Alice Bob, Moose'}, ] d.exec() from pprint import pprint