mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Metadata download: Publisher/series transform rules: Fix values with commas in them not working. Fixes #2098620 [series mapping rules cut off replacement at ","](https://bugs.launchpad.net/calibre/+bug/2098620)
This commit is contained in:
parent
8492dcc53a
commit
f9f593cc3b
@ -15,19 +15,23 @@ def compile_pat(pat):
|
|||||||
return regex.compile(pat, flags=REGEX_FLAGS)
|
return regex.compile(pat, flags=REGEX_FLAGS)
|
||||||
|
|
||||||
|
|
||||||
def matcher(rule):
|
def matcher(rule, separator=','):
|
||||||
import unicodedata
|
import unicodedata
|
||||||
def n(x):
|
def n(x):
|
||||||
return unicodedata.normalize('NFC', as_unicode(x or '', errors='replace'))
|
return unicodedata.normalize('NFC', as_unicode(x or '', errors='replace'))
|
||||||
|
|
||||||
mt = rule['match_type']
|
mt = rule['match_type']
|
||||||
if mt == 'one_of':
|
if mt == 'one_of':
|
||||||
tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')}
|
if separator:
|
||||||
return lambda x: x in tags
|
tags = {icu_lower(n(x.strip())) for x in rule['query'].split(separator)}
|
||||||
|
return tags.__contains__
|
||||||
|
return icu_lower(n(rule['query'].strip())).__eq__
|
||||||
|
|
||||||
if mt == 'not_one_of':
|
if mt == 'not_one_of':
|
||||||
|
if separator:
|
||||||
tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')}
|
tags = {icu_lower(n(x.strip())) for x in rule['query'].split(',')}
|
||||||
return lambda x: x not in tags
|
return lambda x: x not in tags
|
||||||
|
return icu_lower(n(rule['query'].strip())).__ne__
|
||||||
|
|
||||||
if mt == 'matches':
|
if mt == 'matches':
|
||||||
pat = compile_pat(n(rule['query']))
|
pat = compile_pat(n(rule['query']))
|
||||||
@ -44,7 +48,7 @@ def matcher(rule):
|
|||||||
return lambda x: False
|
return lambda x: False
|
||||||
|
|
||||||
|
|
||||||
def apply_rules(tag, rules):
|
def apply_rules(tag, rules, separator=','):
|
||||||
ans = []
|
ans = []
|
||||||
tags = deque()
|
tags = deque()
|
||||||
tags.append(tag)
|
tags.append(tag)
|
||||||
@ -66,7 +70,7 @@ def apply_rules(tag, rules):
|
|||||||
tag = compile_pat(rule['query']).sub(rule['replace'], tag)
|
tag = compile_pat(rule['query']).sub(rule['replace'], tag)
|
||||||
else:
|
else:
|
||||||
tag = rule['replace']
|
tag = rule['replace']
|
||||||
if ',' in tag:
|
if separator and separator in tag:
|
||||||
replacement_tags = []
|
replacement_tags = []
|
||||||
self_added = False
|
self_added = False
|
||||||
for rtag in (x.strip() for x in tag.split(',')):
|
for rtag in (x.strip() for x in tag.split(',')):
|
||||||
@ -122,15 +126,15 @@ def uniq(vals, kmap=icu_lower):
|
|||||||
return [x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k)]
|
return [x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k)]
|
||||||
|
|
||||||
|
|
||||||
def map_tags(tags, rules=()):
|
def map_tags(tags, rules=(), separator=','):
|
||||||
if not tags:
|
if not tags:
|
||||||
return []
|
return []
|
||||||
if not rules:
|
if not rules:
|
||||||
return list(tags)
|
return list(tags)
|
||||||
rules = [(r, matcher(r)) for r in rules]
|
rules = [(r, matcher(r, separator)) for r in rules]
|
||||||
ans = []
|
ans = []
|
||||||
for t in tags:
|
for t in tags:
|
||||||
ans.extend(apply_rules(t, rules))
|
ans.extend(apply_rules(t, rules, separator))
|
||||||
return uniq(list(filter(None, ans)))
|
return uniq(list(filter(None, ans)))
|
||||||
|
|
||||||
|
|
||||||
|
@ -420,7 +420,7 @@ class MyBlockingBusy(QDialog): # {{{
|
|||||||
publishers_map = cache.all_field_for('publisher', self.ids)
|
publishers_map = cache.all_field_for('publisher', self.ids)
|
||||||
changed = {}
|
changed = {}
|
||||||
for book_id, publisher in publishers_map.items():
|
for book_id, publisher in publishers_map.items():
|
||||||
new_publishers = map_tags([publisher], args.publisher_map_rules)
|
new_publishers = map_tags([publisher], args.publisher_map_rules, separator='')
|
||||||
new_publisher = new_publishers[0] if new_publishers else ''
|
new_publisher = new_publishers[0] if new_publishers else ''
|
||||||
if new_publisher != publisher:
|
if new_publisher != publisher:
|
||||||
changed[book_id] = new_publisher
|
changed[book_id] = new_publisher
|
||||||
@ -433,7 +433,7 @@ class MyBlockingBusy(QDialog): # {{{
|
|||||||
series_map = cache.all_field_for('series', self.ids)
|
series_map = cache.all_field_for('series', self.ids)
|
||||||
changed = {}
|
changed = {}
|
||||||
for book_id, series in series_map.items():
|
for book_id, series in series_map.items():
|
||||||
new_series = map_tags([series], args.series_map_rules)
|
new_series = map_tags([series], args.series_map_rules, separator='')
|
||||||
new_series = new_series[0] if new_series else ''
|
new_series = new_series[0] if new_series else ''
|
||||||
if new_series != series:
|
if new_series != series:
|
||||||
changed[book_id] = new_series
|
changed[book_id] = new_series
|
||||||
|
@ -28,8 +28,8 @@ class RuleEdit(RuleEditBase):
|
|||||||
))
|
))
|
||||||
|
|
||||||
MATCH_TYPE_MAP = OrderedDict((
|
MATCH_TYPE_MAP = OrderedDict((
|
||||||
('one_of', _('is one of')),
|
('one_of', _('is')),
|
||||||
('not_one_of', _('is not one of')),
|
('not_one_of', _('is not')),
|
||||||
('has', _('contains')),
|
('has', _('contains')),
|
||||||
('matches', _('matches regex pattern')),
|
('matches', _('matches regex pattern')),
|
||||||
('not_matches', _('does not match regex pattern')),
|
('not_matches', _('does not match regex pattern')),
|
||||||
@ -113,7 +113,7 @@ class Tester(TesterBase):
|
|||||||
|
|
||||||
def do_test(self):
|
def do_test(self):
|
||||||
publisher = self.value.strip()
|
publisher = self.value.strip()
|
||||||
ans = map_tags([publisher], self.rules)
|
ans = map_tags([publisher], self.rules, separator='')
|
||||||
self.result.setText((ans or ('',))[0])
|
self.result.setText((ans or ('',))[0])
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,8 +28,8 @@ class RuleEdit(RuleEditBase):
|
|||||||
))
|
))
|
||||||
|
|
||||||
MATCH_TYPE_MAP = OrderedDict((
|
MATCH_TYPE_MAP = OrderedDict((
|
||||||
('one_of', _('is one of')),
|
('one_of', _('is')),
|
||||||
('not_one_of', _('is not one of')),
|
('not_one_of', _('is not')),
|
||||||
('has', _('contains')),
|
('has', _('contains')),
|
||||||
('matches', _('matches regex pattern')),
|
('matches', _('matches regex pattern')),
|
||||||
('not_matches', _('does not match regex pattern')),
|
('not_matches', _('does not match regex pattern')),
|
||||||
@ -113,7 +113,7 @@ class Tester(TesterBase):
|
|||||||
|
|
||||||
def do_test(self):
|
def do_test(self):
|
||||||
series = self.value.strip()
|
series = self.value.strip()
|
||||||
ans = map_tags([series], self.rules)
|
ans = map_tags([series], self.rules, separator='')
|
||||||
self.result.setText((ans or ('',))[0])
|
self.result.setText((ans or ('',))[0])
|
||||||
|
|
||||||
|
|
||||||
@ -130,7 +130,7 @@ if __name__ == '__main__':
|
|||||||
app = Application([])
|
app = Application([])
|
||||||
d = RulesDialog()
|
d = RulesDialog()
|
||||||
d.rules = [
|
d.rules = [
|
||||||
{'action':'replace', 'query':'alice Bob', 'match_type':'one_of', 'replace':'Alice Bob'},
|
{'action':'replace', 'query':'alice Bob, moose', 'match_type':'one_of', 'replace':'Alice Bob, Moose'},
|
||||||
]
|
]
|
||||||
d.exec()
|
d.exec()
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
Loading…
x
Reference in New Issue
Block a user