Tag mapper: Fix bug when replacement tag has a comma in it and the replacement rule is not the last rule

This commit is contained in:
Kovid Goyal 2015-10-17 12:25:08 +05:30
parent da8e5bffa8
commit 317d56727d

View File

@ -6,6 +6,8 @@ from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
import regex import regex
from collections import deque
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.IGNORECASE | regex.UNICODE REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.IGNORECASE | regex.UNICODE
@ -31,22 +33,35 @@ def matcher(rule):
def apply_rules(tag, rules): def apply_rules(tag, rules):
for rule, matches in rules: ans = []
ltag = icu_lower(tag) tags = deque()
if matches(ltag): tags.append(tag)
ac = rule['action'] while tags:
if ac == 'remove': tag = tags.popleft()
return None for rule, matches in rules:
if ac == 'keep': ltag = icu_lower(tag)
return tag if matches(ltag):
if ac == 'replace': ac = rule['action']
if 'matches' in rule['match_type']: if ac == 'remove':
tag = regex.sub(rule['query'], rule['replace'], tag, flags=REGEX_FLAGS) break
else: if ac == 'keep':
tag = rule['replace'] ans.append(tag)
if ',' in tag: break
tag = [x.strip() for x in tag.split(',')] if ac == 'replace':
return tag if 'matches' in rule['match_type']:
tag = regex.sub(rule['query'], rule['replace'], tag, flags=REGEX_FLAGS)
else:
tag = rule['replace']
if ',' in tag:
tags.extendleft(x.strip() for x in reversed(tag.split(',')))
else:
tags.appendleft(tag)
break
else: # no rule matched, default keep
ans.append(tag)
ans.extend(tags)
return ans
def uniq(vals, kmap=icu_lower): def uniq(vals, kmap=icu_lower):
''' Remove all duplicates from vals, while preserving order. kmap must be a ''' Remove all duplicates from vals, while preserving order. kmap must be a
@ -66,8 +81,7 @@ def map_tags(tags, rules=()):
rules = [(r, matcher(r)) for r in rules] rules = [(r, matcher(r)) for r in rules]
ans = [] ans = []
for t in tags: for t in tags:
mapped = apply_rules(t, rules) ans.extend(apply_rules(t, rules))
(ans.extend if isinstance(mapped, list) else ans.append)(mapped)
return uniq(filter(None, ans)) return uniq(filter(None, ans))
def test(): def test():
@ -79,3 +93,8 @@ def test():
assert map_tags(['t1', 'x1'], rules) == ['t2', 't3', 'x1'] assert map_tags(['t1', 'x1'], rules) == ['t2', 't3', 'x1']
rules = [{'action':'replace', 'query':'(.)1', 'match_type':'matches', 'replace':r'\g<1>2,3'}] rules = [{'action':'replace', 'query':'(.)1', 'match_type':'matches', 'replace':r'\g<1>2,3'}]
assert map_tags(['t1', 'x1'], rules) == ['t2', '3', 'x2'] assert map_tags(['t1', 'x1'], rules) == ['t2', '3', 'x2']
rules = [
{'action':'replace', 'query':'t1', 'match_type':'one_of', 'replace':r't2,t3'},
{'action':'remove', 'query':'t2', 'match_type':'one_of'},
]
assert map_tags(['t1', 'x1'], rules) == ['t3', 'x1']