mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Heuristic processing: Fix the italicize common patterns algorithm breaking on some HTML markup. Fixes #922317 (Private bug)
This commit is contained in:
parent
565fc2d479
commit
bf034f4c5b
@@ -157,7 +157,7 @@ class HeuristicProcessor(object):
|
||||
|
||||
ITALICIZE_STYLE_PATS = [
|
||||
ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
|
||||
@@ -172,8 +172,11 @@ class HeuristicProcessor(object):
|
||||
for word in ITALICIZE_WORDS:
|
||||
html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
|
||||
|
||||
def sub(mo):
|
||||
return '<i>%s</i>'%mo.group('words')
|
||||
|
||||
for pat in ITALICIZE_STYLE_PATS:
|
||||
html = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), html)
|
||||
html = re.sub(pat, sub, html)
|
||||
|
||||
return html
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user