mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Heuristic processing: Fix the italicize common patterns algorithm breaking on some HTML markup. Fixes #922317 (Private bug)
This commit is contained in:
parent
565fc2d479
commit
bf034f4c5b
@ -157,7 +157,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
ITALICIZE_STYLE_PATS = [
|
ITALICIZE_STYLE_PATS = [
|
||||||
ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
|
ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
|
||||||
ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/',
|
ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/',
|
||||||
ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
|
ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
|
||||||
ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
|
ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
|
||||||
ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
|
ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
|
||||||
@ -172,8 +172,11 @@ class HeuristicProcessor(object):
|
|||||||
for word in ITALICIZE_WORDS:
|
for word in ITALICIZE_WORDS:
|
||||||
html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
|
html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
|
||||||
|
|
||||||
|
def sub(mo):
|
||||||
|
return '<i>%s</i>'%mo.group('words')
|
||||||
|
|
||||||
for pat in ITALICIZE_STYLE_PATS:
|
for pat in ITALICIZE_STYLE_PATS:
|
||||||
html = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), html)
|
html = re.sub(pat, sub, html)
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user