From bf034f4c5bd937a5894d4097a3b59eca716d7b2c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 28 Jan 2012 16:43:25 +0530 Subject: [PATCH] Heuristic processing: Fix the italicize common patterns algorithm breaking on some HTML markup. Fixes #922317 (Private bug) --- src/calibre/ebooks/conversion/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index cbc8b41529..91141af1d1 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -157,7 +157,7 @@ class HeuristicProcessor(object): ITALICIZE_STYLE_PATS = [ ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_', - ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/', + ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/', ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~', ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*', ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~', @@ -172,8 +172,11 @@ class HeuristicProcessor(object): for word in ITALICIZE_WORDS: html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html) + def sub(mo): + return '<i>%s</i>'%mo.group('words') + for pat in ITALICIZE_STYLE_PATS: - html = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), html) + html = re.sub(pat, sub, html) return html