diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index d0dc81405b..74afbe7a42 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -159,7 +159,7 @@ class HeuristicProcessor(object):
]
for word in ITALICIZE_WORDS:
- html = re.sub(r'(?<=\s|>)' + word + r'(?=\s|<)', '%s' % word, html)
+ html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '%s' % word, html)
for pat in ITALICIZE_STYLE_PATS:
html = re.sub(pat, lambda mo: '%s' % mo.group('words'), html)
@@ -375,8 +375,8 @@ class HeuristicProcessor(object):
html = re.sub(ur'\s*
/]*/>', '
', html) # Get rid of empty span, bold, font, em, & italics tags html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html) html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*(font|[ibu]|em|strong)>\s*){0,2}\s*(font|[ibu]|em|strong)>", " ", html) @@ -463,7 +463,6 @@ class HeuristicProcessor(object): def __call__(self, html): self.log.debug("********* Heuristic processing HTML *********") - # Count the words in the document to estimate how many chapters to look for and whether # other types of processing are attempted try: @@ -477,7 +476,7 @@ class HeuristicProcessor(object): # Arrange line feeds and tags so the line_length and no_markup functions work correctly html = self.arrange_htm_line_endings(html) - + self.dump(html, 'after_arrange_line_endings') if self.cleanup_required(): ###### Check Markup ###### # @@ -580,7 +579,9 @@ class HeuristicProcessor(object): if blanks_count >= 1: html = self.merge_blanks(html, blanks_count) # Center separator lines, use a bit larger margin in this case - html = re.sub(u'<(?P
' + '\g
' + '\g
]*>\s*
', '