diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 7449a74ba9..35afb492b6 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -436,12 +436,12 @@ class HeuristicProcessor(object): # Re-open self closing paragraph tags html = re.sub('
/]*/>', '
', html) # Get rid of empty span, bold, font, em, & italics tags - html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html) - html = re.sub( - r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*(font|[ibu]|em|strong)>\s*){0,2}\s*(font|[ibu]|em|strong)>", " ", html) - html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) - html = re.sub( - r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*(font|[ibu]|em|strong)>\s*){0,2}\s*(font|[ibu]|em|strong)>", " ", html) + fmt_tags = 'font|[ibu]|em|strong' + open_fmt_pat, close_fmt_pat = r'<(?:{})(?:\s[^>]*)?>'.format(fmt_tags), '(?:{})>'.format(fmt_tags) + for i in range(2): + html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html) + html = re.sub( + r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html) # delete surrounding divs from empty paragraphs html = re.sub('
]*>\s*
\s*', html) # Empty heading tags