diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index edd4d54cba..63eca10714 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -28,8 +28,8 @@ class HeuristicProcessor(object): self.linereg = re.compile('(?<=
)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P ]*>)\s*(?P ]*>)\s*(?P ]*>\s*
]*>\s*
(\s*]*>\s*
)', re.IGNORECASE) @@ -384,6 +384,8 @@ class HeuristicProcessor(object): html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*(font|[ibu]|em|strong)>\s*){0,2}\s*(font|[ibu]|em|strong)>", " ", html) html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*(font|[ibu]|em|strong)>\s*){0,2}\s*(font|[ibu]|em|strong)>", " ", html) + # delete surrounding divs from empty paragraphs + html = re.sub(']*>\s*
\s*', html) # Empty heading tags html = re.sub(r'(?i)