diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index aabb1b8bc4..14eca46b07 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -367,6 +367,8 @@ class HeuristicProcessor(object): html = re.sub(ur'\s*\s*', ' ', html) # Delete microsoft 'smart' tags html = re.sub('(?i)', '', html) + # Delete self closing paragraph tags + html = re.sub('', '', html) # Get rid of empty span, bold, font, em, & italics tags html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html) html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*\s*){0,2}\s*", " ", html) diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 6ec986f26a..7f3ff21fe0 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -587,11 +587,11 @@ TXT input supports a number of options to differentiate how paragraphs are detec Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when the next line that starts with an indent is reached:: - This is the + This is the first. - This is the second. + This is the second. - This is the + This is the third. :guilabel:`Paragraph Style: Unformatted` @@ -603,7 +603,7 @@ TXT input supports a number of options to differentiate how paragraphs are detec formatting will be applied. :guilabel:`Formatting Style: Heuristic` - Analyses the document for common chapter headings, scene breaks, and italicized words and applies the + Analyzes the document for common chapter headings, scene breaks, and italicized words and applies the appropriate html markup during conversion. :guilabel:`Formatting Style: Markdown`