diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 15522d25e6..d9e5246223 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -23,6 +23,7 @@ class HeuristicProcessor(object):
self.min_chapters = 1
self.chapters_no_title = 0
self.chapters_with_title = 0
+ self.blanks_deleted = False
self.linereg = re.compile('(?<=
)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P]*>)\s*(?P
)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*]*>\s*
){2,}', re.IGNORECASE)
@@ -422,6 +423,7 @@ class HeuristicProcessor(object):
# blank paragraphs then delete blank lines to clean up spacing
if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
self.log.debug("deleting blank lines")
+ self.blanks_deleted = True
html = self.multi_blank.sub('\n
', html)
html = self.blankreg.sub('', html)
@@ -479,6 +481,9 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'format_scene_breaks', False):
# Center separator lines
html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•]+\s*)+)\s*((?P=inner3)>)?\s*((?P=inner2)>)?\s*((?P=inner1)>)?\s*(?P=outer)>', '' + '\g' + '
', html)
+ if not self.blanks_deleted:
+ html = self.multi_blank.sub('\n
', html)
+ html = re.sub(']*>\s*
', '
', html)
if self.deleted_nbsps:
# put back non-breaking spaces in empty paragraphs to preserve original formatting
diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index 3383708b72..96a8e30e3c 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -530,17 +530,18 @@ more blank lines are a paragraph boundary::
TXT input supports a number of options to differentiate how paragraphs are detected.
- :guilabel:`Auto`
- Analyzes the text file and attempts to determine how paragraphs are defined.
+ :guilabel:`Paragraph Style: Auto`
+ Analyzes the text file and attempts to automatically determine how paragraphs are defined. This
+ option will generally work fine; if you get undesirable results, try one of the manual options.
- :guilabel:`Treat each line as a paragraph`
+ :guilabel:`Paragraph Style: Single`
Assumes that every line is a paragraph::
This is the first.
This is the second.
This is the third.
- :guilabel:`Assume print formatting`
+ :guilabel:`Paragraph Style: Print`
Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when
the next line that starts with an indent is reached::
@@ -551,11 +552,13 @@ TXT input supports a number of options to differentiate how paragraphs are detec
This is the
third.
- :guilabel:`Unformatted`
+ :guilabel:`Paragraph Style: Unformatted`
Assumes that the document has no formatting, but does use hard line breaks. Punctuation
and median line length are used to attempt to re-create paragraphs.
- :guilabel:`Process using Textile`
+ :guilabel:`Formatting Style: Auto`
+
+ :guilabel:`Formatting Style: Heuristic`
:guilabel:`Process using markdown`
|app| also supports running TXT input through a transformation preprocessor known as markdown. Markdown