diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 15522d25e6..d9e5246223 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -23,6 +23,7 @@ class HeuristicProcessor(object): self.min_chapters = 1 self.chapters_no_title = 0 self.chapters_with_title = 0 + self.blanks_deleted = False self.linereg = re.compile('(?<=)', re.IGNORECASE|re.DOTALL) self.blankreg = re.compile(r'\s*(?P]*>)\s*(?P

)', re.IGNORECASE) self.multi_blank = re.compile(r'(\s*]*>\s*

){2,}', re.IGNORECASE) @@ -422,6 +423,7 @@ class HeuristicProcessor(object): # blank paragraphs then delete blank lines to clean up spacing if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False): self.log.debug("deleting blank lines") + self.blanks_deleted = True html = self.multi_blank.sub('\n

', html) html = self.blankreg.sub('', html) @@ -479,6 +481,9 @@ class HeuristicProcessor(object): if getattr(self.extra_opts, 'format_scene_breaks', False): # Center separator lines html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•]+\s*)+)\s*()?\s*()?\s*()?\s*', '

' + '\g' + '

', html) + if not self.blanks_deleted: + html = self.multi_blank.sub('\n

', html) + html = re.sub(']*>\s*

', '

', html) if self.deleted_nbsps: # put back non-breaking spaces in empty paragraphs to preserve original formatting diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 3383708b72..96a8e30e3c 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -530,17 +530,18 @@ more blank lines are a paragraph boundary:: TXT input supports a number of options to differentiate how paragraphs are detected. - :guilabel:`Auto` - Analyzes the text file and attempts to determine how paragraphs are defined. + :guilabel:`Paragraph Style: Auto` + Analyzes the text file and attempts to automatically determine how paragraphs are defined. This + option will generally work fine; if you get undesirable results, try one of the manual options. - :guilabel:`Treat each line as a paragraph` + :guilabel:`Paragraph Style: Single` Assumes that every line is a paragraph:: This is the first. This is the second. This is the third. - :guilabel:`Assume print formatting` + :guilabel:`Paragraph Style: Print` Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when the next line that starts with an indent is reached:: @@ -551,11 +552,13 @@ TXT input supports a number of options to differentiate how paragraphs are detec This is the third. - :guilabel:`Unformatted` + :guilabel:`Paragraph Style: Unformatted` Assumes that the document has no formatting, but does use hard line breaks. Punctuation and median line length are used to attempt to re-create paragraphs. - :guilabel:`Process using Textile` + :guilabel:`Formatting Style: Auto` + + :guilabel:`Formatting Style: Heuristic` :guilabel:`Process using markdown` |app| also supports running TXT input though a transformation preprocessor known as markdown. Markdown