diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 286fad1aaa..96bd303933 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -294,8 +294,8 @@ class PreProcessor(object):
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
# blank paragraphs then delete blank lines to clean up spacing
linereg = re.compile('(?<=
)', re.IGNORECASE|re.DOTALL)
- blankreg = re.compile(r'\s*(?P]*>)\s*(?P
)', re.IGNORECASE)
- #multi_blank = re.compile(r'(\s*]*>\s*(<(b|i|u)>)?\s*((b|i|u)>)?\s*
){2,}', re.IGNORECASE)
+ blankreg = re.compile(r'\s*(?P]*>)\s*(?P
)', re.IGNORECASE)
+ multi_blank = re.compile(r'(\s*]*>\s*
){2,}', re.IGNORECASE)
blanklines = blankreg.findall(html)
lines = linereg.findall(html)
blanks_between_paragraphs = False
@@ -303,11 +303,8 @@ class PreProcessor(object):
if len(lines) > 1:
self.log("There are " + unicode(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
- if float(len(blanklines)) / float(len(lines)) > 0.40 and getattr(self.extra_opts,
- 'delete_blank_paragraphs', False):
- self.log("deleting blank lines")
- html = blankreg.sub('', html)
- elif float(len(blanklines)) / float(len(lines)) > 0.40:
+
+ if float(len(blanklines)) / float(len(lines)) > 0.40:
blanks_between_paragraphs = True
print "blanks between paragraphs is marked True"
else:
@@ -319,7 +316,12 @@ class PreProcessor(object):
html = self.markup_chapters(html, totalwords, blanks_between_paragraphs)
-
+ if blanks_between_paragraphs and getattr(self.extra_opts,
+ 'delete_blank_paragraphs', False):
+ self.log("deleting blank lines")
+ html = multi_blank.sub('\n
', html)
+ html = blankreg.sub('', html)
+
###### Unwrap lines ######
#
# Some OCR sourced files have line breaks in the html using a combination of span & p tags