diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index b80f05ef60..f14e153057 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -335,7 +335,7 @@ class PreProcessor(object):
     # Fix pdftohtml markup
     PDFTOHTML = [
         # Remove <hr> tags
-        (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: ' '),
+        (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
         # Remove page numbers
         (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
         # Remove <br> and replace <br><br> with <p>
diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 292ae0b50b..b301854684 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -122,7 +122,7 @@ class HTMLConverter(object, LoggingInterface):
     # Fix pdftohtml markup
     PDFTOHTML = [
         # Remove <hr> tags
-        (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: ' '),
+        (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
         # Remove page numbers
         (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
         # Remove <br> and replace <br><br> with <p>