diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 667dab0c21..811787d78e 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -96,12 +96,18 @@ class HTMLConverter(object): # remove

tags from within tags (re.compile(r'(.*?)', re.DOTALL|re.IGNORECASE), lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())), + + # Replace common line break patterns with line breaks + (re.compile(r'

( |\s)*

', re.IGNORECASE), lambda m: '
'), + # Replace entities (re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt', 'amp'])), # Remove comments from within style tags as they can mess up BeautifulSoup (re.compile(r'()', re.IGNORECASE|re.DOTALL), strip_style_comments), + + ] # Fix Baen markup BAEN = [