mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Replace common line break patterns with line breaks
This commit is contained in:
parent
debc7c26de
commit
c8cce1e355
@@ -96,12 +96,18 @@ class HTMLConverter(object):
|
|||||||
# remove <p> tags from within <a> tags
|
# remove <p> tags from within <a> tags
|
||||||
(re.compile(r'<a.*?>(.*?)</a\s*>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'<a.*?>(.*?)</a\s*>', re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())),
|
lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())),
|
||||||
|
|
||||||
|
# Replace common line break patterns with line breaks
|
||||||
|
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
||||||
|
|
||||||
# Replace entities
|
# Replace entities
|
||||||
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
||||||
exceptions=['lt', 'gt', 'amp'])),
|
exceptions=['lt', 'gt', 'amp'])),
|
||||||
# Remove comments from within style tags as they can mess up BeautifulSoup
|
# Remove comments from within style tags as they can mess up BeautifulSoup
|
||||||
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
|
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
|
||||||
strip_style_comments),
|
strip_style_comments),
|
||||||
|
|
||||||
|
|
||||||
]
|
]
|
||||||
# Fix Baen markup
|
# Fix Baen markup
|
||||||
BAEN = [
|
BAEN = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user