mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
had to include heading tags with the previous change, because lxml/beautifulsoup were equally unhappy when heading tags contained paragraph tags
This commit is contained in:
parent
0013d4e059
commit
38e08a7bd3
@ -477,8 +477,8 @@ class MobiReader(object):
|
||||
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
|
||||
self.processed_html = re.sub(r'<?xml[^>]*>', '', self.processed_html)
|
||||
# Rearrange various style tags & paragraph tags so that lxml and BeautifulSoup don't get upset
|
||||
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
||||
|
||||
def remove_random_bytes(self, html):
|
||||
return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08',
|
||||
|
Loading…
x
Reference in New Issue
Block a user