mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Input: Handle files that have spurious closing </body> and/or </html> tags in their markup. Fixes #925833 (prc file fails to read or convert)
This commit is contained in:
parent
5f4f7b4c09
commit
b56891cdae
@@ -516,6 +516,17 @@ class MobiReader(object):
|
||||
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
||||
bods = htmls = 0
|
||||
for x in re.finditer(ur'</body>|</html>', self.processed_html):
|
||||
if x == '</body>': bods +=1
|
||||
else: htmls += 1
|
||||
if bods > 1 and htmls > 1:
|
||||
break
|
||||
if bods > 1:
|
||||
self.processed_html = self.processed_html.replace('</body>', '')
|
||||
if htmls > 1:
|
||||
self.processed_html = self.processed_html.replace('</html>', '')
|
||||
|
||||
|
||||
|
||||
def remove_random_bytes(self, html):
|
||||
|
Loading…
x
Reference in New Issue
Block a user