MOBI Input: Handle files that have extra closing </html> tags sprinkled throughout their markup. Fixes #925833 (prc file fails to read or convert)

This commit is contained in:
Kovid Goyal 2012-02-03 09:18:31 +05:30
parent 6062bc3eac
commit b8d0e1a6b0

View File

@@ -364,7 +364,10 @@ class MobiReader(object):
self.processed_html = self.remove_random_bytes(self.processed_html) self.processed_html = self.remove_random_bytes(self.processed_html)
root = fromstring(self.processed_html) root = fromstring(self.processed_html)
if len(root.xpath('body/descendant::*')) < 1: if len(root.xpath('body/descendant::*')) < 1:
raise Exception('Failed to parse the markup in this MOBI file') # There are probably stray </html>s in the markup
self.processed_html = self.processed_html.replace('</html>',
'')
root = fromstring(self.processed_html)
if root.tag != 'html': if root.tag != 'html':
self.log.warn('File does not have opening <html> tag') self.log.warn('File does not have opening <html> tag')