mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Make handling of malformed/fragmentary HTML more robust
This commit is contained in:
parent
2330c2a88e
commit
74875fff97
@ -815,15 +815,28 @@ class Manifest(object):
|
||||
data = etree.fromstring(data, parser=RECOVER_PARSER)
|
||||
return data
|
||||
data = first_pass(data)
|
||||
# Force into the XHTML namespace
|
||||
|
||||
# Handle weird (non-HTML/fragment) files
|
||||
if barename(data.tag) != 'html':
|
||||
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)
|
||||
nroot = etree.fromstring('<html></html>')
|
||||
has_body = False
|
||||
for child in list(data):
|
||||
if barename(child.tag) == 'body':
|
||||
has_body = True
|
||||
break
|
||||
parent = nroot
|
||||
if not has_body:
|
||||
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
|
||||
nroot = etree.fromstring('<html><body/></html>')
|
||||
parent = nroot[0]
|
||||
for child in list(data):
|
||||
child.getparent().remove(child)
|
||||
nroot.append(child)
|
||||
parent.append(child)
|
||||
data = nroot
|
||||
elif not namespace(data.tag):
|
||||
|
||||
# Force into the XHTML namespace
|
||||
if not namespace(data.tag):
|
||||
data.attrib['xmlns'] = XHTML_NS
|
||||
data = etree.tostring(data, encoding=unicode)
|
||||
try:
|
||||
|
Loading…
x
Reference in New Issue
Block a user