HTML Input: Handle HTML fragments more gracefully. Fixes #4854 (Imported HTML fragments get converted to ZIPs containing no HTML)

This commit is contained in:
Kovid Goyal 2010-02-10 15:31:11 -07:00
parent a0671a64d4
commit b33bfe2e43
2 changed files with 5 additions and 3 deletions

View File

@@ -111,7 +111,7 @@ class HTMLFile(object):
raise IOError(msg) raise IOError(msg)
raise IgnoreFile(msg, err.errno) raise IgnoreFile(msg, err.errno)
self.is_binary = not bool(self.HTML_PAT.search(src[:4096])) self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
if not self.is_binary: if not self.is_binary:
if encoding is None: if encoding is None:
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1] encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]

View File

@@ -851,8 +851,10 @@ class Manifest(object):
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href) self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
nroot = etree.fromstring('<html><body/></html>') nroot = etree.fromstring('<html><body/></html>')
parent = nroot[0] parent = nroot[0]
for child in list(data): for child in list(data.iter()):
child.getparent().remove(child) oparent = child.getparent()
if oparent is not None:
oparent.remove(child)
parent.append(child) parent.append(child)
data = nroot data = nroot