mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
HTML Input: Handle HTML fragments more gracefully. Fixes #4854 (Imported HTML fragments get converted to ZIPs containing no HTML)
This commit is contained in:
parent
a0671a64d4
commit
b33bfe2e43
@ -111,7 +111,7 @@ class HTMLFile(object):
|
||||
raise IOError(msg)
|
||||
raise IgnoreFile(msg, err.errno)
|
||||
|
||||
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
|
||||
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
|
||||
if not self.is_binary:
|
||||
if encoding is None:
|
||||
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
||||
|
@ -851,8 +851,10 @@ class Manifest(object):
|
||||
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
|
||||
nroot = etree.fromstring('<html><body/></html>')
|
||||
parent = nroot[0]
|
||||
for child in list(data):
|
||||
child.getparent().remove(child)
|
||||
for child in list(data.iter()):
|
||||
oparent = child.getparent()
|
||||
if oparent is not None:
|
||||
oparent.remove(child)
|
||||
parent.append(child)
|
||||
data = nroot
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user