From b33bfe2e43ad45b547ca1382f8e40a154d74c3ad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 10 Feb 2010 15:31:11 -0700 Subject: [PATCH] HTML Input: Handle HTML fragments more gracefully. Fixes #4854 (Imported HTML fragments get converted to ZIPs containing no HTML) --- src/calibre/ebooks/html/input.py | 2 +- src/calibre/ebooks/oeb/base.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 8d33023e43..e15454207f 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -111,7 +111,7 @@ class HTMLFile(object): raise IOError(msg) raise IgnoreFile(msg, err.errno) - self.is_binary = not bool(self.HTML_PAT.search(src[:4096])) + self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096])) if not self.is_binary: if encoding is None: encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1] diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 7c2efe20ef..b885f08962 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -851,8 +851,10 @@ class Manifest(object): self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href) nroot = etree.fromstring('') parent = nroot[0] - for child in list(data): - child.getparent().remove(child) + for child in list(data.iter()): + oparent = child.getparent() + if oparent is not None: + oparent.remove(child) parent.append(child) data = nroot