From b33bfe2e43ad45b547ca1382f8e40a154d74c3ad Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 10 Feb 2010 15:31:11 -0700
Subject: [PATCH] HTML Input: Handle HTML fragments more gracefully. Fixes
 #4854 (Imported HTML fragments get converted to ZIPs containing no HTML)

---
 src/calibre/ebooks/html/input.py | 2 +-
 src/calibre/ebooks/oeb/base.py   | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index 8d33023e43..e15454207f 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -111,7 +111,7 @@ class HTMLFile(object):
                 raise IOError(msg)
             raise IgnoreFile(msg, err.errno)
 
-        self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
+        self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
         if not self.is_binary:
             if encoding is None:
                 encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 7c2efe20ef..b885f08962 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -851,8 +851,10 @@ class Manifest(object):
                     self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
                     nroot = etree.fromstring('<html><body/></html>')
                     parent = nroot[0]
-                for child in list(data):
-                    child.getparent().remove(child)
+                for child in list(data.iter()):
+                    oparent = child.getparent()
+                    if oparent is not None:
+                        oparent.remove(child)
                     parent.append(child)
                 data = nroot