diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 03d45a3dad..1f2df923dc 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -815,15 +815,28 @@ class Manifest(object): data = etree.fromstring(data, parser=RECOVER_PARSER) return data data = first_pass(data) - # Force into the XHTML namespace + + # Handle weird (non-HTML/fragment) files if barename(data.tag) != 'html': self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href) nroot = etree.fromstring('') + has_body = False + for child in list(data): + if barename(child.tag) == 'body': + has_body = True + break + parent = nroot + if not has_body: + self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href) + nroot = etree.fromstring('') + parent = nroot[0] for child in list(data): child.getparent().remove(child) - nroot.append(child) + parent.append(child) data = nroot - elif not namespace(data.tag): + + # Force into the XHTML namespace + if not namespace(data.tag): data.attrib['xmlns'] = XHTML_NS data = etree.tostring(data, encoding=unicode) try: