From 0350cd79e34cf2af2a591933cd4850560250d67d Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift"
Date: Mon, 26 Jan 2009 08:47:58 -0500
Subject: [PATCH] Fix #1649 (2). Yet more handling for broken (X)HTML.
---
src/calibre/ebooks/oeb/base.py | 18 ++++++++++++++++++
.../ebooks/oeb/transforms/trimmanifest.py | 3 ++-
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 163ac34cef..1510cb6c32 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -350,6 +350,24 @@ class Manifest(object):
data = etree.fromstring(data)
for meta in self.META_XP(data):
meta.getparent().remove(meta)
+ head = xpath(data, '/h:html/h:head')
+ head = head[0] if head else None
+ if head is None:
+ self.oeb.logger.warn(
+ 'File %r missing <head/> element' % self.href)
+ head = etree.Element(XHTML('head'))
+ data.insert(0, head)
+ title = etree.SubElement(head, XHTML('title'))
+ title.text = self.oeb.translate(__('Unknown'))
+ elif not xpath(data, '/h:html/h:head/h:title'):
+ self.oeb.logger.warn(
+ 'File %r missing <title/> element' % self.href)
+ title = etree.SubElement(head, XHTML('title'))
+ title.text = self.oeb.translate(__('Unknown'))
+ if not xpath(data, '/h:html/h:body'):
+ self.oeb.logger.warn(
+ 'File %r missing