diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 163ac34cef..1510cb6c32 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -350,6 +350,24 @@ class Manifest(object): data = etree.fromstring(data) for meta in self.META_XP(data): meta.getparent().remove(meta) + head = xpath(data, '/h:html/h:head') + head = head[0] if head else None + if head is None: + self.oeb.logger.warn( + 'File %r missing <head/> element' % self.href) + head = etree.Element(XHTML('head')) + data.insert(0, head) + title = etree.SubElement(head, XHTML('title')) + title.text = self.oeb.translate(__('Unknown')) + elif not xpath(data, '/h:html/h:head/h:title'): + self.oeb.logger.warn( + 'File %r missing <title/> element' % self.href) + title = etree.SubElement(head, XHTML('title')) + title.text = self.oeb.translate(__('Unknown')) + if not xpath(data, '/h:html/h:body'): + self.oeb.logger.warn( + 'File %r missing <body/> element' % self.href) + etree.SubElement(data, XHTML('body')) return data def data(): diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index b150a12831..bc95b43343 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -13,6 +13,7 @@ from urlparse import urldefrag from lxml import etree import cssutils from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS +from calibre.ebooks.oeb.base import urlnormalize LINK_SELECTORS = [] for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data', @@ -46,7 +47,7 @@ class ManifestTrimmer(object): item.data is not None: hrefs = [sel(item.data) for sel in LINK_SELECTORS] for href in chain(*hrefs): - href = item.abshref(href) + href = item.abshref(urlnormalize(href)) if href in oeb.manifest.hrefs: found = oeb.manifest.hrefs[href] if found not in used: