diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 461c067382..02fc98d9df 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -11,6 +11,7 @@ import sys, struct, cStringIO, os import functools import re from urlparse import urldefrag +from urllib import unquote as urlunquote from lxml import etree from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP @@ -611,6 +612,8 @@ class LitReader(object): offset, raw = u32(raw), raw[4:] internal, raw = consume_sized_utf8_string(raw) original, raw = consume_sized_utf8_string(raw) + # The path should be stored unquoted, but not always + original = urlunquote(original) # Is this last one UTF-8 or ASCIIZ? mime_type, raw = consume_sized_utf8_string(raw, zpad=True) self.manifest[internal] = ManifestItem( diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 2bc898748d..1510cb6c32 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -331,6 +331,13 @@ class Manifest(object): def _force_xhtml(self, data): if self.oeb.encoding is not None: data = data.decode(self.oeb.encoding, 'replace') + # Handle broken XHTML w/ SVG (ugh) + if 'svg:' in data and SVG_NS not in data: + data = data.replace( + ' element' % self.href) + head = etree.Element(XHTML('head')) + data.insert(0, head) + title = etree.SubElement(head, XHTML('title')) + title.text = self.oeb.translate(__('Unknown')) + elif not xpath(data, '/h:html/h:head/h:title'): + self.oeb.logger.warn( + 'File %r missing element' % self.href) + title = etree.SubElement(head, XHTML('title')) + title.text = self.oeb.translate(__('Unknown')) + if not xpath(data, '/h:html/h:body'): + self.oeb.logger.warn( + 'File %r missing <body/> element' % self.href) + etree.SubElement(data, XHTML('body')) return data def data(): diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index c2d12f317e..29c6c5b2b4 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -110,7 +110,8 @@ class Stylizer(object): def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']): self.profile = profile - base = os.path.dirname(path) + self.logger = oeb.logger + item = oeb.manifest.hrefs[path] basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [HTML_CSS_STYLESHEET] @@ -128,8 +129,12 @@ class Stylizer(object): and elem.get('rel', 'stylesheet') == 'stylesheet' \ and elem.get('type', CSS_MIME) in OEB_STYLES: href = urlnormalize(elem.attrib['href']) - path = os.path.join(base, href) - path = os.path.normpath(path).replace('\\', '/') + path = item.abshref(href) + if path not in oeb.manifest.hrefs: + self.logger.warn( + 'Stylesheet %r referenced by file %r not in manifest' % + (path, item.href)) + continue if path in self.STYLESHEETS: stylesheet = self.STYLESHEETS[path] else: diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index b150a12831..bc95b43343 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -13,6 +13,7 @@ from urlparse import urldefrag from lxml import etree import cssutils from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS +from calibre.ebooks.oeb.base import urlnormalize LINK_SELECTORS = [] for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data', @@ -46,7 +47,7 @@ class ManifestTrimmer(object): item.data is not None: hrefs = [sel(item.data) for sel in LINK_SELECTORS] for href in chain(*hrefs): - href = item.abshref(href) + href = item.abshref(urlnormalize(href)) if href in oeb.manifest.hrefs: found = oeb.manifest.hrefs[href] if found not in used: