From c5acda690f0ba76469c1f7f88aa3fce89e9d4507 Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift"
Date: Sun, 25 Jan 2009 18:30:06 -0500
Subject: [PATCH 1/4] Handle missing stylesheets
---
src/calibre/ebooks/oeb/stylizer.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index c2d12f317e..29c6c5b2b4 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -110,7 +110,8 @@ class Stylizer(object):
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
self.profile = profile
- base = os.path.dirname(path)
+ self.logger = oeb.logger
+ item = oeb.manifest.hrefs[path]
basename = os.path.basename(path)
cssname = os.path.splitext(basename)[0] + '.css'
stylesheets = [HTML_CSS_STYLESHEET]
@@ -128,8 +129,12 @@ class Stylizer(object):
and elem.get('rel', 'stylesheet') == 'stylesheet' \
and elem.get('type', CSS_MIME) in OEB_STYLES:
href = urlnormalize(elem.attrib['href'])
- path = os.path.join(base, href)
- path = os.path.normpath(path).replace('\\', '/')
+ path = item.abshref(href)
+ if path not in oeb.manifest.hrefs:
+ self.logger.warn(
+ 'Stylesheet %r referenced by file %r not in manifest' %
+ (path, item.href))
+ continue
if path in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[path]
else:
From 69a1126bf9c54a1aa8b6bb8a24b00e198981b87a Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift"
Date: Sun, 25 Jan 2009 18:30:30 -0500
Subject: [PATCH 2/4] Correct for broken (Penguin) in-line SVG
---
src/calibre/ebooks/oeb/base.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 2bc898748d..163ac34cef 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -331,6 +331,13 @@ class Manifest(object):
def _force_xhtml(self, data):
if self.oeb.encoding is not None:
data = data.decode(self.oeb.encoding, 'replace')
+ # Handle broken XHTML w/ SVG (ugh)
+ if 'svg:' in data and SVG_NS not in data:
+ data = data.replace(
+ '
Date: Mon, 26 Jan 2009 08:47:58 -0500
Subject: [PATCH 3/4] Fix #1649 (2). Yet more handling for broken (X)HTML.
---
src/calibre/ebooks/oeb/base.py | 18 ++++++++++++++++++
.../ebooks/oeb/transforms/trimmanifest.py | 3 ++-
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 163ac34cef..1510cb6c32 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -350,6 +350,24 @@ class Manifest(object):
data = etree.fromstring(data)
for meta in self.META_XP(data):
meta.getparent().remove(meta)
+ head = xpath(data, '/h:html/h:head')
+ head = head[0] if head else None
+ if head is None:
+ self.oeb.logger.warn(
+ 'File %r missing <head/> element' % self.href)
+ head = etree.Element(XHTML('head'))
+ data.insert(0, head)
+ title = etree.SubElement(head, XHTML('title'))
+ title.text = self.oeb.translate(__('Unknown'))
+ elif not xpath(data, '/h:html/h:head/h:title'):
+ self.oeb.logger.warn(
+ 'File %r missing <title/> element' % self.href)
+ title = etree.SubElement(head, XHTML('title'))
+ title.text = self.oeb.translate(__('Unknown'))
+ if not xpath(data, '/h:html/h:body'):
+ self.oeb.logger.warn(
+ 'File %r missing <body/> element' % self.href)
+ etree.SubElement(data, XHTML('body'))
return data
def data():
diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
index b150a12831..bc95b43343 100644
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@@ -13,6 +13,7 @@ from urlparse import urldefrag
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
+from calibre.ebooks.oeb.base import urlnormalize
LINK_SELECTORS = []
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
@@ -46,7 +47,7 @@ class ManifestTrimmer(object):
item.data is not None:
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
for href in chain(*hrefs):
- href = item.abshref(href)
+ href = item.abshref(urlnormalize(href))
if href in oeb.manifest.hrefs:
found = oeb.manifest.hrefs[href]
if found not in used:
From cde9d8864c008b3f12622217d50613771886b1b9 Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift"
Date: Mon, 26 Jan 2009 12:10:16 -0500
Subject: [PATCH 4/4] Fix #1700. Handle LIT files with URL-encoded filenames
in their manifests.
---
src/calibre/ebooks/lit/reader.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 461c067382..02fc98d9df 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -11,6 +11,7 @@ import sys, struct, cStringIO, os
import functools
import re
from urlparse import urldefrag
+from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
@@ -611,6 +612,8 @@ class LitReader(object):
offset, raw = u32(raw), raw[4:]
internal, raw = consume_sized_utf8_string(raw)
original, raw = consume_sized_utf8_string(raw)
+ # The path should be stored unquoted, but not always
+ original = urlunquote(original)
# Is this last one UTF-8 or ASCIIZ?
mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
self.manifest[internal] = ManifestItem(