diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index bebba8938b..73216057b5 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -27,7 +27,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \ CSS_MIME, OPF_MIME, XML_NS, XML from calibre.ebooks.oeb.base import namespace, barename, prefixname, \ urlnormalize, xpath -from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.profile import Context from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener @@ -732,7 +732,7 @@ def option_parser(): return parser def oeb2lit(opts, inpath): - logger = Logger(logging.getLogger('oeb2lit')) + logger = logging.getLogger('oeb2lit') logger.setup_cli_handler(opts.verbose) outpath = opts.output if outpath is None: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 59ce1f7b95..1e91fbe17d 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -13,8 +13,11 @@ from collections import defaultdict from itertools import count from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote +import logging from lxml import etree, html import calibre +from cssutils import CSSParser +from cssutils.css import CSSStyleSheet from calibre.translations.dynamic import translate from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.entitydefs import ENTITYDEFS @@ -99,6 +102,8 @@ PNG_MIME = types_map['.png'] SVG_MIME = types_map['.svg'] BINARY_MIME = 'application/octet-stream' +XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS + OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css']) OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document']) @@ -565,7 +570,7 @@ class Manifest(object): return 'Item(id=%r, href=%r, media_type=%r)' \ % (self.id, self.href, self.media_type) - def _force_xhtml(self, data): + def _parse_xhtml(self, data): # Convert to Unicode and normalize line endings data = self.oeb.decode(data) data = XMLDECL_RE.sub('', data) @@ -645,6 +650,27 @@ class Manifest(object): 'File %r missing element' % self.href) etree.SubElement(data, XHTML('body')) return data + + def _parse_css(self, data): + data = self.oeb.decode(data) + data = XHTML_CSS_NAMESPACE + data + parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING, + fetcher=self._fetch_css) + data = parser.parseString(data, href=self.href) + data.namespaces['h'] = XHTML_NS + return data + + def _fetch_css(self, path): + hrefs = self.oeb.manifest.hrefs + if path not in hrefs: + self.oeb.logger.warn('CSS import of missing file %r' % path) + return (None, None) + item = hrefs[path] + if item.media_type not in OEB_STYLES: + self.oeb.logger.warn('CSS import of non-CSS file %r' % path) + return (None, None) + data = item.data.cssText + return ('utf-8', data) @dynamic_property def data(self): @@ -661,15 +687,19 @@ class Manifest(object): special parsing. """ def fget(self): - if self._data is not None: - return self._data - data = self._loader(self.href) - if self.media_type in OEB_DOCS: - data = self._force_xhtml(data) + data = self._data + if data is None: + if self._loader is None: + return None + data = self._loader(self.href) + if not isinstance(data, basestring): + pass # already parsed + elif self.media_type in OEB_DOCS: + data = self._parse_xhtml(data) elif self.media_type[-4:] in ('+xml', '/xml'): data = etree.fromstring(data) elif self.media_type in OEB_STYLES: - data = self.oeb.decode(data) + data = self._parse_css(data) self._data = data return data def fset(self, value): @@ -677,7 +707,7 @@ class Manifest(object): def fdel(self): self._data = None return property(fget, fset, fdel, doc=doc) - + def __str__(self): data = self.data if isinstance(data, etree._Element): @@ -726,7 +756,7 @@ class Manifest(object): if frag: relhref = '#'.join((relhref, frag)) return relhref - + def abshref(self, href): """Convert the URL provided in :param:`href` from a reference relative to this manifest item to a book-absolute reference. @@ -748,7 +778,7 @@ class Manifest(object): self.items = set() self.ids = {} self.hrefs = {} - + def add(self, id, href, media_type, fallback=None, loader=None, data=None): """Add a new item to the book manifest. @@ -765,7 +795,7 @@ class Manifest(object): self.ids[item.id] = item self.hrefs[item.href] = item return item - + def remove(self, item): """Removes :param:`item` from the manifest.""" if item in self.ids: @@ -775,7 +805,7 @@ class Manifest(object): self.items.remove(item) if item in self.oeb.spine: self.oeb.spine.remove(item) - + def generate(self, id=None, href=None): """Generate a new unique identifier and/or internal path for use in creating a new manifest item, using the provided :param:`id` and/or @@ -803,13 +833,13 @@ class Manifest(object): def __iter__(self): for item in self.items: yield item - + def values(self): return list(self.items) def __contains__(self, item): return item in self.items - + def to_opf1(self, parent=None): elem = element(parent, 'manifest') for item in self.items: diff --git a/src/calibre/ebooks/oeb/factory.py b/src/calibre/ebooks/oeb/factory.py index 684451044b..8add71d20d 100644 --- a/src/calibre/ebooks/oeb/factory.py +++ b/src/calibre/ebooks/oeb/factory.py @@ -8,6 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift ' import sys, os, logging from itertools import chain +import calibre from calibre.ebooks.oeb.base import OEBError from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks.oeb.writer import OEBWriter @@ -15,7 +16,7 @@ from calibre.ebooks.lit.reader import LitReader from calibre.ebooks.lit.writer import LitWriter from calibre.ebooks.mobi.reader import MobiReader from calibre.ebooks.mobi.writer import MobiWriter -from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.profile import Context from calibre.utils.config import Config @@ -77,8 +78,8 @@ def main(argv=sys.argv): if len(args) != 0: parser.print_help() return 1 - logger = Logger(logging.getLogger('ebook-convert')) - logger.setup_cli_handler(opts.verbose) + logger = logging.getLogger('ebook-convert') + calibre.setup_cli_handlers(logger, logging.DEBUG) encoding = opts.encoding pretty_print = opts.pretty_print oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index dbafa5afac..c62540e15a 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -181,7 +181,7 @@ class OEBReader(object): if not scheme and href not in known: new.add(href) elif item.media_type in OEB_STYLES: - for match in CSSURL_RE.finditer(item.data): + for match in CSSURL_RE.finditer(item.data.cssText): href, _ = urldefrag(match.group('url')) href = item.abshref(urlnormalize(href)) scheme = urlparse(href).scheme diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 3b5c3e19d0..8bc82883e3 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -115,8 +115,7 @@ class Stylizer(object): cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [HTML_CSS_STYLESHEET] head = xpath(tree, '/h:html/h:head')[0] - parser = cssutils.CSSParser() - parser.setFetcher(self._fetch_css_file) + parser = cssutils.CSSParser(fetcher=self._fetch_css_file) for elem in head: if elem.tag == XHTML('style') and elem.text \ and elem.get('type', CSS_MIME) in OEB_STYLES: @@ -135,14 +134,7 @@ class Stylizer(object): 'Stylesheet %r referenced by file %r not in manifest' % (path, item.href)) continue - if sitem in self.STYLESHEETS: - stylesheet = self.STYLESHEETS[sitem] - else: - data = self._fetch_css_file(path)[1] - stylesheet = parser.parseString(data, href=path) - stylesheet.namespaces['h'] = XHTML_NS - self.STYLESHEETS[sitem] = stylesheet - stylesheets.append(stylesheet) + stylesheets.append(sitem.data) rules = [] index = 0 self.stylesheets = set() @@ -159,9 +151,9 @@ class Stylizer(object): for _, _, cssdict, text, _ in rules: try: selector = CSSSelector(text) - except (AssertionError, ExpressionError, etree.XPathSyntaxError,\ - NameError, # gets thrown on OS X instead of SelectorSyntaxError - SelectorSyntaxError): + except (AssertionError, ExpressionError, etree.XPathSyntaxError, + NameError, # thrown on OS X instead of SelectorSyntaxError + SelectorSyntaxError): continue for elem in selector(tree): self.style(elem)._update_cssdict(cssdict) @@ -171,9 +163,13 @@ class Stylizer(object): def _fetch_css_file(self, path): hrefs = self.oeb.manifest.hrefs if path not in hrefs: + self.logger.warn('CSS import of missing file %r' % path) return (None, None) - data = hrefs[path].data - data = XHTML_CSS_NAMESPACE + data + item = hrefs[path] + if item.media_type not in OEB_STYLES: + self.logger.warn('CSS import of non-CSS file %r' % path) + return (None, None) + data = item.data.cssText return ('utf-8', data) def flatten_rule(self, rule, href, index): diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index c731800999..119ebcc73d 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -53,7 +53,7 @@ class ManifestTrimmer(object): if found not in used: new.add(found) elif item.media_type == CSS_MIME: - for match in CSSURL_RE.finditer(item.data): + for match in CSSURL_RE.finditer(item.data.cssText): href = match.group('url') href = item.abshref(urlnormalize(href)) if href in oeb.manifest.hrefs: diff --git a/src/calibre/ebooks/oeb/writer.py b/src/calibre/ebooks/oeb/writer.py index 235965b50f..8789d03470 100644 --- a/src/calibre/ebooks/oeb/writer.py +++ b/src/calibre/ebooks/oeb/writer.py @@ -8,7 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift ' import sys, os, logging from calibre.ebooks.oeb.base import OPF_MIME, xml2str -from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook +from calibre.ebooks.oeb.base import DirContainer, OEBBook __all__ = ['OEBWriter']