IGN:...

2025-08-30 23:00:21 -04:00 · 2009-03-19 19:12:07 -07:00 · 2009-03-19 19:12:07 -07:00 · d7257ad5f2
commit d7257ad5f2
parent fe918ab068 29486d653e
8 changed files with 76 additions and 61 deletions
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -129,8 +129,6 @@ class UnBinary(object):
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
        self.is_html = map is HTML_MAP
        self.tag_atoms, self.attr_atoms = atoms
        self.opf = map is OPF_MAP
        self.bin = bin
        self.dir = os.path.dirname(path)
        buf = StringIO()
        self.binary_to_text(bin, buf)
@ -210,7 +208,8 @@ class UnBinary(object):
                        continue
                    if flags & FLAG_ATOM:
                        if not self.tag_atoms or tag not in self.tag_atoms:
-                            raise LitError("atom tag %d not in atom tag list" % tag)
+                            raise LitError(
                                "atom tag %d not in atom tag list" % tag)
                        tag_name = self.tag_atoms[tag]
                        current_map = self.attr_atoms
                    elif tag < len(self.tag_map):
@ -295,7 +294,7 @@ class UnBinary(object):
                            c = '&quot;'
                        elif c == '<':
                            c = '&lt;'
-                        self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
+                        buf.write(c.encode('ascii', 'xmlcharrefreplace'))
                    count -= 1
                if count == 0:
                    if not in_censorship:
@ -841,24 +840,7 @@ class LitFile(object):
        if len(attrs) != nentries:
            self._warn("damaged or invalid atoms attributes table")
        return (tags, attrs)
-    
+
    def get_entry_content(self, entry, pretty_print=False):
        if 'spine' in entry.state:
            name = '/'.join(('/data', entry.internal, 'content'))
            path = entry.path
            raw = self.get_file(name)
            decl, map = (OPF_DECL, OPF_MAP) \
                if name == '/meta' else (HTML_DECL, HTML_MAP)
            atoms = self.get_atoms(entry)
            content = decl + unicode(UnBinary(raw, path, self.manifest, map, atoms))
            if pretty_print:
                content = self._pretty_print(content)
            content = content.encode('utf-8')
        else:
            internal = '/'.join(('/data', entry.internal))
            content = self._litfile.get_file(internal)
        return content
 class LitContainer(object):
    """Simple Container-interface, read-only accessor for LIT files."""
@ -879,9 +861,15 @@ class LitContainer(object):
        elif 'spine' in entry.state:
            internal = '/'.join(('/data', entry.internal, 'content'))
            raw = self._litfile.get_file(internal)
-            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
+            manifest = self._litfile.manifest
            atoms = self._litfile.get_atoms(entry)
            unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
            content = HTML_DECL + str(unbin)
-   
+        else:
            internal = '/'.join(('/data', entry.internal))
            content = self._litfile.get_file(internal)
        return content
    def _read_meta(self):
        path = 'content.opf'
        raw = self._litfile.get_file('/meta')
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -27,7 +27,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \
    CSS_MIME, OPF_MIME, XML_NS, XML
 from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
    urlnormalize, xpath
-from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
@ -732,7 +732,7 @@ def option_parser():
    return parser
 def oeb2lit(opts, inpath):
-    logger = Logger(logging.getLogger('oeb2lit'))
+    logger = logging.getLogger('oeb2lit')
    logger.setup_cli_handler(opts.verbose)
    outpath = opts.output
    if outpath is None:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -13,8 +13,11 @@ from collections import defaultdict
 from itertools import count
 from urlparse import urldefrag, urlparse, urlunparse
 from urllib import unquote as urlunquote
 import logging
 from lxml import etree, html
 import calibre
 from cssutils import CSSParser
 from cssutils.css import CSSStyleSheet
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -99,6 +102,8 @@ PNG_MIME       = types_map['.png']
 SVG_MIME       = types_map['.svg']
 BINARY_MIME    = 'application/octet-stream'
 XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
 OEB_STYLES        = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
 OEB_DOCS          = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
                         'text/x-oeb-document'])
@ -565,7 +570,7 @@ class Manifest(object):
            return 'Item(id=%r, href=%r, media_type=%r)' \
                % (self.id, self.href, self.media_type)
-        def _force_xhtml(self, data):
+        def _parse_xhtml(self, data):
            # Convert to Unicode and normalize line endings
            data = self.oeb.decode(data)
            data = XMLDECL_RE.sub('', data)
@ -645,6 +650,27 @@ class Manifest(object):
                    'File %r missing <body/> element' % self.href)
                etree.SubElement(data, XHTML('body'))
            return data
        def _parse_css(self, data):
            data = self.oeb.decode(data)
            data = XHTML_CSS_NAMESPACE + data
            parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING,
                               fetcher=self._fetch_css)
            data = parser.parseString(data, href=self.href)
            data.namespaces['h'] = XHTML_NS
            return data
        def _fetch_css(self, path):
            hrefs = self.oeb.manifest.hrefs
            if path not in hrefs:
                self.oeb.logger.warn('CSS import of missing file %r' % path)
                return (None, None)
            item = hrefs[path]
            if item.media_type not in OEB_STYLES:
                self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
                return (None, None)
            data = item.data.cssText
            return ('utf-8', data)
        @dynamic_property
        def data(self):
@ -661,15 +687,19 @@ class Manifest(object):
              special parsing.
            """
            def fget(self):
-                if self._data is not None:
+                data = self._data
-                    return self._data
+                if data is None:
-                data = self._loader(self.href)
+                    if self._loader is None:
-                if self.media_type in OEB_DOCS:
+                        return None
-                    data = self._force_xhtml(data)
+                    data = self._loader(self.href)
                if not isinstance(data, basestring):
                    pass # already parsed
                elif self.media_type in OEB_DOCS:
                    data = self._parse_xhtml(data)
                elif self.media_type[-4:] in ('+xml', '/xml'):
                    data = etree.fromstring(data)
                elif self.media_type in OEB_STYLES:
-                    data = self.oeb.decode(data)
+                    data = self._parse_css(data)
                self._data = data
                return data
            def fset(self, value):
@ -677,7 +707,7 @@ class Manifest(object):
            def fdel(self):
                self._data = None
            return property(fget, fset, fdel, doc=doc)
-                
+        
        def __str__(self):
            data = self.data
            if isinstance(data, etree._Element):
@ -726,7 +756,7 @@ class Manifest(object):
            if frag:
                relhref = '#'.join((relhref, frag))
            return relhref
-
+        
        def abshref(self, href):
            """Convert the URL provided in :param:`href` from a reference
            relative to this manifest item to a book-absolute reference.
@ -748,7 +778,7 @@ class Manifest(object):
        self.items = set()
        self.ids = {}
        self.hrefs = {}
-
+    
    def add(self, id, href, media_type, fallback=None, loader=None, data=None):
        """Add a new item to the book manifest.
@ -765,7 +795,7 @@ class Manifest(object):
        self.ids[item.id] = item
        self.hrefs[item.href] = item
        return item
-
+    
    def remove(self, item):
        """Removes :param:`item` from the manifest."""
        if item in self.ids:
@ -775,7 +805,7 @@ class Manifest(object):
        self.items.remove(item)
        if item in self.oeb.spine:
            self.oeb.spine.remove(item)
-
+    
    def generate(self, id=None, href=None):
        """Generate a new unique identifier and/or internal path for use in
        creating a new manifest item, using the provided :param:`id` and/or
@ -803,13 +833,13 @@ class Manifest(object):
    def __iter__(self):
        for item in self.items:
            yield item
-
+    
    def values(self):
        return list(self.items)
    def __contains__(self, item):
        return item in self.items
-
+    
    def to_opf1(self, parent=None):
        elem = element(parent, 'manifest')
        for item in self.items:
--- a/src/calibre/ebooks/oeb/factory.py
+++ b/src/calibre/ebooks/oeb/factory.py
@ -8,6 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import sys, os, logging
 from itertools import chain
 import calibre
 from calibre.ebooks.oeb.base import OEBError
 from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks.oeb.writer import OEBWriter
@ -15,7 +16,7 @@ from calibre.ebooks.lit.reader import LitReader
 from calibre.ebooks.lit.writer import LitWriter
 from calibre.ebooks.mobi.reader import MobiReader
 from calibre.ebooks.mobi.writer import MobiWriter
-from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.utils.config import Config
@ -77,8 +78,8 @@ def main(argv=sys.argv):
    if len(args) != 0:
        parser.print_help()
        return 1
-    logger = Logger(logging.getLogger('ebook-convert'))
+    logger = logging.getLogger('ebook-convert')
-    logger.setup_cli_handler(opts.verbose)
+    calibre.setup_cli_handlers(logger, logging.DEBUG)
    encoding = opts.encoding
    pretty_print = opts.pretty_print
    oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -181,7 +181,7 @@ class OEBReader(object):
                        if not scheme and href not in known:
                            new.add(href)
                elif item.media_type in OEB_STYLES:
-                    for match in CSSURL_RE.finditer(item.data):
+                    for match in CSSURL_RE.finditer(item.data.cssText):
                        href, _ = urldefrag(match.group('url'))
                        href = item.abshref(urlnormalize(href))
                        scheme = urlparse(href).scheme
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -115,8 +115,7 @@ class Stylizer(object):
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [HTML_CSS_STYLESHEET]
        head = xpath(tree, '/h:html/h:head')[0]
-        parser = cssutils.CSSParser()
+        parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
        parser.setFetcher(self._fetch_css_file)
        for elem in head:
            if elem.tag == XHTML('style') and elem.text \
               and elem.get('type', CSS_MIME) in OEB_STYLES:
@ -135,14 +134,7 @@ class Stylizer(object):
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
-                if sitem in self.STYLESHEETS:
+                stylesheets.append(sitem.data)
                    stylesheet = self.STYLESHEETS[sitem]
                else:
                    data = self._fetch_css_file(path)[1]
                    stylesheet = parser.parseString(data, href=path)
                    stylesheet.namespaces['h'] = XHTML_NS
                    self.STYLESHEETS[sitem] = stylesheet
                stylesheets.append(stylesheet)
        rules = []
        index = 0
        self.stylesheets = set()
@ -159,9 +151,9 @@ class Stylizer(object):
        for _, _, cssdict, text, _ in rules:
            try:
                selector = CSSSelector(text)
-            except (AssertionError, ExpressionError, etree.XPathSyntaxError,\
+            except (AssertionError, ExpressionError, etree.XPathSyntaxError,
-                NameError, # gets thrown on OS X instead of SelectorSyntaxError
+                    NameError, # thrown on OS X instead of SelectorSyntaxError
-                SelectorSyntaxError):
+                    SelectorSyntaxError):
                continue
            for elem in selector(tree):
                self.style(elem)._update_cssdict(cssdict)
@ -171,9 +163,13 @@ class Stylizer(object):
    def _fetch_css_file(self, path):
        hrefs = self.oeb.manifest.hrefs
        if path not in hrefs:
            self.logger.warn('CSS import of missing file %r' % path)
            return (None, None)
-        data = hrefs[path].data
+        item = hrefs[path]
-        data = XHTML_CSS_NAMESPACE + data
+        if item.media_type not in OEB_STYLES:
            self.logger.warn('CSS import of non-CSS file %r' % path)
            return (None, None)
        data = item.data.cssText
        return ('utf-8', data)
    def flatten_rule(self, rule, href, index):
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@ -53,7 +53,7 @@ class ManifestTrimmer(object):
                            if found not in used:
                                new.add(found)
                elif item.media_type == CSS_MIME:
-                    for match in CSSURL_RE.finditer(item.data):
+                    for match in CSSURL_RE.finditer(item.data.cssText):
                        href = match.group('url')
                        href = item.abshref(urlnormalize(href))
                        if href in oeb.manifest.hrefs:
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -8,7 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import sys, os, logging
 from calibre.ebooks.oeb.base import OPF_MIME, xml2str
-from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
+from calibre.ebooks.oeb.base import DirContainer, OEBBook
 __all__ = ['OEBWriter']