From 0a1c9f9919b7e0642913166f6d4918a2a4e302aa Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Sun, 8 Mar 2009 14:03:23 -0400
Subject: [PATCH 1/2] Clean up merge artifacts.

---
 src/calibre/ebooks/lit/reader.py | 36 +++++++++++---------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 1ac68f3866..f32a65e010 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -129,8 +129,6 @@ class UnBinary(object):
         self.tag_map, self.attr_map, self.tag_to_attr_map = map
         self.is_html = map is HTML_MAP
         self.tag_atoms, self.attr_atoms = atoms
-        self.opf = map is OPF_MAP
-        self.bin = bin
         self.dir = os.path.dirname(path)
         buf = StringIO()
         self.binary_to_text(bin, buf)
@@ -210,7 +208,8 @@ class UnBinary(object):
                         continue
                     if flags & FLAG_ATOM:
                         if not self.tag_atoms or tag not in self.tag_atoms:
-                            raise LitError("atom tag %d not in atom tag list" % tag)
+                            raise LitError(
+                                "atom tag %d not in atom tag list" % tag)
                         tag_name = self.tag_atoms[tag]
                         current_map = self.attr_atoms
                     elif tag < len(self.tag_map):
@@ -295,7 +294,7 @@ class UnBinary(object):
                             c = '&quot;'
                         elif c == '<':
                             c = '&lt;'
-                        self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
+                        buf.write(c.encode('ascii', 'xmlcharrefreplace'))
                     count -= 1
                 if count == 0:
                     if not in_censorship:
@@ -841,24 +840,7 @@ class LitFile(object):
         if len(attrs) != nentries:
             self._warn("damaged or invalid atoms attributes table")
         return (tags, attrs)
-    
-    def get_entry_content(self, entry, pretty_print=False):
-        if 'spine' in entry.state:
-            name = '/'.join(('/data', entry.internal, 'content'))
-            path = entry.path
-            raw = self.get_file(name)
-            decl, map = (OPF_DECL, OPF_MAP) \
-                if name == '/meta' else (HTML_DECL, HTML_MAP)
-            atoms = self.get_atoms(entry)
-            content = decl + unicode(UnBinary(raw, path, self.manifest, map, atoms))
-            if pretty_print:
-                content = self._pretty_print(content)
-            content = content.encode('utf-8')
-        else:
-            internal = '/'.join(('/data', entry.internal))
-            content = self._litfile.get_file(internal)
-        return content
- 
+
 
 class LitContainer(object):
     """Simple Container-interface, read-only accessor for LIT files."""
@@ -879,9 +861,15 @@ class LitContainer(object):
         elif 'spine' in entry.state:
             internal = '/'.join(('/data', entry.internal, 'content'))
             raw = self._litfile.get_file(internal)
-            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
+            manifest = self._litfile.manifest
+            atoms = self._litfile.get_atoms(entry)
+            unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
             content = HTML_DECL + str(unbin)
-   
+        else:
+            internal = '/'.join(('/data', entry.internal))
+            content = self._litfile.get_file(internal)
+        return content
+    
     def _read_meta(self):
         path = 'content.opf'
         raw = self._litfile.get_file('/meta')

From 29486d653e262f4174bcfb0a1189e6490166fd68 Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Wed, 18 Mar 2009 19:51:35 -0400
Subject: [PATCH 2/2] Convert OEBBook to store cssutils-parsed CSS.

---
Reviewer note (ignored by git-am; not part of the commit message):
oeb2lit() used to call logger.setup_cli_handler(), a method of the
Logger wrapper this patch stops using.  A plain logging.Logger has no
such method, so the call would raise AttributeError.  The handler is
now installed with calibre.setup_cli_handlers(), exactly as the
factory.py hunk of this same patch does in main().  The level is
hard-coded to logging.DEBUG to mirror factory.py; opts.verbose is no
longer consulted here -- TODO confirm that is the intended behaviour.

 src/calibre/ebooks/lit/writer.py              |  8 +-
 src/calibre/ebooks/oeb/base.py                | 58 ++++++++++++++-----
 src/calibre/ebooks/oeb/factory.py             |  7 ++-
 src/calibre/ebooks/oeb/reader.py              |  2 +-
 src/calibre/ebooks/oeb/stylizer.py            | 26 ++++-----
 .../ebooks/oeb/transforms/trimmanifest.py     |  2 +-
 src/calibre/ebooks/oeb/writer.py              |  2 +-
 7 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py
index bebba8938b..73216057b5 100644
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@@ -27,7 +27,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \
     CSS_MIME, OPF_MIME, XML_NS, XML
 from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
     urlnormalize, xpath
-from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
@@ -732,7 +732,9 @@ def option_parser():
     return parser
 
 def oeb2lit(opts, inpath):
-    logger = Logger(logging.getLogger('oeb2lit'))
-    logger.setup_cli_handler(opts.verbose)
+    # stdlib loggers have no setup_cli_handler(); use calibre's helper
+    from calibre import setup_cli_handlers
+    logger = logging.getLogger('oeb2lit')
+    setup_cli_handlers(logger, logging.DEBUG)
     outpath = opts.output
     if outpath is None:
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 59ce1f7b95..1e91fbe17d 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -13,8 +13,11 @@ from collections import defaultdict
 from itertools import count
 from urlparse import urldefrag, urlparse, urlunparse
 from urllib import unquote as urlunquote
+import logging
 from lxml import etree, html
 import calibre
+from cssutils import CSSParser
+from cssutils.css import CSSStyleSheet
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@@ -99,6 +102,8 @@ PNG_MIME       = types_map['.png']
 SVG_MIME       = types_map['.svg']
 BINARY_MIME    = 'application/octet-stream'
 
+XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
+
 OEB_STYLES        = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
 OEB_DOCS          = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
                          'text/x-oeb-document'])
@@ -565,7 +570,7 @@ class Manifest(object):
             return 'Item(id=%r, href=%r, media_type=%r)' \
                 % (self.id, self.href, self.media_type)
 
-        def _force_xhtml(self, data):
+        def _parse_xhtml(self, data):
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
             data = XMLDECL_RE.sub('', data)
@@ -645,6 +650,27 @@ class Manifest(object):
                     'File %r missing <body/> element' % self.href)
                 etree.SubElement(data, XHTML('body'))
             return data
+
+        def _parse_css(self, data):
+            data = self.oeb.decode(data)
+            data = XHTML_CSS_NAMESPACE + data
+            parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING,
+                               fetcher=self._fetch_css)
+            data = parser.parseString(data, href=self.href)
+            data.namespaces['h'] = XHTML_NS
+            return data
+        
+        def _fetch_css(self, path):
+            hrefs = self.oeb.manifest.hrefs
+            if path not in hrefs:
+                self.oeb.logger.warn('CSS import of missing file %r' % path)
+                return (None, None)
+            item = hrefs[path]
+            if item.media_type not in OEB_STYLES:
+                self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
+                return (None, None)
+            data = item.data.cssText
+            return ('utf-8', data)
         
         @dynamic_property
         def data(self):
@@ -661,15 +687,19 @@ class Manifest(object):
               special parsing.
             """
             def fget(self):
-                if self._data is not None:
-                    return self._data
-                data = self._loader(self.href)
-                if self.media_type in OEB_DOCS:
-                    data = self._force_xhtml(data)
+                data = self._data
+                if data is None:
+                    if self._loader is None:
+                        return None
+                    data = self._loader(self.href)
+                if not isinstance(data, basestring):
+                    pass # already parsed
+                elif self.media_type in OEB_DOCS:
+                    data = self._parse_xhtml(data)
                 elif self.media_type[-4:] in ('+xml', '/xml'):
                     data = etree.fromstring(data)
                 elif self.media_type in OEB_STYLES:
-                    data = self.oeb.decode(data)
+                    data = self._parse_css(data)
                 self._data = data
                 return data
             def fset(self, value):
@@ -677,7 +707,7 @@ class Manifest(object):
             def fdel(self):
                 self._data = None
             return property(fget, fset, fdel, doc=doc)
-                
+        
         def __str__(self):
             data = self.data
             if isinstance(data, etree._Element):
@@ -726,7 +756,7 @@ class Manifest(object):
             if frag:
                 relhref = '#'.join((relhref, frag))
             return relhref
-
+        
         def abshref(self, href):
             """Convert the URL provided in :param:`href` from a reference
             relative to this manifest item to a book-absolute reference.
@@ -748,7 +778,7 @@ class Manifest(object):
         self.items = set()
         self.ids = {}
         self.hrefs = {}
-
+    
     def add(self, id, href, media_type, fallback=None, loader=None, data=None):
         """Add a new item to the book manifest.
 
@@ -765,7 +795,7 @@ class Manifest(object):
         self.ids[item.id] = item
         self.hrefs[item.href] = item
         return item
-
+    
     def remove(self, item):
         """Removes :param:`item` from the manifest."""
         if item in self.ids:
@@ -775,7 +805,7 @@ class Manifest(object):
         self.items.remove(item)
         if item in self.oeb.spine:
             self.oeb.spine.remove(item)
-
+    
     def generate(self, id=None, href=None):
         """Generate a new unique identifier and/or internal path for use in
         creating a new manifest item, using the provided :param:`id` and/or
@@ -803,13 +833,13 @@ class Manifest(object):
     def __iter__(self):
         for item in self.items:
             yield item
-
+    
     def values(self):
         return list(self.items)
     
     def __contains__(self, item):
         return item in self.items
-
+    
     def to_opf1(self, parent=None):
         elem = element(parent, 'manifest')
         for item in self.items:
diff --git a/src/calibre/ebooks/oeb/factory.py b/src/calibre/ebooks/oeb/factory.py
index 684451044b..8add71d20d 100644
--- a/src/calibre/ebooks/oeb/factory.py
+++ b/src/calibre/ebooks/oeb/factory.py
@@ -8,6 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 
 import sys, os, logging
 from itertools import chain
+import calibre
 from calibre.ebooks.oeb.base import OEBError
 from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks.oeb.writer import OEBWriter
@@ -15,7 +16,7 @@ from calibre.ebooks.lit.reader import LitReader
 from calibre.ebooks.lit.writer import LitWriter
 from calibre.ebooks.mobi.reader import MobiReader
 from calibre.ebooks.mobi.writer import MobiWriter
-from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.utils.config import Config
 
@@ -77,8 +78,8 @@ def main(argv=sys.argv):
     if len(args) != 0:
         parser.print_help()
         return 1
-    logger = Logger(logging.getLogger('ebook-convert'))
-    logger.setup_cli_handler(opts.verbose)
+    logger = logging.getLogger('ebook-convert')
+    calibre.setup_cli_handlers(logger, logging.DEBUG)
     encoding = opts.encoding
     pretty_print = opts.pretty_print
     oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index dbafa5afac..c62540e15a 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -181,7 +181,7 @@ class OEBReader(object):
                         if not scheme and href not in known:
                             new.add(href)
                 elif item.media_type in OEB_STYLES:
-                    for match in CSSURL_RE.finditer(item.data):
+                    for match in CSSURL_RE.finditer(item.data.cssText):
                         href, _ = urldefrag(match.group('url'))
                         href = item.abshref(urlnormalize(href))
                         scheme = urlparse(href).scheme
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 3b5c3e19d0..8bc82883e3 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -115,8 +115,7 @@ class Stylizer(object):
         cssname = os.path.splitext(basename)[0] + '.css'
         stylesheets = [HTML_CSS_STYLESHEET]
         head = xpath(tree, '/h:html/h:head')[0]
-        parser = cssutils.CSSParser()
-        parser.setFetcher(self._fetch_css_file)
+        parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
         for elem in head:
             if elem.tag == XHTML('style') and elem.text \
                and elem.get('type', CSS_MIME) in OEB_STYLES:
@@ -135,14 +134,7 @@ class Stylizer(object):
                         'Stylesheet %r referenced by file %r not in manifest' %
                         (path, item.href))
                     continue
-                if sitem in self.STYLESHEETS:
-                    stylesheet = self.STYLESHEETS[sitem]
-                else:
-                    data = self._fetch_css_file(path)[1]
-                    stylesheet = parser.parseString(data, href=path)
-                    stylesheet.namespaces['h'] = XHTML_NS
-                    self.STYLESHEETS[sitem] = stylesheet
-                stylesheets.append(stylesheet)
+                stylesheets.append(sitem.data)
         rules = []
         index = 0
         self.stylesheets = set()
@@ -159,9 +151,9 @@ class Stylizer(object):
         for _, _, cssdict, text, _ in rules:
             try:
                 selector = CSSSelector(text)
-            except (AssertionError, ExpressionError, etree.XPathSyntaxError,\
-                NameError, # gets thrown on OS X instead of SelectorSyntaxError
-                SelectorSyntaxError):
+            except (AssertionError, ExpressionError, etree.XPathSyntaxError,
+                    NameError, # thrown on OS X instead of SelectorSyntaxError
+                    SelectorSyntaxError):
                 continue
             for elem in selector(tree):
                 self.style(elem)._update_cssdict(cssdict)
@@ -171,9 +163,13 @@ class Stylizer(object):
     def _fetch_css_file(self, path):
         hrefs = self.oeb.manifest.hrefs
         if path not in hrefs:
+            self.logger.warn('CSS import of missing file %r' % path)
             return (None, None)
-        data = hrefs[path].data
-        data = XHTML_CSS_NAMESPACE + data
+        item = hrefs[path]
+        if item.media_type not in OEB_STYLES:
+            self.logger.warn('CSS import of non-CSS file %r' % path)
+            return (None, None)
+        data = item.data.cssText
         return ('utf-8', data)
     
     def flatten_rule(self, rule, href, index):
diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
index c731800999..119ebcc73d 100644
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@@ -53,7 +53,7 @@ class ManifestTrimmer(object):
                             if found not in used:
                                 new.add(found)
                 elif item.media_type == CSS_MIME:
-                    for match in CSSURL_RE.finditer(item.data):
+                    for match in CSSURL_RE.finditer(item.data.cssText):
                         href = match.group('url')
                         href = item.abshref(urlnormalize(href))
                         if href in oeb.manifest.hrefs:
diff --git a/src/calibre/ebooks/oeb/writer.py b/src/calibre/ebooks/oeb/writer.py
index 235965b50f..8789d03470 100644
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@@ -8,7 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 
 import sys, os, logging
 from calibre.ebooks.oeb.base import OPF_MIME, xml2str
-from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
+from calibre.ebooks.oeb.base import DirContainer, OEBBook
 
 __all__ = ['OEBWriter']