mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Convert OEBBook to store cssutils-parsed CSS.
This commit is contained in:
parent
57aebdff7d
commit
29486d653e
@ -27,7 +27,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \
|
||||
CSS_MIME, OPF_MIME, XML_NS, XML
|
||||
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
|
||||
urlnormalize, xpath
|
||||
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||
@ -732,7 +732,7 @@ def option_parser():
|
||||
return parser
|
||||
|
||||
def oeb2lit(opts, inpath):
|
||||
logger = Logger(logging.getLogger('oeb2lit'))
|
||||
logger = logging.getLogger('oeb2lit')
|
||||
logger.setup_cli_handler(opts.verbose)
|
||||
outpath = opts.output
|
||||
if outpath is None:
|
||||
|
@ -13,8 +13,11 @@ from collections import defaultdict
|
||||
from itertools import count
|
||||
from urlparse import urldefrag, urlparse, urlunparse
|
||||
from urllib import unquote as urlunquote
|
||||
import logging
|
||||
from lxml import etree, html
|
||||
import calibre
|
||||
from cssutils import CSSParser
|
||||
from cssutils.css import CSSStyleSheet
|
||||
from calibre.translations.dynamic import translate
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||
@ -99,6 +102,8 @@ PNG_MIME = types_map['.png']
|
||||
SVG_MIME = types_map['.svg']
|
||||
BINARY_MIME = 'application/octet-stream'
|
||||
|
||||
XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
|
||||
|
||||
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
|
||||
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
|
||||
'text/x-oeb-document'])
|
||||
@ -565,7 +570,7 @@ class Manifest(object):
|
||||
return 'Item(id=%r, href=%r, media_type=%r)' \
|
||||
% (self.id, self.href, self.media_type)
|
||||
|
||||
def _force_xhtml(self, data):
|
||||
def _parse_xhtml(self, data):
|
||||
# Convert to Unicode and normalize line endings
|
||||
data = self.oeb.decode(data)
|
||||
data = XMLDECL_RE.sub('', data)
|
||||
@ -645,6 +650,27 @@ class Manifest(object):
|
||||
'File %r missing <body/> element' % self.href)
|
||||
etree.SubElement(data, XHTML('body'))
|
||||
return data
|
||||
|
||||
def _parse_css(self, data):
    """Parse stylesheet source for this manifest item with cssutils.

    :param data: stylesheet source; it is passed through ``self.oeb.decode``
        before parsing.
    :returns: the object produced by ``CSSParser.parseString``, with the
        ``h`` namespace prefix bound to ``XHTML_NS``.
    """
    book = self.oeb
    decoded = book.decode(data)
    # Put the default XHTML @namespace declaration in front of the sheet.
    # NOTE(review): this places @namespace ahead of any @charset/@import
    # rule already in the sheet -- confirm cssutils accepts that ordering.
    css_source = XHTML_CSS_NAMESPACE + decoded
    # Parser messages go to the book's logger; @import targets are
    # resolved through self._fetch_css.
    css_parser = CSSParser(
        log=book.logger,
        loglevel=logging.WARNING,
        fetcher=self._fetch_css,
    )
    sheet = css_parser.parseString(css_source, href=self.href)
    sheet.namespaces['h'] = XHTML_NS
    return sheet
|
||||
|
||||
def _fetch_css(self, path):
    """Fetcher callback handed to ``CSSParser`` for resolving CSS imports.

    :param path: href of the imported stylesheet, looked up in
        ``self.oeb.manifest.hrefs``.
    :returns: ``('utf-8', cssText)`` when ``path`` names a manifest item
        whose media type is in ``OEB_STYLES``; otherwise ``(None, None)``
        after logging a warning.
    """
    manifest_hrefs = self.oeb.manifest.hrefs
    if path in manifest_hrefs:
        target = manifest_hrefs[path]
        if target.media_type in OEB_STYLES:
            # Hand back the serialized text of the item's parsed data.
            return ('utf-8', target.data.cssText)
        problem = 'CSS import of non-CSS file %r'
    else:
        problem = 'CSS import of missing file %r'
    # Both failure modes share one exit: warn, then report "nothing found".
    self.oeb.logger.warn(problem % path)
    return (None, None)
|
||||
|
||||
@dynamic_property
|
||||
def data(self):
|
||||
@ -661,15 +687,19 @@ class Manifest(object):
|
||||
special parsing.
|
||||
"""
|
||||
def fget(self):
|
||||
if self._data is not None:
|
||||
return self._data
|
||||
data = self._loader(self.href)
|
||||
if self.media_type in OEB_DOCS:
|
||||
data = self._force_xhtml(data)
|
||||
data = self._data
|
||||
if data is None:
|
||||
if self._loader is None:
|
||||
return None
|
||||
data = self._loader(self.href)
|
||||
if not isinstance(data, basestring):
|
||||
pass # already parsed
|
||||
elif self.media_type in OEB_DOCS:
|
||||
data = self._parse_xhtml(data)
|
||||
elif self.media_type[-4:] in ('+xml', '/xml'):
|
||||
data = etree.fromstring(data)
|
||||
elif self.media_type in OEB_STYLES:
|
||||
data = self.oeb.decode(data)
|
||||
data = self._parse_css(data)
|
||||
self._data = data
|
||||
return data
|
||||
def fset(self, value):
|
||||
@ -677,7 +707,7 @@ class Manifest(object):
|
||||
def fdel(self):
|
||||
self._data = None
|
||||
return property(fget, fset, fdel, doc=doc)
|
||||
|
||||
|
||||
def __str__(self):
|
||||
data = self.data
|
||||
if isinstance(data, etree._Element):
|
||||
@ -726,7 +756,7 @@ class Manifest(object):
|
||||
if frag:
|
||||
relhref = '#'.join((relhref, frag))
|
||||
return relhref
|
||||
|
||||
|
||||
def abshref(self, href):
|
||||
"""Convert the URL provided in :param:`href` from a reference
|
||||
relative to this manifest item to a book-absolute reference.
|
||||
@ -748,7 +778,7 @@ class Manifest(object):
|
||||
self.items = set()
|
||||
self.ids = {}
|
||||
self.hrefs = {}
|
||||
|
||||
|
||||
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
|
||||
"""Add a new item to the book manifest.
|
||||
|
||||
@ -765,7 +795,7 @@ class Manifest(object):
|
||||
self.ids[item.id] = item
|
||||
self.hrefs[item.href] = item
|
||||
return item
|
||||
|
||||
|
||||
def remove(self, item):
|
||||
"""Removes :param:`item` from the manifest."""
|
||||
if item in self.ids:
|
||||
@ -775,7 +805,7 @@ class Manifest(object):
|
||||
self.items.remove(item)
|
||||
if item in self.oeb.spine:
|
||||
self.oeb.spine.remove(item)
|
||||
|
||||
|
||||
def generate(self, id=None, href=None):
|
||||
"""Generate a new unique identifier and/or internal path for use in
|
||||
creating a new manifest item, using the provided :param:`id` and/or
|
||||
@ -803,13 +833,13 @@ class Manifest(object):
|
||||
def __iter__(self):
|
||||
for item in self.items:
|
||||
yield item
|
||||
|
||||
|
||||
def values(self):
|
||||
return list(self.items)
|
||||
|
||||
def __contains__(self, item):
|
||||
return item in self.items
|
||||
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'manifest')
|
||||
for item in self.items:
|
||||
|
@ -8,6 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys, os, logging
|
||||
from itertools import chain
|
||||
import calibre
|
||||
from calibre.ebooks.oeb.base import OEBError
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
@ -15,7 +16,7 @@ from calibre.ebooks.lit.reader import LitReader
|
||||
from calibre.ebooks.lit.writer import LitWriter
|
||||
from calibre.ebooks.mobi.reader import MobiReader
|
||||
from calibre.ebooks.mobi.writer import MobiWriter
|
||||
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.utils.config import Config
|
||||
|
||||
@ -77,8 +78,8 @@ def main(argv=sys.argv):
|
||||
if len(args) != 0:
|
||||
parser.print_help()
|
||||
return 1
|
||||
logger = Logger(logging.getLogger('ebook-convert'))
|
||||
logger.setup_cli_handler(opts.verbose)
|
||||
logger = logging.getLogger('ebook-convert')
|
||||
calibre.setup_cli_handlers(logger, logging.DEBUG)
|
||||
encoding = opts.encoding
|
||||
pretty_print = opts.pretty_print
|
||||
oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
|
||||
|
@ -181,7 +181,7 @@ class OEBReader(object):
|
||||
if not scheme and href not in known:
|
||||
new.add(href)
|
||||
elif item.media_type in OEB_STYLES:
|
||||
for match in CSSURL_RE.finditer(item.data):
|
||||
for match in CSSURL_RE.finditer(item.data.cssText):
|
||||
href, _ = urldefrag(match.group('url'))
|
||||
href = item.abshref(urlnormalize(href))
|
||||
scheme = urlparse(href).scheme
|
||||
|
@ -115,8 +115,7 @@ class Stylizer(object):
|
||||
cssname = os.path.splitext(basename)[0] + '.css'
|
||||
stylesheets = [HTML_CSS_STYLESHEET]
|
||||
head = xpath(tree, '/h:html/h:head')[0]
|
||||
parser = cssutils.CSSParser()
|
||||
parser.setFetcher(self._fetch_css_file)
|
||||
parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
|
||||
for elem in head:
|
||||
if elem.tag == XHTML('style') and elem.text \
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
@ -135,14 +134,7 @@ class Stylizer(object):
|
||||
'Stylesheet %r referenced by file %r not in manifest' %
|
||||
(path, item.href))
|
||||
continue
|
||||
if sitem in self.STYLESHEETS:
|
||||
stylesheet = self.STYLESHEETS[sitem]
|
||||
else:
|
||||
data = self._fetch_css_file(path)[1]
|
||||
stylesheet = parser.parseString(data, href=path)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
self.STYLESHEETS[sitem] = stylesheet
|
||||
stylesheets.append(stylesheet)
|
||||
stylesheets.append(sitem.data)
|
||||
rules = []
|
||||
index = 0
|
||||
self.stylesheets = set()
|
||||
@ -159,9 +151,9 @@ class Stylizer(object):
|
||||
for _, _, cssdict, text, _ in rules:
|
||||
try:
|
||||
selector = CSSSelector(text)
|
||||
except (AssertionError, ExpressionError, etree.XPathSyntaxError,\
|
||||
NameError, # gets thrown on OS X instead of SelectorSyntaxError
|
||||
SelectorSyntaxError):
|
||||
except (AssertionError, ExpressionError, etree.XPathSyntaxError,
|
||||
NameError, # thrown on OS X instead of SelectorSyntaxError
|
||||
SelectorSyntaxError):
|
||||
continue
|
||||
for elem in selector(tree):
|
||||
self.style(elem)._update_cssdict(cssdict)
|
||||
@ -171,9 +163,13 @@ class Stylizer(object):
|
||||
def _fetch_css_file(self, path):
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
if path not in hrefs:
|
||||
self.logger.warn('CSS import of missing file %r' % path)
|
||||
return (None, None)
|
||||
data = hrefs[path].data
|
||||
data = XHTML_CSS_NAMESPACE + data
|
||||
item = hrefs[path]
|
||||
if item.media_type not in OEB_STYLES:
|
||||
self.logger.warn('CSS import of non-CSS file %r' % path)
|
||||
return (None, None)
|
||||
data = item.data.cssText
|
||||
return ('utf-8', data)
|
||||
|
||||
def flatten_rule(self, rule, href, index):
|
||||
|
@ -53,7 +53,7 @@ class ManifestTrimmer(object):
|
||||
if found not in used:
|
||||
new.add(found)
|
||||
elif item.media_type == CSS_MIME:
|
||||
for match in CSSURL_RE.finditer(item.data):
|
||||
for match in CSSURL_RE.finditer(item.data.cssText):
|
||||
href = match.group('url')
|
||||
href = item.abshref(urlnormalize(href))
|
||||
if href in oeb.manifest.hrefs:
|
||||
|
@ -8,7 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys, os, logging
|
||||
from calibre.ebooks.oeb.base import OPF_MIME, xml2str
|
||||
from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
|
||||
from calibre.ebooks.oeb.base import DirContainer, OEBBook
|
||||
|
||||
__all__ = ['OEBWriter']
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user