Conversion pipeline: Fix handling of relative resource URLs in the CSS of the input document

This commit is contained in:
Kovid Goyal 2012-04-26 14:17:53 +05:30
parent 68e26b945c
commit 5c91febe06
2 changed files with 33 additions and 11 deletions

View File

@ -11,8 +11,8 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import os, itertools, re, logging, copy, unicodedata import os, itertools, re, logging, copy, unicodedata
from weakref import WeakKeyDictionary from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError from xml.dom import SyntaxErr as CSSSyntaxError
from cssutils.css import (CSSStyleRule, CSSPageRule, CSSStyleDeclaration, from cssutils.css import (CSSStyleRule, CSSPageRule, CSSFontFaceRule,
CSSFontFaceRule, cssproperties) cssproperties)
try: try:
from cssutils.css import CSSValueList from cssutils.css import CSSValueList
CSSValueList CSSValueList
@ -20,7 +20,7 @@ except ImportError:
# cssutils >= 0.9.8 # cssutils >= 0.9.8
from cssutils.css import PropertyValue as CSSValueList from cssutils.css import PropertyValue as CSSValueList
from cssutils import (profile as cssprofiles, parseString, parseStyle, log as from cssutils import (profile as cssprofiles, parseString, parseStyle, log as
cssutils_log, CSSParser, profiles) cssutils_log, CSSParser, profiles, replaceUrls)
from lxml import etree from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
from calibre import force_unicode from calibre import force_unicode
@ -221,6 +221,10 @@ class Stylizer(object):
stylesheet = parser.parseString(text, href=cssname) stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS stylesheet.namespaces['h'] = XHTML_NS
stylesheets.append(stylesheet) stylesheets.append(stylesheet)
# Make links to resources absolute, since these rules will
# be folded into a stylesheet at the root
replaceUrls(stylesheet, item.abshref,
ignoreImportRules=True)
elif elem.tag == XHTML('link') and elem.get('href') \ elif elem.tag == XHTML('link') and elem.get('href') \
and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \ and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
and elem.get('type', CSS_MIME).lower() in OEB_STYLES: and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
@ -295,7 +299,7 @@ class Stylizer(object):
for elem in matches: for elem in matches:
self.style(elem)._update_cssdict(cssdict) self.style(elem)._update_cssdict(cssdict)
for elem in xpath(tree, '//h:*[@style]'): for elem in xpath(tree, '//h:*[@style]'):
self.style(elem)._apply_style_attr() self.style(elem)._apply_style_attr(url_replacer=item.abshref)
num_pat = re.compile(r'\d+$') num_pat = re.compile(r'\d+$')
for elem in xpath(tree, '//h:img[@width or @height]'): for elem in xpath(tree, '//h:img[@width or @height]'):
style = self.style(elem) style = self.style(elem)
@ -493,7 +497,7 @@ class Style(object):
def _update_cssdict(self, cssdict): def _update_cssdict(self, cssdict):
self._style.update(cssdict) self._style.update(cssdict)
def _apply_style_attr(self): def _apply_style_attr(self, url_replacer=None):
attrib = self._element.attrib attrib = self._element.attrib
if 'style' not in attrib: if 'style' not in attrib:
return return
@ -501,10 +505,17 @@ class Style(object):
css = filter(None, (x.strip() for x in css)) css = filter(None, (x.strip() for x in css))
css = [x.strip() for x in css] css = [x.strip() for x in css]
css = [x for x in css if self.MS_PAT.match(x) is None] css = [x for x in css if self.MS_PAT.match(x) is None]
css = '; '.join(css)
try: try:
style = CSSStyleDeclaration('; '.join(css)) style = parseStyle(css)
except CSSSyntaxError: except CSSSyntaxError:
return return
if url_replacer is not None:
# Fool replaceUrls into processing our style declaration
class Fool:
def __init__(self, s):
self.style = s
replaceUrls(Fool(style), url_replacer, ignoreImportRules=True)
self._style.update(self._stylizer.flatten_style(style)) self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self): def _has_parent(self):

View File

@ -6,11 +6,12 @@ from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import re import re, operator, math
import operator
import math
from collections import defaultdict from collections import defaultdict
from lxml import etree from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES, from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath) namespace, barename, XPath)
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
@ -133,6 +134,13 @@ class CSSFlattener(object):
self.oeb.log.debug('Filtering CSS properties: %s'% self.oeb.log.debug('Filtering CSS properties: %s'%
', '.join(self.filter_css)) ', '.join(self.filter_css))
for item in oeb.manifest.values():
# Make all links to resources absolute, as these sheets will be
# consolidated into a single stylesheet at the root of the document
if item.media_type in OEB_STYLES:
cssutils.replaceUrls(item.data, item.abshref,
ignoreImportRules=True)
self.stylize_spine() self.stylize_spine()
self.sbase = self.baseline_spine() if self.fbase else None self.sbase = self.baseline_spine() if self.fbase else None
self.fmap = FontMapper(self.sbase, self.fbase, self.fkey) self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
@ -394,8 +402,11 @@ class CSSFlattener(object):
style = etree.SubElement(head, XHTML('style'), type=CSS_MIME) style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
style.text = "\n\t\t@page { %s; }" % css style.text = "\n\t\t@page { %s; }" % css
rules = [r.cssText for r in stylizer.font_face_rules] rules = [r.cssText for r in stylizer.font_face_rules]
for r in rules: raw = '\n\n'.join(rules)
style.text += '\n\t\t'+r+'\n' # Make URLs referring to fonts relative to this item
sheet = cssutils.parseString(raw)
cssutils.replaceUrls(sheet, item.relhref, ignoreImportRules=True)
style.text += '\n' + sheet.cssText
def replace_css(self, css): def replace_css(self, css):
manifest = self.oeb.manifest manifest = self.oeb.manifest