Conversion pipeline: Fix handling of relative resource URLs in the CSS of the input document

This commit is contained in:
Kovid Goyal 2012-04-26 14:17:53 +05:30
parent 68e26b945c
commit 5c91febe06
2 changed files with 33 additions and 11 deletions

View File

@@ -11,8 +11,8 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import os, itertools, re, logging, copy, unicodedata
from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError
from cssutils.css import (CSSStyleRule, CSSPageRule, CSSStyleDeclaration,
CSSFontFaceRule, cssproperties)
from cssutils.css import (CSSStyleRule, CSSPageRule, CSSFontFaceRule,
cssproperties)
try:
from cssutils.css import CSSValueList
CSSValueList
@@ -20,7 +20,7 @@ except ImportError:
# cssutils >= 0.9.8
from cssutils.css import PropertyValue as CSSValueList
from cssutils import (profile as cssprofiles, parseString, parseStyle, log as
cssutils_log, CSSParser, profiles)
cssutils_log, CSSParser, profiles, replaceUrls)
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
from calibre import force_unicode
@@ -221,6 +221,10 @@ class Stylizer(object):
stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS
stylesheets.append(stylesheet)
# Make links to resources absolute, since these rules will
# be folded into a stylesheet at the root
replaceUrls(stylesheet, item.abshref,
ignoreImportRules=True)
elif elem.tag == XHTML('link') and elem.get('href') \
and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
@@ -295,7 +299,7 @@ class Stylizer(object):
for elem in matches:
self.style(elem)._update_cssdict(cssdict)
for elem in xpath(tree, '//h:*[@style]'):
self.style(elem)._apply_style_attr()
self.style(elem)._apply_style_attr(url_replacer=item.abshref)
num_pat = re.compile(r'\d+$')
for elem in xpath(tree, '//h:img[@width or @height]'):
style = self.style(elem)
@@ -493,7 +497,7 @@ class Style(object):
def _update_cssdict(self, cssdict):
self._style.update(cssdict)
def _apply_style_attr(self):
def _apply_style_attr(self, url_replacer=None):
attrib = self._element.attrib
if 'style' not in attrib:
return
@@ -501,10 +505,17 @@ class Style(object):
css = filter(None, (x.strip() for x in css))
css = [x.strip() for x in css]
css = [x for x in css if self.MS_PAT.match(x) is None]
css = '; '.join(css)
try:
style = CSSStyleDeclaration('; '.join(css))
style = parseStyle(css)
except CSSSyntaxError:
return
if url_replacer is not None:
# Fool replaceUrls into processing our style declaration
class Fool:
def __init__(self, s):
self.style = s
replaceUrls(Fool(style), url_replacer, ignoreImportRules=True)
self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self):

View File

@@ -6,11 +6,12 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import re
import operator
import math
import re, operator, math
from collections import defaultdict
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath)
from calibre.ebooks.oeb.stylizer import Stylizer
@@ -133,6 +134,13 @@ class CSSFlattener(object):
self.oeb.log.debug('Filtering CSS properties: %s'%
', '.join(self.filter_css))
for item in oeb.manifest.values():
# Make all links to resources absolute, as these sheets will be
# consolidated into a single stylesheet at the root of the document
if item.media_type in OEB_STYLES:
cssutils.replaceUrls(item.data, item.abshref,
ignoreImportRules=True)
self.stylize_spine()
self.sbase = self.baseline_spine() if self.fbase else None
self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
@@ -394,8 +402,11 @@ class CSSFlattener(object):
style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
style.text = "\n\t\t@page { %s; }" % css
rules = [r.cssText for r in stylizer.font_face_rules]
for r in rules:
style.text += '\n\t\t'+r+'\n'
raw = '\n\n'.join(rules)
# Make URLs referring to fonts relative to this item
sheet = cssutils.parseString(raw)
cssutils.replaceUrls(sheet, item.relhref, ignoreImportRules=True)
style.text += '\n' + sheet.cssText
def replace_css(self, css):
manifest = self.oeb.manifest