HTML Input: Handle @import directives in linked CSS files. Fixes #5135 (style import error during HTML conversion with ebook-convert.exe)

This commit is contained in:
Kovid Goyal 2010-06-23 21:39:33 -06:00
parent 7ea679768e
commit ffb9002c32
3 changed files with 71 additions and 13 deletions

View File

@ -107,9 +107,21 @@ class CSSPreProcessor(object):
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}') PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
def __call__(self, data): def __call__(self, data, add_namespace=False):
from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
data = self.PAGE_PAT.sub('', data) data = self.PAGE_PAT.sub('', data)
return data if not add_namespace:
return data
ans, namespaced = [], False
for line in data.splitlines():
ll = line.lstrip()
if not (namespaced or ll.startswith('@import') or
ll.startswith('@charset')):
ans.append(XHTML_CSS_NAMESPACE.strip())
namespaced = True
ans.append(line)
return u'\n'.join(ans)
class HTMLPreProcessor(object): class HTMLPreProcessor(object):

View File

@ -312,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
xpath xpath
from calibre import guess_type from calibre import guess_type
import cssutils import cssutils
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self, oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False) encoding=opts.input_encoding, populate=False)
self.oeb = oeb self.oeb = oeb
@ -376,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
rewrite_links(item.data, partial(self.resource_adder, base=dpath)) rewrite_links(item.data, partial(self.resource_adder, base=dpath))
for item in oeb.manifest.values(): for item in oeb.manifest.values():
if item.media_type in OEB_STYLES: if item.media_type in self.OEB_STYLES:
dpath = None dpath = None
for path, href in self.added_resources.items(): for path, href in self.added_resources.items():
if href == item.href: if href == item.href:
@ -414,25 +415,30 @@ class HTMLInput(InputFormatPlugin):
oeb.container = DirContainer(os.getcwdu(), oeb.log) oeb.container = DirContainer(os.getcwdu(), oeb.log)
return oeb return oeb
def link_to_local_path(self, link_, base=None):
def resource_adder(self, link_, base=None):
if not isinstance(link_, unicode): if not isinstance(link_, unicode):
try: try:
link_ = link_.decode('utf-8', 'error') link_ = link_.decode('utf-8', 'error')
except: except:
self.log.warn('Failed to decode link %r. Ignoring'%link_) self.log.warn('Failed to decode link %r. Ignoring'%link_)
return link_ return None, None
try: try:
l = Link(link_, base if base else os.path.getcwdu()) l = Link(link_, base if base else os.getcwdu())
except: except:
self.log.exception('Failed to process link: %r'%link_) self.log.exception('Failed to process link: %r'%link_)
return link_ return None, None
if l.path is None: if l.path is None:
# Not a local resource # Not a local resource
return link_ return None, None
link = l.path.replace('/', os.sep).strip() link = l.path.replace('/', os.sep).strip()
frag = l.fragment frag = l.fragment
if not link: if not link:
return None, None
return link, frag
def resource_adder(self, link_, base=None):
link, frag = self.link_to_local_path(link_, base=base)
if link is None:
return link_ return link_
try: try:
if base and not os.path.isabs(link): if base and not os.path.isabs(link):
@ -460,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
item = self.oeb.manifest.add(id, href, media_type) item = self.oeb.manifest.add(id, href, media_type)
item.html_input_href = bhref item.html_input_href = bhref
if guessed in self.OEB_STYLES:
item.override_css_fetch = partial(
self.css_import_handler, os.path.dirname(link))
item.data item.data
self.added_resources[link] = href self.added_resources[link] = href
@ -468,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
nlink = '#'.join((nlink, frag)) nlink = '#'.join((nlink, frag))
return nlink return nlink
def css_import_handler(self, base, href):
    """Load the stylesheet named by an ``@import`` rule from the local disk.

    ``href`` is resolved against ``base`` with ``link_to_local_path``. When
    the result is a readable path that is not a directory, the file is read
    as UTF-8 (undecodable bytes are replaced) and run through
    ``self.oeb.css_preprocessor`` with ``add_namespace=True``.

    Returns a 2-tuple whose first element is always ``None``. The second
    element is the preprocessed CSS text, or ``None`` when the link is not a
    usable local file or reading/preprocessing fails (the failure is logged).
    NOTE(review): the tuple is presumably the (encoding, content) pair that
    the cssutils ``fetcher`` hook expects -- confirm against cssutils docs.
    """
    # Only the path matters here; any URL fragment is irrelevant for a file.
    link, _frag = self.link_to_local_path(href, base=base)
    if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
        return (None, None)
    try:
        # Context manager guarantees the handle is closed even if decoding
        # or preprocessing raises.
        with open(link, 'rb') as f:
            raw = f.read().decode('utf-8', 'replace')
        raw = self.oeb.css_preprocessor(raw, add_namespace=True)
    except Exception:
        # Best effort: a broken import must not abort the conversion, but
        # KeyboardInterrupt/SystemExit are deliberately allowed through.
        self.log.exception('Failed to read CSS file: %r'%link)
        return (None, None)
    return (None, raw)

View File

@ -17,6 +17,7 @@ from urlparse import urljoin
from lxml import etree, html from lxml import etree, html
from cssutils import CSSParser from cssutils import CSSParser
from cssutils.css import CSSRule
import calibre import calibre
from calibre.constants import filesystem_encoding from calibre.constants import filesystem_encoding
@ -762,6 +763,7 @@ class Manifest(object):
self.href = self.path = urlnormalize(href) self.href = self.path = urlnormalize(href)
self.media_type = media_type self.media_type = media_type
self.fallback = fallback self.fallback = fallback
self.override_css_fetch = None
self.spine_position = None self.spine_position = None
self.linear = True self.linear = True
if loader is None and data is None: if loader is None and data is None:
@ -982,15 +984,40 @@ class Manifest(object):
def _parse_css(self, data): def _parse_css(self, data):
def get_style_rules_from_import(import_rule):
    """Gather the style and @font-face rules reachable through an @import.

    Walks the rules of the imported sheet in order. A nested @import is
    followed recursively and its rules are spliced in at that position;
    rules of any other type are dropped. Returns an empty list when the
    import rule has no style sheet attached.
    """
    if not import_rule.styleSheet:
        return []
    collected = []
    for child in import_rule.styleSheet.cssRules:
        if child.type == CSSRule.IMPORT_RULE:
            # Recurse so nested imports are flattened in document order.
            collected.extend(get_style_rules_from_import(child))
            continue
        if child.type in (CSSRule.FONT_FACE_RULE, CSSRule.STYLE_RULE):
            collected.append(child)
    return collected
self.oeb.log.debug('Parsing', self.href, '...') self.oeb.log.debug('Parsing', self.href, '...')
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = self.oeb.css_preprocessor(data) data = self.oeb.css_preprocessor(data, add_namespace=True)
data = XHTML_CSS_NAMESPACE + data
parser = CSSParser(loglevel=logging.WARNING, parser = CSSParser(loglevel=logging.WARNING,
fetcher=self._fetch_css, fetcher=self.override_css_fetch or self._fetch_css,
log=_css_logger) log=_css_logger)
data = parser.parseString(data, href=self.href) data = parser.parseString(data, href=self.href)
data.namespaces['h'] = XHTML_NS data.namespaces['h'] = XHTML_NS
import_rules = list(data.cssRules.rulesOfType(CSSRule.IMPORT_RULE))
rules_to_append = []
insert_index = None
for r in data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
insert_index = data.cssRules.index(r)
break
for rule in import_rules:
rules_to_append.extend(get_style_rules_from_import(rule))
for r in reversed(rules_to_append):
data.insertRule(r, index=insert_index)
for rule in import_rules:
data.deleteRule(rule)
return data return data
def _fetch_css(self, path): def _fetch_css(self, path):