mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Error checking for CSS (stylesheets and inside <style> tags)
This commit is contained in:
parent
6897a97342
commit
ed50bc7ed5
@ -8,11 +8,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from future_builtins import map
|
||||
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||
from calibre.ebooks.oeb.polish.container import guess_type
|
||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing
|
||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag
|
||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||
from calibre.ebooks.oeb.polish.check.links import check_links
|
||||
|
||||
@ -23,13 +23,15 @@ def run_checks(container):
|
||||
errors = []
|
||||
|
||||
# Check parsing
|
||||
xml_items, html_items, raster_images = [], [], []
|
||||
xml_items, html_items, raster_images, stylesheets = [], [], [], []
|
||||
for name, mt in container.mime_map.iteritems():
|
||||
items = None
|
||||
if mt in XML_TYPES:
|
||||
items = xml_items
|
||||
elif mt in OEB_DOCS:
|
||||
items = html_items
|
||||
elif mt in OEB_STYLES:
|
||||
items = stylesheets
|
||||
elif is_raster_image(mt):
|
||||
items = raster_images
|
||||
if items is not None:
|
||||
@ -38,6 +40,15 @@ def run_checks(container):
|
||||
errors.extend(run_checkers(check_xml_parsing, html_items))
|
||||
errors.extend(run_checkers(check_raster_images, raster_images))
|
||||
|
||||
# cssutils is not thread safe
|
||||
for name, mt, raw in stylesheets:
|
||||
errors.extend(check_css_parsing(name, raw))
|
||||
for name, mt, raw in html_items:
|
||||
root = container.parsed(name)
|
||||
for style in root.xpath('//*[local-name()="style"]'):
|
||||
if style.get('type', 'text/css') == 'text/css':
|
||||
errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
|
||||
|
||||
errors += check_links(container)
|
||||
|
||||
return errors
|
||||
@ -46,8 +57,13 @@ def fix_errors(container, errors):
|
||||
# Fix parsing
|
||||
changed = False
|
||||
for name in {e.name for e in errors if getattr(e, 'is_parsing_error', False)}:
|
||||
container.parsed(name)
|
||||
root = container.parsed(name)
|
||||
container.dirty(name)
|
||||
if container.mime_map[name] in OEB_DOCS:
|
||||
for style in root.xpath('//*[local-name()="style"]'):
|
||||
if style.get('type', 'text/css') == 'text/css' and style.text and style.text.strip():
|
||||
fix_style_tag(container, style)
|
||||
|
||||
changed = True
|
||||
|
||||
for err in errors:
|
||||
|
@ -9,10 +9,12 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import re
|
||||
|
||||
from lxml.etree import XMLParser, fromstring, XMLSyntaxError
|
||||
import cssutils
|
||||
|
||||
from calibre import force_unicode
|
||||
from calibre.ebooks.html_entities import html5_entities
|
||||
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||
|
||||
HTML_ENTITTIES = frozenset(html5_entities)
|
||||
@ -21,6 +23,15 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES
|
||||
|
||||
# Matches an entity reference of the form &name; where name is one of the
# names in HTML_ENTITTIES that is not also in XML_ENTITIES. The names are
# sorted so the generated pattern text is the same on every run.
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
|
||||
|
||||
def fix_style_tag(container, style):
    '''
    Replace the text of a <style> element with the serialized form of its
    parsed stylesheet.

    The CSS in ``style.text`` is parsed with ``container.parse_css()`` and the
    resulting ``cssText`` is written back, surrounded by newlines. The closing
    tag is indented with the trailing whitespace found on the last line of the
    text that precedes the <style> element (the parent's text when the element
    is the first child, otherwise the previous sibling's tail).

    :param container: object providing ``parse_css()``
    :param style: the lxml <style> element to rewrite in place
    '''
    prev = style.getprevious()
    ws = style.getparent().text if prev is None else prev.tail
    # lxml reports absent text/tail as None (for example <head><style> with
    # nothing in between) and ''.splitlines() is an empty list, so fall back
    # to an empty indent instead of raising in either case.
    preceding_lines = (ws or '').splitlines()
    last_line = preceding_lines[-1] if preceding_lines else ''
    indent = last_line[len(last_line.rstrip()):]

    sheet = container.parse_css(style.text)
    # NOTE(review): cssText is presumably UTF-8 encoded bytes here, hence the
    # force_unicode() call -- confirm against the cssutils version in use.
    style.text = '\n' + force_unicode(sheet.cssText, 'utf-8') + '\n' + indent
|
||||
|
||||
class XMLParseError(BaseError):
|
||||
|
||||
is_parsing_error = True
|
||||
@ -131,3 +142,73 @@ def check_xml_parsing(name, mt, raw):
|
||||
|
||||
return errors
|
||||
|
||||
class CSSError(BaseError):
    '''
    A problem reported while parsing CSS, either in a stylesheet or in a
    <style> tag of an HTML file.

    Problems at WARN level only carry a help text. All other levels also
    offer an individual automatic fix, which is performed by calling the
    error object with the container.
    '''

    is_parsing_error = True

    def __init__(self, level, msg, name, line, col):
        '''
        :param level: severity of the problem, compared against WARN
        :param msg: message text, stored with a ``CSS: `` prefix
        :param name: name of the file the problem was found in
        :param line: line number of the problem
        :param col: column number of the problem
        '''
        self.level = level
        BaseError.__init__(self, 'CSS: ' + msg, name, line, col)
        if level != WARN:
            self.HELP = _('Some reader programs are very'
                          ' finicky about CSS stylesheets and will ignore the whole'
                          ' sheet if there is an error. These errors can often'
                          ' be fixed automatically, however, automatic fixing will'
                          ' typically remove unrecognized items, instead of correcting them.')
            self.INDIVIDUAL_FIX = _('Try to fix parsing errors in this stylesheet automatically')
            return
        self.HELP = _('This CSS construct is not recognized. That means that it'
                      ' most likely will not work on reader devices. Consider'
                      ' replacing it with something else.')

    def __call__(self, container):
        '''
        Parse the file this error belongs to and mark it dirty in the
        container. For files whose mime type is in OEB_DOCS, additionally
        rewrite every non-empty ``text/css`` <style> tag via fix_style_tag().

        :return: always True
        '''
        root = container.parsed(self.name)
        container.dirty(self.name)
        if container.mime_map[self.name] in OEB_DOCS:
            for style in root.xpath('//*[local-name()="style"]'):
                # A missing type attribute is treated as text/css
                if style.get('type', 'text/css') != 'text/css':
                    continue
                if style.text and style.text.strip():
                    fix_style_tag(container, style)
        return True
|
||||
|
||||
# Patterns for pulling a pair of integers (used as line and column) out of a
# message: the first matches text of the form "[12:34", the second matches
# text of the form "12, 34)".
pos_pats = (re.compile(r'\[(\d+):(\d+)'), re.compile(r'(\d+), (\d+)\)'))
|
||||
|
||||
class ErrorHandler(object):

    '''
    Stand-in for the logger used by cssutils while parsing. Instead of
    emitting log output it records every error/warning message that carries a
    position as a CSSError in ``self.errors``.
    '''

    def __init__(self, name):
        '''
        :param name: name of the file being parsed, passed on to every
                     CSSError that is recorded
        '''
        self.errors = []
        self.name = name

    def __noop(self, *args, **kwargs):
        pass
    # Parts of the logger interface that are accepted and ignored
    info = debug = setLevel = getEffectiveLevel = addHandler = removeHandler = __noop

    def __handle(self, level, *args):
        ''' Record the message built from ``args`` if a position can be found in it '''
        msg = ' '.join(map(unicode, args))
        position = None
        for pat in pos_pats:
            found = pat.search(msg)
            if found is None:
                continue
            # No break: when several patterns match, the last one wins
            position = int(found.group(1)), int(found.group(2))
        if not msg or position is None:
            # Ignore error messages with no line numbers as these are usually
            # summary messages for an underlying error with a line number
            return
        line, col = position
        self.errors.append(CSSError(level, msg, self.name, line, col))

    def error(self, *args):
        ''' Record a message at ERROR level '''
        self.__handle(ERROR, *args)

    def warn(self, *args):
        ''' Record a message at WARN level '''
        self.__handle(WARN, *args)
    warning = warn
|
||||
|
||||
def check_css_parsing(name, raw, line_offset=0):
    '''
    Parse ``raw`` as a CSS stylesheet with validation turned on and return
    the problems that were reported during parsing.

    :param name: name of the file the CSS comes from, stored on every error
    :param raw: the CSS source to parse
    :param line_offset: value added to the line number of every error, for
                        CSS that is embedded somewhere inside a larger file
    :return: list of the errors collected by the ErrorHandler
    '''
    handler = ErrorHandler(name)
    # NOTE(review): the fetcher hands back (None, None) for every URL, which
    # presumably stops cssutils from loading @import-ed sheets -- confirm.
    css_parser = cssutils.CSSParser(log=handler, fetcher=lambda x: (None, None))
    css_parser.parseString(raw, validate=True)
    found = handler.errors
    for problem in found:
        problem.line += line_offset
    return found
|
||||
|
@ -434,6 +434,7 @@ class Container(object): # {{{
|
||||
from cssutils import CSSParser, log
|
||||
log.setLevel(logging.WARN)
|
||||
log.raiseExceptions = False
|
||||
if isinstance(data, bytes):
|
||||
data = self.decode(data)
|
||||
if not self.tweak_mode:
|
||||
data = self.css_preprocessor(data)
|
||||
|
Loading…
x
Reference in New Issue
Block a user