mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Error checking for CSS (stylesheets and inside <style> tags)
This commit is contained in:
parent
6897a97342
commit
ed50bc7ed5
@ -8,11 +8,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
from future_builtins import map
|
from future_builtins import map
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||||
from calibre.ebooks.oeb.polish.container import guess_type
|
from calibre.ebooks.oeb.polish.container import guess_type
|
||||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing
|
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag
|
||||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||||
from calibre.ebooks.oeb.polish.check.links import check_links
|
from calibre.ebooks.oeb.polish.check.links import check_links
|
||||||
|
|
||||||
@ -23,13 +23,15 @@ def run_checks(container):
|
|||||||
errors = []
|
errors = []
|
||||||
|
|
||||||
# Check parsing
|
# Check parsing
|
||||||
xml_items, html_items, raster_images = [], [], []
|
xml_items, html_items, raster_images, stylesheets = [], [], [], []
|
||||||
for name, mt in container.mime_map.iteritems():
|
for name, mt in container.mime_map.iteritems():
|
||||||
items = None
|
items = None
|
||||||
if mt in XML_TYPES:
|
if mt in XML_TYPES:
|
||||||
items = xml_items
|
items = xml_items
|
||||||
elif mt in OEB_DOCS:
|
elif mt in OEB_DOCS:
|
||||||
items = html_items
|
items = html_items
|
||||||
|
elif mt in OEB_STYLES:
|
||||||
|
items = stylesheets
|
||||||
elif is_raster_image(mt):
|
elif is_raster_image(mt):
|
||||||
items = raster_images
|
items = raster_images
|
||||||
if items is not None:
|
if items is not None:
|
||||||
@ -38,6 +40,15 @@ def run_checks(container):
|
|||||||
errors.extend(run_checkers(check_xml_parsing, html_items))
|
errors.extend(run_checkers(check_xml_parsing, html_items))
|
||||||
errors.extend(run_checkers(check_raster_images, raster_images))
|
errors.extend(run_checkers(check_raster_images, raster_images))
|
||||||
|
|
||||||
|
# cssutils is not thread safe
|
||||||
|
for name, mt, raw in stylesheets:
|
||||||
|
errors.extend(check_css_parsing(name, raw))
|
||||||
|
for name, mt, raw in html_items:
|
||||||
|
root = container.parsed(name)
|
||||||
|
for style in root.xpath('//*[local-name()="style"]'):
|
||||||
|
if style.get('type', 'text/css') == 'text/css':
|
||||||
|
errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
|
||||||
|
|
||||||
errors += check_links(container)
|
errors += check_links(container)
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
@ -46,8 +57,13 @@ def fix_errors(container, errors):
|
|||||||
# Fix parsing
|
# Fix parsing
|
||||||
changed = False
|
changed = False
|
||||||
for name in {e.name for e in errors if getattr(e, 'is_parsing_error', False)}:
|
for name in {e.name for e in errors if getattr(e, 'is_parsing_error', False)}:
|
||||||
container.parsed(name)
|
root = container.parsed(name)
|
||||||
container.dirty(name)
|
container.dirty(name)
|
||||||
|
if container.mime_map[name] in OEB_DOCS:
|
||||||
|
for style in root.xpath('//*[local-name()="style"]'):
|
||||||
|
if style.get('type', 'text/css') == 'text/css' and style.text and style.text.strip():
|
||||||
|
fix_style_tag(container, style)
|
||||||
|
|
||||||
changed = True
|
changed = True
|
||||||
|
|
||||||
for err in errors:
|
for err in errors:
|
||||||
|
@ -9,10 +9,12 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from lxml.etree import XMLParser, fromstring, XMLSyntaxError
|
from lxml.etree import XMLParser, fromstring, XMLSyntaxError
|
||||||
|
import cssutils
|
||||||
|
|
||||||
|
from calibre import force_unicode
|
||||||
from calibre.ebooks.html_entities import html5_entities
|
from calibre.ebooks.html_entities import html5_entities
|
||||||
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
||||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
|
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
|
|
||||||
HTML_ENTITTIES = frozenset(html5_entities)
|
HTML_ENTITTIES = frozenset(html5_entities)
|
||||||
@ -21,6 +23,15 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES
|
|||||||
|
|
||||||
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
|
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
|
||||||
|
|
||||||
|
def fix_style_tag(container, style):
    '''
    Replace the text of a <style> element with a re-serialized version of
    its CSS.

    The CSS is parsed with ``container.parse_css()`` and the resulting
    sheet's ``cssText`` is written back, surrounded by newlines. The
    whitespace that precedes the tag on its own line is appended after the
    CSS.

    :param container: Object providing ``parse_css(text)``.
    :param style: The <style> element (lxml-style API: ``getprevious()``,
        ``getparent()``, ``text``, ``tail``) to rewrite in place.
    '''
    prev = style.getprevious()
    if prev is None:
        parent = style.getparent()
        ws = None if parent is None else parent.text
    else:
        ws = prev.tail
    # ws is None (or empty) when nothing but markup precedes the tag, for
    # example <head><style>...; treat that as "no indentation" instead of
    # failing on None.splitlines() or [][-1].
    lines = (ws or '').splitlines()
    last_line = lines[-1] if lines else ''
    # Trailing whitespace of the last line before the tag == its indentation
    indent = last_line[len(last_line.rstrip()):]

    sheet = container.parse_css(style.text)
    # NOTE(review): cssText is presumably a UTF-8 encoded bytestring, hence
    # the force_unicode() call -- confirm against cssutils.
    style.text = '\n' + force_unicode(sheet.cssText, 'utf-8') + '\n' + indent
|
||||||
|
|
||||||
class XMLParseError(BaseError):
|
class XMLParseError(BaseError):
|
||||||
|
|
||||||
is_parsing_error = True
|
is_parsing_error = True
|
||||||
@ -131,3 +142,73 @@ def check_xml_parsing(name, mt, raw):
|
|||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
class CSSError(BaseError):

    '''
    An error or warning produced while parsing CSS, either a stylesheet or
    the contents of a <style> tag. All messages are prefixed with "CSS: ".
    Calling the instance with a container attempts the automatic fix.
    '''

    # Checked via getattr(e, 'is_parsing_error', False) by fix_errors()
    is_parsing_error = True

    def __init__(self, level, msg, name, line, col):
        self.level = level
        BaseError.__init__(self, 'CSS: ' + msg, name, line, col)
        if level != WARN:
            # Anything other than a warning is offered an automatic fix
            self.HELP = _('Some reader programs are very'
                          ' finicky about CSS stylesheets and will ignore the whole'
                          ' sheet if there is an error. These errors can often'
                          ' be fixed automatically, however, automatic fixing will'
                          ' typically remove unrecognized items, instead of correcting them.')
            self.INDIVIDUAL_FIX = _('Try to fix parsing errors in this stylesheet automatically')
            return
        self.HELP = _('This CSS construct is not recognized. That means that it'
                      ' most likely will not work on reader devices. Consider'
                      ' replacing it with something else.')

    def __call__(self, container):
        ''' Parse the file this error belongs to, mark it dirty and, for
        HTML documents, rewrite every non-empty CSS <style> tag. Always
        returns True. '''
        name = self.name
        root = container.parsed(name)
        container.dirty(name)
        if container.mime_map[name] not in OEB_DOCS:
            return True
        for tag in root.xpath('//*[local-name()="style"]'):
            # A missing type attribute is treated as text/css
            if tag.get('type', 'text/css') != 'text/css':
                continue
            css = tag.text
            if css and css.strip():
                fix_style_tag(container, tag)
        return True
|
||||||
|
|
||||||
|
# Patterns used to find a pair of integers (taken as line and column) in a
# log message: the first matches text of the form "[12:34", the second text
# of the form "12, 34)".
pos_pats = (re.compile(r'\[(\d+):(\d+)'), re.compile(r'(\d+), (\d+)\)'))
|
||||||
|
|
||||||
|
class ErrorHandler(object):

    '''
    Stand-in for the cssutils logger. Errors and warnings that carry a
    position are collected as CSSError objects in self.errors; every other
    logger method used is a no-op.
    '''

    def __init__(self, name):
        self.errors = []
        self.name = name

    def __noop(self, *args, **kwargs):
        pass
    # Parts of the logger interface that are deliberately ignored
    info = debug = setLevel = getEffectiveLevel = addHandler = removeHandler = __noop

    def __handle(self, level, *args):
        text = ' '.join(map(unicode, args))
        matches = [m for m in (pat.search(text) for pat in pos_pats) if m is not None]
        # Messages with no line numbers are ignored, as these are usually
        # summary messages for an underlying error with a line number
        if not text or not matches:
            return
        # When more than one pattern matches, the last pattern wins
        line, col = map(int, matches[-1].groups())
        self.errors.append(CSSError(level, text, self.name, line, col))

    def error(self, *args):
        self.__handle(ERROR, *args)

    def warn(self, *args):
        self.__handle(WARN, *args)
    warning = warn
|
||||||
|
|
||||||
|
def check_css_parsing(name, raw, line_offset=0):
    '''
    Parse CSS with cssutils (validation enabled) and return the problems
    found.

    :param name: Name of the file the CSS comes from; stored on every error.
    :param raw: The CSS text. May be None or empty (for example the text of
        an empty <style/> tag), in which case there is nothing to check.
    :param line_offset: Added to the line number of every error. Callers
        checking a <style> tag pass the tag's line number minus one.
    :return: A list of CSSError objects, empty if no problems were found.
    '''
    if not raw:
        # Nothing to parse; avoids handing None to the cssutils parser
        return []
    log = ErrorHandler(name)
    # NOTE(review): the fetcher returning (None, None) presumably prevents
    # cssutils from loading @import-ed sheets -- confirm against cssutils.
    parser = cssutils.CSSParser(fetcher=lambda x: (None, None), log=log)
    parser.parseString(raw, validate=True)
    for err in log.errors:
        err.line += line_offset
    return log.errors
|
||||||
|
@ -434,6 +434,7 @@ class Container(object): # {{{
|
|||||||
from cssutils import CSSParser, log
|
from cssutils import CSSParser, log
|
||||||
log.setLevel(logging.WARN)
|
log.setLevel(logging.WARN)
|
||||||
log.raiseExceptions = False
|
log.raiseExceptions = False
|
||||||
|
if isinstance(data, bytes):
|
||||||
data = self.decode(data)
|
data = self.decode(data)
|
||||||
if not self.tweak_mode:
|
if not self.tweak_mode:
|
||||||
data = self.css_preprocessor(data)
|
data = self.css_preprocessor(data)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user