Edit book: Check book: Improved checking of CSS, with support for CSS 3

This commit is contained in:
Kovid Goyal 2020-01-08 18:53:57 +05:30
parent 7a16568127
commit 850b2b8a9d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 301 additions and 23 deletions

View File

@ -0,0 +1,268 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import atexit
import json
import numbers
import sys
from collections import namedtuple
from PyQt5 import sip
from PyQt5.Qt import QApplication, QEventLoop, pyqtSignal
from PyQt5.QtWebEngineWidgets import (
QWebEnginePage, QWebEngineProfile, QWebEngineScript
)
from calibre import detect_ncpus as cpu_count, prints
from calibre.ebooks.oeb.polish.check.base import ERROR, WARN, BaseError
from calibre.gui2 import must_use_qt
from calibre.gui2.webengine import secure_webengine
class CSSParseError(BaseError):
    # Used for messages CSSLint files under its 'Parsing Errors' rule and for
    # stylesheets that could not be processed at all (see check_css()).
    level = ERROR
    # Flags this error as a parse failure rather than an ordinary lint finding.
    is_parsing_error = True
class CSSError(BaseError):
    # Used for CSSLint messages of type 'error' that are not parsing errors.
    level = ERROR
class CSSWarning(BaseError):
    # Default class for every CSSLint message whose type is not 'error'.
    level = WARN
def as_int_or_none(x):
    """Coerce *x* to an integer, returning None when that is not possible.

    None and values that are already integral are returned unchanged.
    Anything else is passed through int(); any failure yields None.
    """
    if x is None or isinstance(x, numbers.Integral):
        return x
    try:
        return int(x)
    except Exception:
        return None
def message_to_error(message, name, line_offset=0):
    """Convert one CSSLint message into an error object for file *name*.

    :param message: a dict describing one lint message; the keys read here
        are 'rule', 'type', 'message', 'line' and 'col'.
    :param name: the name the resulting error is reported against.
    :param line_offset: added to the reported line number, when there is one.
    :return: a CSSWarning, CSSError or CSSParseError instance, or None when
        the message comes from a rule that is not marked as applying to all
        browsers.
    """
    rule = message.get('rule', {})
    if rule.get('browsers', 'All') != 'All':
        # Rules that only concern particular browsers are dropped
        return None

    # Only messages of type 'error' are promoted above a warning
    if message.get('type') != 'error':
        error_class = CSSWarning
    elif rule.get('name') == 'Parsing Errors':
        error_class = CSSParseError
    else:
        error_class = CSSError

    line = as_int_or_none(message.get('line'))
    if line is not None:
        line += line_offset
    col = as_int_or_none(message.get('col'))
    if col is not None:
        # presumably CSSLint columns are 1-based while callers want 0-based
        col -= 1

    title = message.get('message') or _('Unknown error')
    err = error_class(title, name, line, col)
    err.HELP = rule.get('desc') or ''
    err.css_rule_id = rule.get('id') or ''
    if err.HELP and 'url' in rule:
        err.HELP += ' ' + _('See {}').format(rule['url'])
    return err
def csslint_js():
    """Return the JavaScript source injected into every checker page.

    The source is the bundled csslint.js resource followed by a small
    wrapper that defines window.check_css(src). The wrapper enables every
    CSSLint rule that is not in its ignore list, giving the rules listed in
    error_rules the value 2 and all others the value 1, and returns the
    result of CSSLint.verify(). The last statement sets the document title
    to 'ready', which Worker.title_changed() watches for.

    The assembled string is cached as an attribute on this function, so the
    resource is read at most once.
    """
    cached = getattr(csslint_js, 'ans', None)
    if cached is not None:
        return cached
    linter_source = P('csslint.js', data=True, allow_user_override=False).decode('utf-8')
    csslint_js.ans = linter_source + '''
window.check_css = function(src) {
var rules = CSSLint.getRules();
var ruleset = {};
var ignored_rules = {
'order-alphabetical': 1,
'font-sizes': 1,
'zero-units': 1,
'bulletproof-font-face': 1,
'import': 1,
'box-model': 1,
'adjoining-classes': 1,
'box-sizing': 1,
'compatible-vendor-prefixes': 1,
'text-indent': 1,
'fallback-colors': 1,
'font-faces': 1,
'regex-selectors': 1,
'universal-selector': 1,
'unqualified-attributes': 1,
'overqualified-elements': 1,
'shorthand': 1,
'duplicate-background-images': 1,
'floats': 1,
'ids': 1,
'gradients': 1
};
var error_rules = {
'known-properties': 1,
'duplicate-properties': 1,
'vendor-prefix': 1
};
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (!ignored_rules[rule.id]) ruleset[rule.id] = error_rules[rule.id] ? 2 : 1;
}
var result = CSSLint.verify(src, ruleset);
return result;
}
document.title = 'ready';
'''
    return csslint_js.ans
def create_profile():
    """Return the shared QWebEngineProfile used by all checker pages.

    The profile is created on first use, parented to the QApplication
    instance, and cached as an attribute on this function. It carries a
    single script named 'csslint.js', holding the source from csslint_js(),
    registered for QWebEngineScript.ApplicationWorld.
    """
    profile = getattr(create_profile, 'ans', None)
    if profile is not None:
        return profile
    profile = create_profile.ans = QWebEngineProfile(QApplication.instance())
    script = QWebEngineScript()
    script.setName('csslint.js')
    script.setSourceCode(csslint_js())
    script.setWorldId(QWebEngineScript.ApplicationWorld)
    profile.scripts().insert(script)
    return profile
class Worker(QWebEnginePage):
    """A web page that lints one stylesheet at a time with window.check_css().

    work_done is emitted as (worker, result) once a check has finished, where
    result is whatever the JavaScript call handed to its callback.
    """

    work_done = pyqtSignal(object, object)

    def __init__(self):
        must_use_qt()
        QWebEnginePage.__init__(self, create_profile(), QApplication.instance())
        self.titleChanged.connect(self.title_changed)
        secure_webengine(self.settings())
        self.console_messages = []
        self.ready = False  # becomes True once the page title turns to 'ready'
        self.working = False  # True while a check is queued or in flight
        self.pending = None  # source waiting for the page to become ready
        # Load an empty document so that the profile's script gets a page to run in
        self.setHtml('')

    def title_changed(self, new_title):
        """Mark the page ready and start any check that was queued before that."""
        if new_title != 'ready':
            return
        self.ready = True
        queued = self.pending
        if queued is not None:
            self.check_css(queued)
            self.pending = None

    def javaScriptConsoleMessage(self, level, msg, lineno, source_id):
        """Record a console message from the page and echo it to stdout."""
        text = '{}:{}:{}'.format(source_id, lineno, msg)
        self.console_messages.append(text)
        try:
            print(text)
        except Exception:
            # Best effort only, the message is already recorded above
            pass

    def check_css(self, src):
        """Start linting *src*; check_done() receives the outcome."""
        self.working = True
        self.console_messages = []
        # json.dumps() turns the source into a quoted JavaScript string literal
        expression = 'window.check_css({})'.format(json.dumps(src))
        self.runJavaScript(expression, QWebEngineScript.ApplicationWorld, self.check_done)

    def check_css_when_ready(self, src):
        """Lint *src* now if the page is ready, otherwise queue it."""
        if not self.ready:
            self.working = True
            self.pending = src
            return
        self.check_css(src)

    def check_done(self, result):
        """Callback for the JavaScript call: mark idle and announce the result."""
        self.working = False
        self.work_done.emit(self, result)
class Pool(object):
    """Distributes CSS sources over a lazily grown set of Worker pages.

    Workers are created on demand, up to max_workers, and are kept around
    between calls until shutdown() is invoked.
    """

    def __init__(self):
        self.workers = []
        # Guard against a zero CPU count, which would leave no worker to ever
        # pick up the pending jobs
        self.max_workers = max(1, cpu_count())
        # State of the current check_css() run
        self.pending = []
        self.results = []
        self.working = False

    def add_worker(self):
        """Create one more Worker and subscribe to its completion signal."""
        w = Worker()
        w.work_done.connect(self.work_done)
        self.workers.append(w)

    def check_css(self, css_sources):
        """Lint every source in *css_sources* and return the per-source results.

        Blocks, processing Qt events with user input excluded, until all
        sources are done. The returned list is in the same order as
        *css_sources*. Each entry is either the dict produced by the
        JavaScript checker or, when the checker did not return a dict, the
        list of console messages recorded by the worker.
        """
        sources = list(css_sources)
        if not sources:
            # Without any work no worker would ever clear self.working and
            # the event loop below would spin forever
            return []
        self.pending = list(enumerate(sources))
        self.results = list(range(len(sources)))
        self.working = True
        self.assign_work()
        app = QApplication.instance()
        while self.working:
            app.processEvents(QEventLoop.WaitForMoreEvents | QEventLoop.ExcludeUserInputEvents)
        return self.results

    def assign_work(self):
        """Hand pending sources to idle workers, growing the pool as allowed."""
        while self.pending:
            if len(self.workers) < self.max_workers:
                self.add_worker()
            for w in self.workers:
                if not w.working:
                    idx, src = self.pending.pop()
                    w.result_idx = idx
                    w.check_css_when_ready(src)
                    break
            else:
                # Every worker is busy, wait for one of them to finish
                break

    def work_done(self, worker, result):
        """Store the result *worker* just produced and keep the pool busy."""
        # The result must be recorded before assign_work() runs. The worker
        # that just finished is idle, so assign_work() may immediately reuse
        # it, which overwrites its result_idx and resets its console_messages.
        if not isinstance(result, dict):
            result = worker.console_messages
        self.results[worker.result_idx] = result
        self.assign_work()
        if not self.pending and not any(w.working for w in self.workers):
            self.working = False

    def shutdown(self):
        """Delete all worker pages and forget them."""
        for w in self.workers:
            sip.delete(w)
        self.workers = []
# Module level pool instance used by check_css() below.
pool = Pool()
# Exposed under a module level name so the workers can be deleted explicitly.
shutdown = pool.shutdown
# Also delete the workers when the interpreter exits.
atexit.register(shutdown)
# One unit of work: the name errors are reported against, the CSS text to
# lint and the value added to the line numbers the linter reports.
Job = namedtuple('Job', 'name css line_offset')
def create_job(name, css, line_offset=0, is_declaration=False):
    """Build the Job describing one piece of CSS to lint.

    :param name: the name errors are reported against.
    :param css: the CSS text.
    :param line_offset: value to add to line numbers reported for this text.
    :param is_declaration: True when *css* is a bare declaration list rather
        than a stylesheet. It is then wrapped in a dummy ``a{ ... }`` rule,
        and the offset is reduced by one to make up for the extra first line.
    """
    if not is_declaration:
        return Job(name, css, line_offset)
    wrapped = 'a{\n' + css + '\n}'
    return Job(name, wrapped, line_offset - 1)
def check_css(jobs):
    """Lint all *jobs* using the module level pool and return the errors.

    A dict result has each of its 'messages' converted by message_to_error(),
    skipping the ones that function rejects. Any other result is reported as
    a single CSSParseError for the job, quoting the collected console
    messages when the result is a non-empty list.
    """
    errors = []
    results = pool.check_css([job.css for job in jobs])
    for job, result in zip(jobs, results):
        if isinstance(result, dict):
            converted = (
                message_to_error(msg, job.name, job.line_offset)
                for msg in result['messages']
            )
            errors.extend(err for err in converted if err is not None)
            continue
        if isinstance(result, list) and result:
            text = _('Failed to process {name} with errors: {errors}').format(
                name=job.name, errors='\n'.join(result))
        else:
            text = _('Failed to process {name}').format(name=job.name)
        errors.append(CSSParseError(text, job.name))
    return errors
def main():
    """Lint the CSS file named by the last command line argument.

    The file is read as UTF-8 and every resulting error is printed.
    """
    path = sys.argv[-1]
    with open(path, 'rb') as f:
        css = f.read().decode('utf-8')
    for error in check_css([create_job(path, css)]):
        prints(error)
if __name__ == '__main__':
    try:
        main()
    finally:
        # Delete the pool's workers even when main() raised
        shutdown()

View File

@ -12,16 +12,34 @@ from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.cover import is_raster_image
from calibre.ebooks.oeb.polish.check.base import run_checkers, WARN
from calibre.ebooks.oeb.polish.check.parsing import (
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag,
check_filenames, check_xml_parsing, fix_style_tag,
check_html_size, check_ids, check_markup, EmptyFile, check_encoding_declarations)
from calibre.ebooks.oeb.polish.check.images import check_raster_images
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
from calibre.ebooks.oeb.polish.check.opf import check_opf
from polyglot.builtins import as_unicode
XML_TYPES = frozenset(map(guess_type, ('a.xml', 'a.svg', 'a.opf', 'a.ncx'))) | {'application/oebps-page-map+xml'}
class CSSChecker(object):
    """Collects CSS checking jobs and runs them all in a single batch.

    The css check module is imported lazily inside the methods, so it is
    only loaded once there is actually some CSS to check.
    """

    def __init__(self):
        self.jobs = []

    def create_job(self, name, raw, line_offset=0, is_declaration=False):
        """Queue *raw* (converted with as_unicode()) for checking under *name*."""
        from calibre.ebooks.oeb.polish.check.css import create_job
        self.jobs.append(create_job(name, as_unicode(raw), line_offset, is_declaration))

    def __call__(self):
        """Run all queued jobs and return the errors; () when nothing is queued."""
        if not self.jobs:
            # Nothing to do, so do not pay for importing the checker
            return ()
        from calibre.ebooks.oeb.polish.check.css import check_css
        return check_css(self.jobs)
def run_checks(container):
errors = []
@ -49,28 +67,32 @@ def run_checks(container):
if err.level > WARN:
return errors
# css_parser is not thread safe
# css uses its own worker pool
css_checker = CSSChecker()
for name, mt, raw in stylesheets:
if not raw:
errors.append(EmptyFile(name))
continue
errors.extend(check_css_parsing(name, raw))
css_checker.create_job(name, raw)
errors.extend(css_checker())
for name, mt, raw in html_items + xml_items:
errors.extend(check_encoding_declarations(name, container))
css_checker = CSSChecker()
for name, mt, raw in html_items:
if not raw:
continue
root = container.parsed(name)
for style in root.xpath('//*[local-name()="style"]'):
if style.get('type', 'text/css') == 'text/css' and style.text:
errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
css_checker.create_job(name, style.text, line_offset=style.sourceline - 1)
for elem in root.xpath('//*[@style]'):
raw = elem.get('style')
if raw:
errors.extend(check_css_parsing(name, raw, line_offset=elem.sourceline - 1, is_declaration=True))
css_checker.create_job(name, raw, line_offset=elem.sourceline - 1, is_declaration=True)
errors.extend(css_checker())
errors += check_mimetypes(container)
errors += check_links(container) + check_link_destinations(container)
errors += check_fonts(container)

View File

@ -8,7 +8,6 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from lxml.etree import XMLSyntaxError
import css_parser
from calibre import force_unicode, human_readable, prepare_string_for_xml
from calibre.ebooks.chardet import replace_encoding_declarations, find_declared_encoding
@ -465,21 +464,6 @@ class ErrorHandler(object):
warning = warn
def check_css_parsing(name, raw, line_offset=0, is_declaration=False):
    """Parse *raw* with css_parser and return the errors its log collected.

    :param name: passed to ErrorHandler and used for a DecodeError.
    :param raw: a stylesheet, or a declaration list when *is_declaration*.
    :param line_offset: added to the line of every collected error.
    :return: the ErrorHandler's error list, or a single DecodeError when
        parsing a stylesheet raises UnicodeDecodeError.
    """
    handler = ErrorHandler(name)
    parser = css_parser.CSSParser(fetcher=lambda x: (None, None), log=handler)
    if not is_declaration:
        try:
            parser.parseString(raw, validate=True)
        except UnicodeDecodeError:
            return [DecodeError(name)]
    else:
        parser.parseStyle(raw, validate=True)
    collected = handler.errors
    for err in collected:
        err.line += line_offset
    return collected
def check_filenames(container):
errors = []
all_names = set(container.name_path_map) - container.names_that_must_not_be_changed

View File

@ -1294,7 +1294,7 @@ class Boss(QObject):
if is_mult:
editor.go_to_line(*(item.all_locations[item.current_location_index][1:3]))
else:
editor.go_to_line(item.line, item.col)
editor.go_to_line(item.line or 0, item.col or 0)
editor.set_focus()
@in_thread_job

View File

@ -11,6 +11,7 @@ from PyQt5.Qt import QIcon
from PyQt5.QtWebEngineCore import QWebEngineUrlScheme
from calibre.constants import EDITOR_APP_UID, FAKE_PROTOCOL, islinux
from calibre.ebooks.oeb.polish.check.css import shutdown as shutdown_css_check_pool
from calibre.gui2 import (
Application, decouple, set_gui_prefs, setup_gui_option_parser
)
@ -94,7 +95,10 @@ def _run(args, notify=None):
def main(args=sys.argv):
    """Run the application once and always shut the CSS check pool down.

    _run() is called exactly once. The pool shutdown sits in a finally
    clause so it also happens when _run() raises.
    """
    try:
        _run(args)
    finally:
        shutdown_css_check_pool()
if __name__ == '__main__':