From 850b2b8a9d11c13de5a30324af79b17354843916 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 8 Jan 2020 18:53:57 +0530
Subject: [PATCH] Edit book: Check book: Improved checking of CSS, with support for CSS 3

---
Review notes (this text sits after the "---" separator, so it is not part of
the commit message). Changes relative to the patch as originally posted, all
confined to the new file css.py:

 * Pool.work_done() now stores the result before calling assign_work().
   assign_work() may give the worker that just finished a new job, which
   overwrites worker.result_idx and resets worker.console_messages, so the
   old order filed results under the wrong index.
 * Pool.check_css() returns [] for an empty list of sources instead of
   waiting for a completion signal that is never emitted.
 * Docstrings and comments added. The css.py hunk size and the diffstat were
   adjusted accordingly; the blob id on the css.py index line is unchanged
   from the original patch and no longer matches the new content.

 src/calibre/ebooks/oeb/polish/check/css.py    | 293 ++++++++++++++++++
 src/calibre/ebooks/oeb/polish/check/main.py   |  32 ++-
 .../ebooks/oeb/polish/check/parsing.py        |  16 --
 src/calibre/gui2/tweak_book/boss.py           |   2 +-
 src/calibre/gui2/tweak_book/main.py           |   6 +-
 5 files changed, 326 insertions(+), 23 deletions(-)
 create mode 100644 src/calibre/ebooks/oeb/polish/check/css.py

diff --git a/src/calibre/ebooks/oeb/polish/check/css.py b/src/calibre/ebooks/oeb/polish/check/css.py
new file mode 100644
index 0000000000..9e4087013f
--- /dev/null
+++ b/src/calibre/ebooks/oeb/polish/check/css.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2020, Kovid Goyal
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import atexit
+import json
+import numbers
+import sys
+from collections import namedtuple
+
+from PyQt5 import sip
+from PyQt5.Qt import QApplication, QEventLoop, pyqtSignal
+from PyQt5.QtWebEngineWidgets import (
+    QWebEnginePage, QWebEngineProfile, QWebEngineScript
+)
+
+from calibre import detect_ncpus as cpu_count, prints
+from calibre.ebooks.oeb.polish.check.base import ERROR, WARN, BaseError
+from calibre.gui2 import must_use_qt
+from calibre.gui2.webengine import secure_webengine
+
+
+class CSSParseError(BaseError):
+    level = ERROR
+    is_parsing_error = True
+
+
+class CSSError(BaseError):
+    level = ERROR
+
+
+class CSSWarning(BaseError):
+    level = WARN
+
+
+def as_int_or_none(x):
+    ' Return x as an integer, or None if x is None or int(x) fails '
+    if x is not None and not isinstance(x, numbers.Integral):
+        try:
+            x = int(x)
+        except Exception:
+            x = None
+    return x
+
+
+def message_to_error(message, name, line_offset=0):
+    '''
+    Convert a single CSSLint message (a dict) into an error object for the file name.
+    Messages of type 'error' become CSSParseError (rule name 'Parsing Errors') or
+    CSSError, everything else a CSSWarning. Returns None if the rule's 'browsers'
+    field is present and not 'All'. col is reduced by one and line_offset is added to line.
+    '''
+    rule = message.get('rule', {})
+    if rule.get('browsers', 'All') != 'All':
+        return
+    rule_id = rule.get('id') or ''
+    cls = CSSWarning
+    if message.get('type') == 'error':
+        cls = CSSParseError if rule.get('name') == 'Parsing Errors' else CSSError
+    title = message.get('message') or _('Unknown error')
+    line = as_int_or_none(message.get('line'))
+    col = as_int_or_none(message.get('col'))
+    if col is not None:
+        col -= 1
+    if line is not None:
+        line += line_offset
+    ans = cls(title, name, line, col)
+    ans.HELP = rule.get('desc') or ''
+    ans.css_rule_id = rule_id
+    if ans.HELP and 'url' in rule:
+        ans.HELP += ' ' + _('See {}').format(rule['url'])
+    return ans
+
+
+def csslint_js():
+    ' Return the csslint.js source with a window.check_css() helper appended; cached on the function object '
+    ans = getattr(csslint_js, 'ans', None)
+    if ans is None:
+        ans = csslint_js.ans = P('csslint.js', data=True, allow_user_override=False).decode('utf-8') + '''
+
+        window.check_css = function(src) {
+            var rules = CSSLint.getRules();
+            var ruleset = {};
+            var ignored_rules = {
+                'order-alphabetical': 1,
+                'font-sizes': 1,
+                'zero-units': 1,
+                'bulletproof-font-face': 1,
+                'import': 1,
+                'box-model': 1,
+                'adjoining-classes': 1,
+                'box-sizing': 1,
+                'compatible-vendor-prefixes': 1,
+                'text-indent': 1,
+                'fallback-colors': 1,
+                'font-faces': 1,
+                'regex-selectors': 1,
+                'universal-selector': 1,
+                'unqualified-attributes': 1,
+                'overqualified-elements': 1,
+                'shorthand': 1,
+                'duplicate-background-images': 1,
+                'floats': 1,
+                'ids': 1,
+                'gradients': 1
+            };
+            var error_rules = {
+                'known-properties': 1,
+                'duplicate-properties': 1,
+                'vendor-prefix': 1
+            };
+
+            for (var i = 0; i < rules.length; i++) {
+                var rule = rules[i];
+                if (!ignored_rules[rule.id]) ruleset[rule.id] = error_rules[rule.id] ? 2 : 1;
+            }
+            var result = CSSLint.verify(src, ruleset);
+            return result;
+        }
+        document.title = 'ready';
+        '''
+    return ans
+
+
+def create_profile():
+    ' Return the shared QWebEngineProfile that has the csslint_js() script registered, creating it on first use '
+    ans = getattr(create_profile, 'ans', None)
+    if ans is None:
+        ans = create_profile.ans = QWebEngineProfile(QApplication.instance())
+        s = QWebEngineScript()
+        s.setName('csslint.js')
+        s.setSourceCode(csslint_js())
+        s.setWorldId(QWebEngineScript.ApplicationWorld)
+        ans.scripts().insert(s)
+    return ans
+
+
+class Worker(QWebEnginePage):
+    ' A web page that runs window.check_css() on one CSS source at a time and emits work_done(worker, result) '
+
+    work_done = pyqtSignal(object, object)
+
+    def __init__(self):
+        must_use_qt()
+        QWebEnginePage.__init__(self, create_profile(), QApplication.instance())
+        self.titleChanged.connect(self.title_changed)
+        secure_webengine(self.settings())
+        self.console_messages = []
+        self.ready = False
+        self.working = False
+        self.pending = None
+        self.setHtml('')
+
+    def title_changed(self, new_title):
+        if new_title == 'ready':
+            self.ready = True
+            if self.pending is not None:
+                self.check_css(self.pending)
+                self.pending = None
+
+    def javaScriptConsoleMessage(self, level, msg, lineno, source_id):
+        msg = '{}:{}:{}'.format(source_id, lineno, msg)
+        self.console_messages.append(msg)
+        try:
+            print(msg)
+        except Exception:
+            pass
+
+    def check_css(self, src):
+        self.working = True
+        self.console_messages = []
+        self.runJavaScript(
+            'window.check_css({})'.format(json.dumps(src)), QWebEngineScript.ApplicationWorld, self.check_done)
+
+    def check_css_when_ready(self, src):
+        if self.ready:
+            self.check_css(src)
+        else:
+            # Mark as working right away so that Pool.assign_work() does not give this worker a second job
+            self.working = True
+            self.pending = src
+
+    def check_done(self, result):
+        self.working = False
+        self.work_done.emit(self, result)
+
+
+class Pool(object):
+    ' Distributes CSS sources over at most cpu_count() Worker pages, which are created on demand '
+
+    def __init__(self):
+        self.workers = []
+        self.max_workers = cpu_count()
+
+    def add_worker(self):
+        w = Worker()
+        w.work_done.connect(self.work_done)
+        self.workers.append(w)
+
+    def check_css(self, css_sources):
+        '''
+        Check all css_sources, processing Qt events (user input excluded) until every one is done.
+        Returns a list in the order of css_sources: the CSSLint result dict, or the worker's
+        console messages when the result was not a dict.
+        '''
+        if not css_sources:
+            # With no work, nothing would ever clear self.working and the loop below would wait forever
+            return []
+        self.pending = list(enumerate(css_sources))
+        self.results = list(range(len(css_sources)))
+        self.working = True
+        self.assign_work()
+        app = QApplication.instance()
+        while self.working:
+            app.processEvents(QEventLoop.WaitForMoreEvents | QEventLoop.ExcludeUserInputEvents)
+        return self.results
+
+    def assign_work(self):
+        ' Give pending sources to idle workers, adding workers while fewer than max_workers exist '
+        while self.pending:
+            if len(self.workers) < self.max_workers:
+                self.add_worker()
+            for w in self.workers:
+                if not w.working:
+                    idx, src = self.pending.pop()
+                    w.result_idx = idx
+                    w.check_css_when_ready(src)
+                    break
+            else:
+                break
+
+    def work_done(self, worker, result):
+        if not isinstance(result, dict):
+            result = worker.console_messages
+        # Store the result before assign_work(): this worker is idle now, so assign_work()
+        # may give it a new job, overwriting worker.result_idx and worker.console_messages
+        self.results[worker.result_idx] = result
+        self.assign_work()
+        if not self.pending and not [w for w in self.workers if w.working]:
+            self.working = False
+
+    def shutdown(self):
+        tuple(map(sip.delete, self.workers))
+        self.workers = []
+
+
+pool = Pool()
+shutdown = pool.shutdown
+atexit.register(shutdown)
+Job = namedtuple('Job', 'name css line_offset')
+
+
+def create_job(name, css, line_offset=0, is_declaration=False):
+    ' Create a Job; a bare declaration is wrapped in an a{...} rule and line_offset reduced by one for the added first line '
+    if is_declaration:
+        css = 'a{\n' + css + '\n}'
+        line_offset -= 1
+    return Job(name, css, line_offset)
+
+
+def check_css(jobs):
+    ' Run all jobs through the worker pool and return a flat list of the resulting error objects '
+    results = pool.check_css([j.css for j in jobs])
+    errors = []
+    for job, result in zip(jobs, results):
+        if isinstance(result, dict):
+            for msg in result['messages']:
+                err = message_to_error(msg, job.name, job.line_offset)
+                if err is not None:
+                    errors.append(err)
+        elif isinstance(result, list) and result:
+            errors.append(CSSParseError(_('Failed to process {name} with errors: {errors}').format(
+                name=job.name, errors='\n'.join(result)), job.name))
+        else:
+            errors.append(CSSParseError(_('Failed to process {name}').format(name=job.name), job.name))
+    return errors
+
+
+def main():
+    with open(sys.argv[-1], 'rb') as f:
+        css = f.read().decode('utf-8')
+    errors = check_css([create_job(sys.argv[-1], css)])
+    for error in errors:
+        prints(error)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    finally:
+        shutdown()
diff --git a/src/calibre/ebooks/oeb/polish/check/main.py b/src/calibre/ebooks/oeb/polish/check/main.py
index c19d743239..b9c43ff6f2 100644
--- a/src/calibre/ebooks/oeb/polish/check/main.py
+++ b/src/calibre/ebooks/oeb/polish/check/main.py
@@ -12,16 +12,34 @@ from calibre.ebooks.oeb.polish.utils import guess_type
 from calibre.ebooks.oeb.polish.cover import is_raster_image
 from calibre.ebooks.oeb.polish.check.base import run_checkers, WARN
 from calibre.ebooks.oeb.polish.check.parsing import (
-    check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag,
+    check_filenames, check_xml_parsing, fix_style_tag,
     check_html_size, check_ids, check_markup, EmptyFile, check_encoding_declarations)
 from calibre.ebooks.oeb.polish.check.images import check_raster_images
 from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
 from calibre.ebooks.oeb.polish.check.fonts import check_fonts
 from calibre.ebooks.oeb.polish.check.opf import check_opf
+from polyglot.builtins import as_unicode
+
 
 XML_TYPES = frozenset(map(guess_type, ('a.xml', 'a.svg', 'a.opf', 'a.ncx'))) | {'application/oebps-page-map+xml'}
 
 
+class CSSChecker(object):
+
+    def __init__(self):
+        self.jobs = []
+
+    def create_job(self, name, raw, line_offset=0, is_declaration=False):
+        from calibre.ebooks.oeb.polish.check.css import create_job
+        self.jobs.append(create_job(name, as_unicode(raw), line_offset, is_declaration))
+
+    def __call__(self):
+        from calibre.ebooks.oeb.polish.check.css import check_css
+        if not self.jobs:
+            return ()
+        return check_css(self.jobs)
+
+
 def run_checks(container):
 
     errors = []
@@ -49,28 +67,32 @@ def run_checks(container):
         if err.level > WARN:
             return errors
 
-    # css_parser is not thread safe
+    # css uses its own worker pool
+    css_checker = CSSChecker()
     for name, mt, raw in stylesheets:
         if not raw:
             errors.append(EmptyFile(name))
             continue
-        errors.extend(check_css_parsing(name, raw))
+        css_checker.create_job(name, raw)
+    errors.extend(css_checker())
 
     for name, mt, raw in html_items + xml_items:
         errors.extend(check_encoding_declarations(name, container))
 
+    css_checker = CSSChecker()
     for name, mt, raw in html_items:
         if not raw:
             continue
         root = container.parsed(name)
         for style in root.xpath('//*[local-name()="style"]'):
             if style.get('type', 'text/css') == 'text/css' and style.text:
-                errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
+                css_checker.create_job(name, style.text, line_offset=style.sourceline - 1)
         for elem in root.xpath('//*[@style]'):
             raw = elem.get('style')
             if raw:
-                errors.extend(check_css_parsing(name, raw, line_offset=elem.sourceline - 1, is_declaration=True))
+                css_checker.create_job(name, raw, line_offset=elem.sourceline - 1, is_declaration=True)
 
+    errors.extend(css_checker())
     errors += check_mimetypes(container)
     errors += check_links(container) + check_link_destinations(container)
     errors += check_fonts(container)
diff --git a/src/calibre/ebooks/oeb/polish/check/parsing.py b/src/calibre/ebooks/oeb/polish/check/parsing.py
index 66c91418dd..61d82b4b59 100644
--- a/src/calibre/ebooks/oeb/polish/check/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/check/parsing.py
@@ -8,7 +8,6 @@ __copyright__ = '2013, Kovid Goyal '
 import re
 
 from lxml.etree import XMLSyntaxError
-import css_parser
 
 from calibre import force_unicode, human_readable, prepare_string_for_xml
 from calibre.ebooks.chardet import replace_encoding_declarations, find_declared_encoding
@@ -465,21 +464,6 @@ class ErrorHandler(object):
     warning = warn
 
 
-def check_css_parsing(name, raw, line_offset=0, is_declaration=False):
-    log = ErrorHandler(name)
-    parser = css_parser.CSSParser(fetcher=lambda x: (None, None), log=log)
-    if is_declaration:
-        parser.parseStyle(raw, validate=True)
-    else:
-        try:
-            parser.parseString(raw, validate=True)
-        except UnicodeDecodeError:
-            return [DecodeError(name)]
-    for err in log.errors:
-        err.line += line_offset
-    return log.errors
-
-
 def check_filenames(container):
     errors = []
     all_names = set(container.name_path_map) - container.names_that_must_not_be_changed
diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py
index 036e9e3e25..2ea8e11093 100644
--- a/src/calibre/gui2/tweak_book/boss.py
+++ b/src/calibre/gui2/tweak_book/boss.py
@@ -1294,7 +1294,7 @@ class Boss(QObject):
             if is_mult:
                 editor.go_to_line(*(item.all_locations[item.current_location_index][1:3]))
             else:
-                editor.go_to_line(item.line, item.col)
+                editor.go_to_line(item.line or 0, item.col or 0)
             editor.set_focus()
 
     @in_thread_job
diff --git a/src/calibre/gui2/tweak_book/main.py b/src/calibre/gui2/tweak_book/main.py
index fd83516468..76b888c765 100644
--- a/src/calibre/gui2/tweak_book/main.py
+++ b/src/calibre/gui2/tweak_book/main.py
@@ -11,6 +11,7 @@ from PyQt5.Qt import QIcon
 from PyQt5.QtWebEngineCore import QWebEngineUrlScheme
 
 from calibre.constants import EDITOR_APP_UID, FAKE_PROTOCOL, islinux
+from calibre.ebooks.oeb.polish.check.css import shutdown as shutdown_css_check_pool
 from calibre.gui2 import (
     Application, decouple, set_gui_prefs, setup_gui_option_parser
 )
@@ -94,7 +95,10 @@ def _run(args, notify=None):
 
 
 def main(args=sys.argv):
-    _run(args)
+    try:
+        _run(args)
+    finally:
+        shutdown_css_check_pool()
 
 
 if __name__ == '__main__':