diff --git a/resources/compiled_coffeescript.zip b/resources/compiled_coffeescript.zip index 5e9f6ae63b..b9272bc19b 100644 Binary files a/resources/compiled_coffeescript.zip and b/resources/compiled_coffeescript.zip differ diff --git a/src/calibre/ebooks/oeb/polish/font_stats.coffee b/src/calibre/ebooks/oeb/polish/font_stats.coffee index 62f9e55dc3..227949fe91 100644 --- a/src/calibre/ebooks/oeb/polish/font_stats.coffee +++ b/src/calibre/ebooks/oeb/polish/font_stats.coffee @@ -50,6 +50,29 @@ process_font_face_rule = (rule, font_faces) -> fd['src'] = rule.style.getPropertyValue('src') font_faces.push(fd) +fl_pat = /:{1,2}(first-letter|first-line)/i + +process_sheet_for_pseudo = (sheet, rules) -> + for rule in sheet.cssRules + if rule.type == rule.STYLE_RULE + st = rule.selectorText + m = fl_pat.exec(st) + if m + pseudo = m[1].toLowerCase() + ff = rule.style.getPropertyValue('font-family') + if ff + process_style_rule(st, rule.style, rules, pseudo) + else if rule.type == rule.IMPORT_RULE and rule.styleSheet + process_sheet_for_pseudo(rule.styleSheet, rules) + +process_style_rule = (selector_text, style, rules, pseudo) -> + selector_text = selector_text.replace(fl_pat, '') + fd = font_dict(style) + for element in document.querySelectorAll(selector_text) + text = element.innerText + if text + rules.push([fd, text, pseudo]) + class FontStats # This class is a namespace to expose functions via the # window.font_stats object. @@ -75,6 +98,12 @@ class FontStats ans.push(usage) py_bridge.value = ans + get_pseudo_element_font_usage: () -> + ans = [] + for sheet in document.styleSheets + process_sheet_for_pseudo(sheet, ans) + py_bridge.value = ans + get_font_families: () -> ans = {} for node in document.getElementsByTagName('*') diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index 60b24a6c11..652d6e0ede 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -11,6 +11,7 @@ import json, sys, os, logging from urllib import unquote from collections import defaultdict +import regex from cssutils import CSSParser from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer, pyqtSlot) @@ -101,6 +102,31 @@ def get_matching_rules(rules, font): return m return [] +def parse_font_families(parser, raw): + style = parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') + for x in style.propertyValue: + x = x.value + if x: + yield x + +def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser): + ans = [] + for font_dict, text, pseudo in pseudo_element_font_usage: + text = text.strip() + if pseudo == 'first-letter': + prefix = first_letter_pat.match(text) + if prefix is not None: + text = prefix + text[len(prefix):].lstrip()[:1] + else: + text = text[:1] + if text: + font = font_dict.copy() + font['text'] = text + font['font-family'] = list(parse_font_families(parser, font['font-family'])) + ans.append(font) + + return ans + class Page(QWebPage): # {{{ def __init__(self, log): @@ -164,6 +190,7 @@ class StatsCollector(object): self.do_embed = do_embed must_use_qt() self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) + self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE) self.loop = QEventLoop() self.view = QWebView() @@ -186,6 +213,14 @@ class StatsCollector(object): if self.loop.exec_() == 1: raise Exception('Failed to gather statistics from book, see log for details') + def log_exception(self, *args): + orig = self.log.filter_level + try: + self.log.filter_level = self.log.DEBUG + self.log.exception(*args) + finally: + self.log.filter_level = orig + def render_book(self): try: if not self.render_queue: @@ -193,7 +228,7 @@ class StatsCollector(object): else: self.render_next() except: - self.logger.exception('Rendering failed') + self.log_exception('Rendering failed') self.loop.exit(1) def render_next(self): @@ -210,7 +245,7 @@ class StatsCollector(object): self.page.load_js() self.collect_font_stats() except: - self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item)) + self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item)) self.loop.exit(1) return @@ -283,6 +318,11 @@ class StatsCollector(object): font_usage = self.page.bridge_value if not isinstance(font_usage, list): raise Exception('Unknown error occurred while reading font usage') + self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()') + pseudo_element_font_usage = self.page.bridge_value + if not isinstance(pseudo_element_font_usage, list): + raise Exception('Unknown error occurred while reading pseudo element font usage') + font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser) exclude = {'\n', '\r', '\t'} self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} @@ -314,11 +354,11 @@ class StatsCollector(object): if not isinstance(font_families, dict): raise Exception('Unknown error occurred while reading font families') self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() + for font_dict, text, pseudo in pseudo_element_font_usage: + font_families[font_dict['font-family']] = True for raw in font_families.iterkeys(): - style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') - for x in style.propertyValue: - x = x.value - if x and x.lower() not in bad_fonts: + for x in parse_font_families(self.parser, raw): + if x.lower() not in bad_fonts: fs.add(x) if __name__ == '__main__': @@ -327,5 +367,3 @@ if __name__ == '__main__': default_log.filter_level = default_log.DEBUG ebook = get_container(sys.argv[-1], default_log) print (StatsCollector(ebook, do_embed=True).font_stats) - -