diff --git a/src/calibre/ebooks/oeb/polish/font_stats.coffee b/src/calibre/ebooks/oeb/polish/font_stats.coffee deleted file mode 100644 index e96d3184ed..0000000000 --- a/src/calibre/ebooks/oeb/polish/font_stats.coffee +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env coffee -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -### - Copyright 2013, Kovid Goyal - Released under the GPLv3 License -### - - -if window?.calibre_utils - log = window.calibre_utils.log - -font_dict = (style, computed=false) -> - if computed - fams = [] - family = style.getPropertyCSSValue('font-family') - if family.cssValueType == CSSValue.CSS_PRIMITIVE_VALUE - fams.push(family.getStringValue()) - else - for f in family - fams.push(f.getStringValue()) - else - fams = style.getPropertyValue('font-family') - return { - 'font-family':fams, - 'font-weight':style.getPropertyValue('font-weight'), - 'font-style':style.getPropertyValue('font-style'), - 'font-stretch':style.getPropertyValue('font-stretch'), - 'text-transform':style.getPropertyValue('text-transform'), - 'font-variant':style.getPropertyValue('font-variant'), - } - -font_usage = (node) -> - style = window.getComputedStyle(node, null) - ans = font_dict(style, true) - text = [] - for child in node.childNodes - if child.nodeType == Node.TEXT_NODE - text.push(child.nodeValue) - ans['text'] = text - return ans - -process_sheet = (sheet, font_faces) -> - for rule in sheet.cssRules - if rule.type == rule.FONT_FACE_RULE - process_font_face_rule(rule, font_faces) - else if rule.type == rule.IMPORT_RULE and rule.styleSheet - process_sheet(rule.styleSheet, font_faces) - -process_font_face_rule = (rule, font_faces) -> - fd = font_dict(rule.style) - fd['src'] = rule.style.getPropertyValue('src') - font_faces.push(fd) - -fl_pat = /:{1,2}(first-letter|first-line)/i - -process_sheet_for_pseudo = (sheet, rules) -> - for rule in sheet.cssRules - if rule.type == rule.STYLE_RULE - st = rule.selectorText - m = fl_pat.exec(st) - if m - pseudo = 
m[1].toLowerCase() - ff = rule.style.getPropertyValue('font-family') - if ff - process_style_rule(st, rule.style, rules, pseudo) - else if rule.type == rule.IMPORT_RULE and rule.styleSheet - process_sheet_for_pseudo(rule.styleSheet, rules) - -process_style_rule = (selector_text, style, rules, pseudo) -> - selector_text = selector_text.replace(fl_pat, '') - fd = font_dict(style) - for element in document.querySelectorAll(selector_text) - text = element.innerText - if text - rules.push([fd, text, pseudo]) - -class FontStats - # This class is a namespace to expose functions via the - # window.font_stats object. - - constructor: () -> - if not this instanceof arguments.callee - throw new Error('FontStats constructor called as function') - - get_font_face_rules: () -> - font_faces = [] - for sheet in document.styleSheets - process_sheet(sheet, font_faces) - py_bridge.value = font_faces - - get_font_usage: () -> - ans = [] - busage = font_usage(document.body) - if busage != null - ans.push(busage) - for node in document.body.getElementsByTagName('*') - usage = font_usage(node) - if usage != null - ans.push(usage) - py_bridge.value = ans - - get_pseudo_element_font_usage: () -> - ans = [] - for sheet in document.styleSheets - process_sheet_for_pseudo(sheet, ans) - py_bridge.value = ans - - get_font_families: () -> - ans = {} - for node in document.getElementsByTagName('*') - rules = document.defaultView.getMatchedCSSRules(node, '') - if rules - for rule in rules - style = rule.style - family = style.getPropertyValue('font-family') - if family - ans[family] = true - if node.getAttribute('style') - family = node.style.getPropertyValue('font-family') - if family - ans[family] = true - py_bridge.value = ans - -if window? 
- window.font_stats = new FontStats() - diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index afa60b3dbd..0772410ad6 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -7,19 +7,15 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import json, sys, os, logging -from urllib import unquote -from collections import defaultdict +import sys +from functools import partial +from lxml.etree import tostring import regex -from cssutils import CSSParser -from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer, - pyqtSlot) -from PyQt5.QtWebKitWidgets import QWebPage, QWebView -from calibre.constants import iswindows -from calibre.ebooks.oeb.display.webview import load_html -from calibre.gui2 import must_use_qt +from calibre.ebooks.oeb.base import XHTML +from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, iterdeclaration +from calibre.utils.icu import ord_string, safe_chr def normalize_font_properties(font): w = font.get('font-weight', None) @@ -102,286 +98,200 @@ def get_matching_rules(rules, font): return m return [] -def parse_font_families(parser, raw): - style = parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') - for x in style.propertyValue: - x = x.value - if x: - yield x +def get_css_text(elem, resolve_pseudo_property, which='before'): + text = resolve_pseudo_property(elem, which, 'content')[0].value + if text and len(text) > 2 and text[0] == '"' and text[-1] == '"': + return text[1:-1] + return '' -def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser): +caps_variants = {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'} + +def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize_pat, for_pseudo=None): ans = [] - for font_dict, text, pseudo in pseudo_element_font_usage: 
- text = text.strip() - if pseudo == 'first-letter': - prefix = first_letter_pat.match(text) - if prefix is not None: - text = prefix + text[len(prefix):].lstrip()[:1] - else: - text = text[:1] - if text: - font = font_dict.copy() - font['text'] = text - font['font-family'] = list(parse_font_families(parser, font['font-family'])) - ans.append(font) - + before = get_css_text(elem, resolve_pseudo_property) + if before: + ans.append(before) + if for_pseudo is not None: + ans.append(tostring(elem, method='text', encoding=unicode, with_tail=False)) + else: + if elem.text: + ans.append(elem.text) + for child in elem.iterchildren(): + t = getattr(child, 'tail', '') + if t: + ans.append(t) + after = get_css_text(elem, resolve_pseudo_property, 'after') + if after: + ans.append(after) + ans = ''.join(ans) + if for_pseudo is not None: + tt = resolve_pseudo_property(elem, for_pseudo, 'text-transform')[0].value + fv = resolve_pseudo_property(elem, for_pseudo, 'font-variant')[0].value + else: + tt = resolve_property(elem, 'text-transform')[0].value + fv = resolve_property(elem, 'font-variant')[0].value + if fv in caps_variants: + ans += icu_upper(ans) + if tt != 'none': + if tt == 'uppercase': + ans = icu_upper(ans) + elif tt == 'lowercase': + ans = icu_lower(ans) + elif tt == 'capitalize': + m = capitalize_pat.search(ans) + if m is not None: + ans += icu_upper(m.group()) return ans -class Page(QWebPage): # {{{ +def get_font_dict(elem, resolve_property, pseudo=None): + ans = {} + if pseudo is None: + ff = resolve_property(elem, 'font-family') + else: + ff = resolve_property(elem, pseudo, 'font-family') + ans['font-family'] = tuple(x.value for x in ff) + for p in 'weight', 'style', 'stretch': + p = 'font-' + p + rp = resolve_property(elem, p) if pseudo is None else resolve_property(elem, pseudo, p) + ans[p] = type('')(rp[0].value) + normalize_font_properties(ans) + return ans - def __init__(self, log): - self.log = log - QWebPage.__init__(self) - self.js = None - self.evaljs = 
self.mainFrame().evaluateJavaScript - self.bridge_value = None - nam = self.networkAccessManager() - nam.setNetworkAccessible(nam.NotAccessible) - self.longjs_counter = 0 +bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} +exclude_chars = frozenset(ord_string('\n\r\t')) +skip_tags = {XHTML(x) for x in 'script style title meta link'.split()} +font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} - def javaScriptConsoleMessage(self, msg, lineno, msgid): - self.log(u'JS:', unicode(msg)) - - def javaScriptAlert(self, frame, msg): - self.log(unicode(msg)) - - @pyqtSlot(result=bool) - def shouldInterruptJavaScript(self): - if self.longjs_counter < 5: - self.log('Long running javascript, letting it proceed') - self.longjs_counter += 1 - return False - self.log.warn('Long running javascript, aborting it') - return True - - def _pass_json_value_getter(self): - val = json.dumps(self.bridge_value) - return val - - def _pass_json_value_setter(self, value): - # Qt WebKit in Qt 4.x adds extra null bytes to the end of the string - # if the JSON contains non-BMP characters - self.bridge_value = json.loads(unicode(value).rstrip('\0')) - - _pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter, - fset=_pass_json_value_setter) - - def load_js(self): - self.longjs_counter = 0 - if self.js is None: - from calibre.utils.resources import compiled_coffeescript - self.js = compiled_coffeescript('ebooks.oeb.display.utils') - self.js += compiled_coffeescript('ebooks.oeb.polish.font_stats') - self.mainFrame().addToJavaScriptWindowObject("py_bridge", self) - self.evaljs(self.js) - self.evaljs(''' - Object.defineProperty(py_bridge, 'value', { - get : function() { return JSON.parse(this._pass_json_value); }, - set : function(val) { this._pass_json_value = JSON.stringify(val); } - }); - ''') -# }}} +def prepare_font_rule(cssdict): + cssdict['font-family'] = frozenset(cssdict['font-family'][:1]) + cssdict['width'] = 
widths[cssdict['font-stretch']] + cssdict['weight'] = int(cssdict['font-weight']) class StatsCollector(object): + first_letter_pat = capitalize_pat = None + def __init__(self, container, do_embed=False): - self.container = container - self.log = self.logger = container.log - self.do_embed = do_embed - must_use_qt() - self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) - self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE) - self.capitalize_pat = regex.compile(r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE) + if self.first_letter_pat is None: + StatsCollector.first_letter_pat = self.first_letter_pat = regex.compile( + r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE) + StatsCollector.capitalize_pat = self.capitalize_pat = regex.compile( + r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE) - self.loop = QEventLoop() - self.view = QWebView() - self.page = Page(self.log) - self.view.setPage(self.page) - self.page.setViewportSize(QSize(1200, 1600)) + self.collect_font_stats(container, do_embed) - self.view.loadFinished.connect(self.collect, - type=Qt.QueuedConnection) + def collect_font_face_rules(self, container, processed, spine_name, sheet, sheet_name): + if sheet_name in processed: + sheet_rules = processed[sheet_name] + else: + sheet_rules = [] + if sheet_name != spine_name: + processed[sheet_name] = sheet_rules + for rule, base_name, rule_index in iterrules(container, sheet_name, rules=sheet, rule_type='FONT_FACE_RULE'): + cssdict = {} + for prop in iterdeclaration(rule.style): + if prop.name == 'font-family': + cssdict['font-family'] = [icu_lower(x.value) for x in prop.propertyValue] + elif prop.name.startswith('font-'): + cssdict[prop.name] = prop.propertyValue[0].value + elif prop.name == 'src': + for val in prop.propertyValue: + x = val.value + fname = container.href_to_name(x, sheet_name) + if container.has_name(fname): + cssdict['src'] 
= fname + break + else: + container.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % prop.propertyValue.cssText) + if 'src' not in cssdict: + continue + ff = cssdict.get('font-family') + if not ff or ff[0] in bad_fonts: + continue + normalize_font_properties(cssdict) + prepare_font_rule(cssdict) + sheet_rules.append(cssdict) + self.font_rule_map[spine_name].extend(sheet_rules) - self.render_queue = list(container.spine_items) + def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec): + text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat) + if not text: + return + + def update_usage_for_embed(font, chars): + if not do_embed: + return + ff = [icu_lower(x) for x in font.get('font-family', ())] + if ff and ff[0] not in bad_fonts: + key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in font_keys)) + val = font_usage_map.get(key) + if val is None: + val = font_usage_map[key] = {'text': set()} + for k in font_keys: + val[k] = font[k][0] if k == 'font-family' else font[k] + val['text'] |= chars + for ff in font.get('font-family', ()): + if ff and icu_lower(ff) not in bad_fonts: + font_spec.add(ff) + + font = get_font_dict(elem, resolve_property) + chars = frozenset(ord_string(text)) - exclude_chars + update_usage_for_embed(font, chars) + for rule in get_matching_rules(font_face_rules, font): + self.font_stats[rule['src']] |= chars + q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True) + if q is not None: + font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter') + text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter') + m = self.first_letter_pat.search(text.lstrip()) + if m is not None: + chars = frozenset(ord_string(m.group())) - exclude_chars + 
update_usage_for_embed(font, chars) + for rule in get_matching_rules(font_face_rules, font): + self.font_stats[rule['src']] |= chars + q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True) + if q is not None: + font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-line')  # resolve the ::first-line font, not ::first-letter + text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line') + chars = frozenset(ord_string(text)) - exclude_chars + update_usage_for_embed(font, chars) + for rule in get_matching_rules(font_face_rules, font): + self.font_stats[rule['src']] |= chars + + def get_font_usage(self, container, spine_name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed): + root = container.parsed(spine_name) + for body in root.iterchildren(XHTML('body')): + for elem in body.iter('*'): + if elem.tag not in skip_tags: + self.get_element_font_usage( + elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, + self.font_usage_map[spine_name], self.font_spec_map[spine_name]) + + def collect_font_stats(self, container, do_embed=False): self.font_stats = {} self.font_usage_map = {} self.font_spec_map = {} self.font_rule_map = {} self.all_font_rules = {} - QTimer.singleShot(0, self.render_book) + processed_sheets = {} + for name, is_linear in container.spine_names: + self.font_rule_map[name] = font_face_rules = [] + resolve_property, resolve_pseudo_property, select = resolve_styles(container, name, sheet_callback=partial( + self.collect_font_face_rules, container, processed_sheets, name)) - if self.loop.exec_() == 1: - raise Exception('Failed to gather statistics from book, see log for details') + for rule in font_face_rules: + self.all_font_rules[rule['src']] = rule + if rule['src'] not in self.font_stats: + self.font_stats[rule['src']] = set() - def log_exception(self, *args): - orig = self.log.filter_level - try: - self.log.filter_level = self.log.DEBUG - self.log.exception(*args) 
- finally: - self.log.filter_level = orig - - def render_book(self): - try: - if not self.render_queue: - self.loop.exit() - else: - self.render_next() - except: - self.log_exception('Rendering failed') - self.loop.exit(1) - - def render_next(self): - item = unicode(self.render_queue.pop(0)) - self.current_item = item - load_html(item, self.view) - - def collect(self, ok): - if not ok: - self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item)) - self.loop.exit(1) - return - try: - self.page.load_js() - self.collect_font_stats() - except: - self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item)) - self.loop.exit(1) - return - - self.render_book() - - def href_to_name(self, href, warn_name): - if not href.startswith('file://'): - self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring') - return None - src = href[len('file://'):] - if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'): - src = src[1:] - src = src.replace('/', os.sep) - src = unquote(src) - name = self.container.abspath_to_name(src) - if not self.container.has_name(name): - self.log.warn('Missing resource', href, 'in', warn_name, - 'ignoring') - return None - return name - - def collect_font_stats(self): - self.page.evaljs('window.font_stats.get_font_face_rules()') - font_face_rules = self.page.bridge_value - if not isinstance(font_face_rules, list): - raise Exception('Unknown error occurred while reading font-face rules') - - # Weed out invalid font-face rules - rules = [] - import tinycss - parser = tinycss.make_full_parser() - for rule in font_face_rules: - ff = rule.get('font-family', None) - if not ff: - continue - style = self.parser.parseStyle('font-family:%s'%ff, validate=False) - ff = [x.value for x in - style.getProperty('font-family').propertyValue] - if not ff or ff[0] == 'inherit': - continue - rule['font-family'] = frozenset(icu_lower(f) for f in ff) - src = rule.get('src', None) - if not 
src: - continue - try: - tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value - except Exception: - self.log.warn('Failed to parse @font-family src: %s' % src) - continue - for token in tokens: - if token.type == 'URI': - uv = token.value - if uv: - sn = self.href_to_name(uv, '@font-face rule') - if sn is not None: - rule['src'] = sn - break - else: - self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src) - continue - normalize_font_properties(rule) - rule['width'] = widths[rule['font-stretch']] - rule['weight'] = int(rule['font-weight']) - rules.append(rule) - - if not rules and not self.do_embed: - return - - self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules - for rule in rules: - self.all_font_rules[rule['src']] = rule - - for rule in rules: - if rule['src'] not in self.font_stats: - self.font_stats[rule['src']] = set() - - self.page.evaljs('window.font_stats.get_font_usage()') - font_usage = self.page.bridge_value - if not isinstance(font_usage, list): - raise Exception('Unknown error occurred while reading font usage') - self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()') - pseudo_element_font_usage = self.page.bridge_value - if not isinstance(pseudo_element_font_usage, list): - raise Exception('Unknown error occurred while reading pseudo element font usage') - font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser) - exclude = {'\n', '\r', '\t'} - self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) - bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} - for font in font_usage: - text = set() - for t in font['text']: - tt = (font['text-transform'] or '').lower() - if tt != 'none': - if tt == 'uppercase': - t = icu_upper(t) - elif tt == 'lowercase': - t = icu_lower(t) - elif tt == 
'capitalize': - m = self.capitalize_pat.search(t) - if m is not None: - t += icu_upper(m.group()) - fv = (font['font-variant'] or '').lower() - if fv in {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}: - t += icu_upper(t) # for renderers that try to fake small-caps by using small normal caps - text |= frozenset(t) - text.difference_update(exclude) - if not text: - continue - normalize_font_properties(font) - for rule in get_matching_rules(rules, font): - self.font_stats[rule['src']] |= text - if self.do_embed: - ff = [icu_lower(x) for x in font.get('font-family', [])] - if ff and ff[0] not in bad_fonts: - keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} - key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) - val = fu[key] - if not val: - val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) - val['text'] = set() - val['text'] |= text - self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) - - if self.do_embed: - self.page.evaljs('window.font_stats.get_font_families()') - font_families = self.page.bridge_value - if not isinstance(font_families, dict): - raise Exception('Unknown error occurred while reading font families') - self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() - for font_dict, text, pseudo in pseudo_element_font_usage: - font_families[font_dict['font-family']] = True - for raw in font_families.iterkeys(): - for x in parse_font_families(self.parser, raw): - if x.lower() not in bad_fonts: - fs.add(x) + self.font_usage_map[name] = {} + self.font_spec_map[name] = set() + self.get_font_usage(container, name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed) + self.font_stats = {k:{safe_chr(x) for x in v} for k, v in self.font_stats.iteritems()} + for fum in self.font_usage_map.itervalues(): + for v in fum.itervalues(): + v['text'] = {safe_chr(x) 
for x in v['text']} if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container diff --git a/src/calibre/ebooks/oeb/polish/tests/cascade.py b/src/calibre/ebooks/oeb/polish/tests/cascade.py index dd9167564e..298e7387c2 100644 --- a/src/calibre/ebooks/oeb/polish/tests/cascade.py +++ b/src/calibre/ebooks/oeb/polish/tests/cascade.py @@ -12,6 +12,7 @@ from calibre.constants import iswindows from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name +from calibre.ebooks.oeb.polish.stats import StatsCollector, font_keys, normalize_font_properties, prepare_font_rule from calibre.ebooks.oeb.polish.tests.base import BaseTest from calibre.utils.logging import Log, Stream @@ -45,6 +46,12 @@ class VirtualContainer(ContainerBase): self.parsed_cache[name] = self.files[name] return self.parsed_cache[name] + @property + def spine_names(self): + for name in sorted(self.mime_map): + if self.mime_map[name] in OEB_DOCS: + yield name, True + class CascadeTest(BaseTest): def test_iterrules(self): @@ -131,3 +138,65 @@ class CascadeTest(BaseTest): t('p', 'before', 'font-weight', 'bold') t('p', 'first-letter', 'content') t('p', 'first-letter', 'content', abort_on_missing=True) + + def test_font_stats(self): + embeds = '@font-face { font-family: X; src: url(X.otf) }\n@font-face { font-family: X; src: url(XB.otf); font-weight: bold }' + def get_stats(html, *fonts): + styles = [] + html = '{}'.format(html) + files = {'index.html':html, 'X.otf':b'xxx', 'XB.otf': b'xbxb'} + for font in fonts: + styles.append('@font-face {') + for k, v in font.iteritems(): + if k == 'src': + files[v] = b'xxx' + v = 'url(%s)' % v + styles.append('%s : %s;' % (k, v)) + styles.append('}\n') + html = '{}'.format(html) + files['styles.css'] = embeds + '\n'.join(styles) + c = VirtualContainer(files) + return StatsCollector(c, 
do_embed=True) + + def font(family, weight=None, style=None): + f = {} + if weight is not None: + f['font-weight'] = weight + if style is not None: + f['font-style'] = style + f = normalize_font_properties(f) + f['font-family'] = [family] + return f + + def font_rule(src, *args, **kw): + ans = font(*args, **kw) + ans['font-family'] = list(map(icu_lower, ans['font-family'])) + prepare_font_rule(ans) + ans['src'] = src + return ans + + def fkey(*args, **kw): + f = font(*args, **kw) + f['font-family'] = icu_lower(f['font-family'][0]) + return frozenset((k, v) for k, v in f.iteritems() if k in font_keys) + + def fu(text, *args, **kw): + key = fkey(*args, **kw) + val = font(*args, **kw) + val['text'] = set(text) + val['font-family'] = val['font-family'][0] + return key, val + + s = get_stats('

abcd\nefghi

u

') + # The normal font must include ghi as it will be used to simulate + # italic by most rendering engines when the italic font is missing + self.assertEqual(s.font_stats, {'XB.otf':set('def'), 'X.otf':set('abcghi')}) + self.assertEqual(s.font_spec_map, {'index.html':set('XU')}) + self.assertEqual(s.all_font_rules, {'X.otf':font_rule('X.otf', 'X'), 'XB.otf':font_rule('XB.otf', 'X', 'bold')}) + self.assertEqual(set(s.font_rule_map), {'index.html'}) + self.assertEqual(s.font_rule_map['index.html'], [font_rule('X.otf', 'X'), font_rule('XB.otf', 'X', 'bold')]) + self.assertEqual(set(s.font_usage_map), {'index.html'}) + self.assertEqual(s.font_usage_map['index.html'], dict([fu('abc', 'X'), fu('def', 'X', weight='bold'), fu('ghi', 'X', style='italic'), fu('u', 'U')])) + + s = get_stats('

abc

d\nef') + self.assertEqual(s.font_stats, {'XB.otf':set('defDEF'), 'X.otf':set('ABC')})