Stop using Qt WebKit to calculate font usage statistics

This commit is contained in:
Kovid Goyal 2016-04-13 10:54:16 +05:30
parent 9a2dc518ad
commit 33e23e50ab
3 changed files with 248 additions and 396 deletions

View File

@ -1,127 +0,0 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2013, Kovid Goyal <kovid at kovidgoyal.net>
Released under the GPLv3 License
###
if window?.calibre_utils
log = window.calibre_utils.log
# Collect the font related CSS properties of `style` into a plain object.
# When `computed` is true the font-family entry is an array of strings read
# via getPropertyCSSValue(); otherwise it is the raw font-family property text.
font_dict = (style, computed=false) ->
    prop = (name) -> style.getPropertyValue(name)
    if computed
        value = style.getPropertyCSSValue('font-family')
        if value.cssValueType == CSSValue.CSS_PRIMITIVE_VALUE
            families = [value.getStringValue()]
        else
            families = (entry.getStringValue() for entry in value)
    else
        families = prop('font-family')
    result = {'font-family': families}
    for name in ['font-weight', 'font-style', 'font-stretch', 'text-transform', 'font-variant']
        result[name] = prop(name)
    return result
# Return the computed font properties of `node` (see font_dict) together with
# the values of its direct text-node children, stored under the 'text' key.
font_usage = (node) ->
    usage = font_dict(window.getComputedStyle(node, null), true)
    usage['text'] = (kid.nodeValue for kid in node.childNodes when kid.nodeType == Node.TEXT_NODE)
    return usage
# Walk the rules of `sheet`, recording every @font-face rule in `font_faces`
# and recursing into @import rules that carry a stylesheet.
process_sheet = (sheet, font_faces) ->
    for rule in sheet.cssRules
        switch rule.type
            when rule.FONT_FACE_RULE
                process_font_face_rule(rule, font_faces)
            when rule.IMPORT_RULE
                process_sheet(rule.styleSheet, font_faces) if rule.styleSheet
# Append the font properties of one @font-face `rule`, plus its raw 'src'
# property value, to `font_faces`.
process_font_face_rule = (rule, font_faces) ->
    declarations = rule.style
    entry = font_dict(declarations)
    entry['src'] = declarations.getPropertyValue('src')
    font_faces.push(entry)
fl_pat = /:{1,2}(first-letter|first-line)/i
# Walk the rules of `sheet` (recursing into @import rules that carry a
# stylesheet) and hand every style rule whose selector matches fl_pat and
# that sets font-family to process_style_rule.
process_sheet_for_pseudo = (sheet, rules) ->
    for rule in sheet.cssRules
        switch rule.type
            when rule.STYLE_RULE
                selector = rule.selectorText
                found = fl_pat.exec(selector)
                if found and rule.style.getPropertyValue('font-family')
                    process_style_rule(selector, rule.style, rules, found[1].toLowerCase())
            when rule.IMPORT_RULE
                process_sheet_for_pseudo(rule.styleSheet, rules) if rule.styleSheet
# For every element matched by `selector_text` with the fl_pat pseudo-element
# removed, push [font properties, element.innerText, pseudo] onto `rules`
# when the element has text.
process_style_rule = (selector_text, style, rules, pseudo) ->
    base_selector = selector_text.replace(fl_pat, '')
    fonts = font_dict(style)
    for element in document.querySelectorAll(base_selector)
        content = element.innerText
        rules.push([fonts, content, pseudo]) if content
class FontStats
    # This class is a namespace to expose functions via the
    # window.font_stats object. Every method hands its result back by
    # assigning it to py_bridge.value.

    constructor: () ->
        # `not this instanceof X` parses as `(not this) instanceof X`, which
        # is always false, so the whole instanceof test has to be negated.
        unless this instanceof FontStats
            throw new Error('FontStats constructor called as function')

    # Collect the @font-face rules of every stylesheet in the document.
    get_font_face_rules: () ->
        font_faces = []
        for sheet in document.styleSheets
            process_sheet(sheet, font_faces)
        py_bridge.value = font_faces

    # Collect font_usage() for document.body and for every element below it.
    get_font_usage: () ->
        ans = []
        busage = font_usage(document.body)
        if busage != null
            ans.push(busage)
        for node in document.body.getElementsByTagName('*')
            usage = font_usage(node)
            if usage != null
                ans.push(usage)
        py_bridge.value = ans

    # Collect [font properties, text, pseudo] triples for style rules that
    # target the first-letter / first-line pseudo-elements.
    get_pseudo_element_font_usage: () ->
        ans = []
        for sheet in document.styleSheets
            process_sheet_for_pseudo(sheet, ans)
        py_bridge.value = ans

    # Collect, as the keys of an object, every font-family value found in the
    # matched CSS rules or in the inline style attribute of any element.
    get_font_families: () ->
        ans = {}
        for node in document.getElementsByTagName('*')
            rules = document.defaultView.getMatchedCSSRules(node, '')
            if rules
                for rule in rules
                    family = rule.style.getPropertyValue('font-family')
                    if family
                        ans[family] = true
            if node.getAttribute('style')
                family = node.style.getPropertyValue('font-family')
                if family
                    ans[family] = true
        py_bridge.value = ans
if window?
window.font_stats = new FontStats()

View File

@ -7,19 +7,15 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import json, sys, os, logging
from urllib import unquote
from collections import defaultdict
import sys
from functools import partial
from lxml.etree import tostring
import regex
from cssutils import CSSParser
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
pyqtSlot)
from PyQt5.QtWebKitWidgets import QWebPage, QWebView
from calibre.constants import iswindows
from calibre.ebooks.oeb.display.webview import load_html
from calibre.gui2 import must_use_qt
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, iterdeclaration
from calibre.utils.icu import ord_string, safe_chr
def normalize_font_properties(font):
w = font.get('font-weight', None)
@ -102,286 +98,200 @@ def get_matching_rules(rules, font):
return m
return []
def parse_font_families(parser, raw):
    """Yield each non-empty font family value found in *raw*.

    *raw* is the text of a CSS ``font-family`` value. It is parsed by calling
    ``parser.parseStyle`` on a synthesized ``font-family:`` declaration with
    validation turned off.
    """
    declaration = parser.parseStyle('font-family:' + raw, validate=False)
    families = declaration.getProperty('font-family').propertyValue
    for item in families:
        name = item.value
        if name:
            yield name
def get_css_text(elem, resolve_pseudo_property, which='before'):
    """Return the literal text of a pseudo-element's ``content`` property.

    The first value resolved for the ``content`` property of the *which*
    pseudo-element of *elem* is used. When it is a double-quoted string more
    than two characters long, the text between the quotes is returned;
    anything else yields ``''``.
    """
    content = resolve_pseudo_property(elem, which, 'content')[0].value
    if not content or len(content) <= 2:
        return ''
    if content[0] != '"' or content[-1] != '"':
        return ''
    return content[1:-1]
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
caps_variants = {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}
def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize_pat, for_pseudo=None):
ans = []
for font_dict, text, pseudo in pseudo_element_font_usage:
text = text.strip()
if pseudo == 'first-letter':
prefix = first_letter_pat.match(text)
if prefix is not None:
text = prefix + text[len(prefix):].lstrip()[:1]
else:
text = text[:1]
if text:
font = font_dict.copy()
font['text'] = text
font['font-family'] = list(parse_font_families(parser, font['font-family']))
ans.append(font)
before = get_css_text(elem, resolve_pseudo_property)
if before:
ans.append(before)
if for_pseudo is not None:
ans.append(tostring(elem, method='text', encoding=unicode, with_tail=False))
else:
if elem.text:
ans.append(elem.text)
for child in elem.iterchildren():
t = getattr(child, 'tail', '')
if t:
ans.append(t)
after = get_css_text(elem, resolve_pseudo_property, 'after')
if after:
ans.append(after)
ans = ''.join(ans)
if for_pseudo is not None:
tt = resolve_pseudo_property(elem, for_pseudo, 'text-transform')[0].value
fv = resolve_pseudo_property(elem, for_pseudo, 'font-variant')[0].value
else:
tt = resolve_property(elem, 'text-transform')[0].value
fv = resolve_property(elem, 'font-variant')[0].value
if fv in caps_variants:
ans += icu_upper(ans)
if tt != 'none':
if tt == 'uppercase':
ans = icu_upper(ans)
elif tt == 'lowercase':
ans = icu_lower(ans)
elif tt == 'capitalize':
m = capitalize_pat.search(ans)
if m is not None:
ans += icu_upper(m.group())
return ans
class Page(QWebPage): # {{{
def get_font_dict(elem, resolve_property, pseudo=None):
    """Build a normalized font description for *elem*.

    *resolve_property* is called as ``resolve_property(elem, name)`` or, when
    *pseudo* is given, as ``resolve_property(elem, pseudo, name)``. The result
    maps ``font-family`` to a tuple of all resolved values, and
    ``font-weight``, ``font-style`` and ``font-stretch`` to the text form of
    their first resolved value. It is passed through
    ``normalize_font_properties`` before being returned.
    """
    def lookup(name):
        if pseudo is None:
            return resolve_property(elem, name)
        return resolve_property(elem, pseudo, name)

    ans = {'font-family': tuple(x.value for x in lookup('font-family'))}
    for suffix in ('weight', 'style', 'stretch'):
        prop = 'font-' + suffix
        ans[prop] = type('')(lookup(prop)[0].value)
    normalize_font_properties(ans)
    return ans
def __init__(self, log):
    """Set up the page: keep *log*, reset the JS/bridge state and mark the
    page's network access manager as not accessible."""
    self.log = log
    QWebPage.__init__(self)
    self.js = self.bridge_value = None
    self.longjs_counter = 0
    self.evaljs = self.mainFrame().evaluateJavaScript
    network = self.networkAccessManager()
    network.setNetworkAccessible(network.NotAccessible)
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
exclude_chars = frozenset(ord_string('\n\r\t'))
skip_tags = {XHTML(x) for x in 'script style title meta link'.split()}
font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
def javaScriptConsoleMessage(self, msg, lineno, msgid):
    """Forward a JavaScript console message to ``self.log`` with a ``JS:`` prefix."""
    text = unicode(msg)
    self.log(u'JS:', text)
def javaScriptAlert(self, frame, msg):
    """Send the text of a JavaScript alert to ``self.log``."""
    text = unicode(msg)
    self.log(text)
@pyqtSlot(result=bool)
def shouldInterruptJavaScript(self):
    """Return whether long running JavaScript should be aborted.

    While ``longjs_counter`` is below 5 it is incremented and the script is
    allowed to proceed (False); after that a warning is logged and True is
    returned.
    """
    allow = self.longjs_counter < 5
    if allow:
        self.log('Long running javascript, letting it proceed')
        self.longjs_counter += 1
    else:
        self.log.warn('Long running javascript, aborting it')
    return not allow
def _pass_json_value_getter(self):
val = json.dumps(self.bridge_value)
return val
def _pass_json_value_setter(self, value):
# Qt WebKit in Qt 4.x adds extra null bytes to the end of the string
# if the JSON contains non-BMP characters
self.bridge_value = json.loads(unicode(value).rstrip('\0'))
_pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter,
fset=_pass_json_value_setter)
# Inject the font statistics JavaScript into the page's main frame and
# define the `py_bridge.value` accessor used to exchange JSON with Python.
def load_js(self):
# Reset the counter consulted by shouldInterruptJavaScript.
self.longjs_counter = 0
# The compiled scripts are built once and kept in self.js.
if self.js is None:
from calibre.utils.resources import compiled_coffeescript
self.js = compiled_coffeescript('ebooks.oeb.display.utils')
self.js += compiled_coffeescript('ebooks.oeb.polish.font_stats')
# Expose this object to JavaScript under the name py_bridge.
self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
self.evaljs(self.js)
# py_bridge.value round-trips through the _pass_json_value property as
# JSON text (JSON.stringify on set, JSON.parse on get).
self.evaljs('''
Object.defineProperty(py_bridge, 'value', {
get : function() { return JSON.parse(this._pass_json_value); },
set : function(val) { this._pass_json_value = JSON.stringify(val); }
});
''')
# }}}
def prepare_font_rule(cssdict):
    """Prepare a @font-face dictionary in place.

    ``font-family`` is reduced to a frozenset holding at most its first
    entry, ``width`` is looked up in ``widths`` by the ``font-stretch`` value
    and ``weight`` is the integer form of ``font-weight``.
    """
    families = cssdict['font-family']
    cssdict['font-family'] = frozenset(families[:1])
    stretch = cssdict['font-stretch']
    cssdict['width'] = widths[stretch]
    weight = cssdict['font-weight']
    cssdict['weight'] = int(weight)
class StatsCollector(object):
first_letter_pat = capitalize_pat = None
def __init__(self, container, do_embed=False):
self.container = container
self.log = self.logger = container.log
self.do_embed = do_embed
must_use_qt()
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
self.capitalize_pat = regex.compile(r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)
if self.first_letter_pat is None:
StatsCollector.first_letter_pat = self.first_letter_pat = regex.compile(
r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
StatsCollector.capitalize_pat = self.capitalize_pat = regex.compile(
r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)
self.loop = QEventLoop()
self.view = QWebView()
self.page = Page(self.log)
self.view.setPage(self.page)
self.page.setViewportSize(QSize(1200, 1600))
self.collect_font_stats(container, do_embed)
self.view.loadFinished.connect(self.collect,
type=Qt.QueuedConnection)
# Sheet callback (bound with functools.partial and passed to resolve_styles
# as sheet_callback) that gathers the usable @font-face rules of `sheet` and
# appends them to self.font_rule_map[spine_name].
# `processed` caches the rule list per sheet name, keyed by `sheet_name`.
# NOTE(review): indentation was lost in this view, so the exact nesting of
# the statements below (in particular whether the rule loop sits inside the
# cache-miss branch) has to be confirmed against the real file.
def collect_font_face_rules(self, container, processed, spine_name, sheet, sheet_name):
if sheet_name in processed:
sheet_rules = processed[sheet_name]
else:
sheet_rules = []
# A sheet named after the spine item itself is not cached.
if sheet_name != spine_name:
processed[sheet_name] = sheet_rules
for rule, base_name, rule_index in iterrules(container, sheet_name, rules=sheet, rule_type='FONT_FACE_RULE'):
cssdict = {}
for prop in iterdeclaration(rule.style):
if prop.name == 'font-family':
# Family names are stored lower-cased.
cssdict['font-family'] = [icu_lower(x.value) for x in prop.propertyValue]
elif prop.name.startswith('font-'):
cssdict[prop.name] = prop.propertyValue[0].value
elif prop.name == 'src':
# Use the first src value that names a file present in the container.
for val in prop.propertyValue:
x = val.value
fname = container.href_to_name(x, sheet_name)
if container.has_name(fname):
cssdict['src'] = fname
break
else:
container.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % prop.propertyValue.cssText)
# Rules without a usable src, without a family, or whose first family is
# one of bad_fonts are skipped.
if 'src' not in cssdict:
continue
ff = cssdict.get('font-family')
if not ff or ff[0] in bad_fonts:
continue
normalize_font_properties(cssdict)
prepare_font_rule(cssdict)
sheet_rules.append(cssdict)
self.font_rule_map[spine_name].extend(sheet_rules)
self.render_queue = list(container.spine_items)
def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec):
    """Record which characters *elem* uses in which fonts.

    The element's own text is attributed to the font resolved for the
    element. When a ``font-family`` is resolved for its ``first-letter`` or
    ``first-line`` pseudo-element, the corresponding text is additionally
    attributed to the font resolved for that pseudo-element.

    For every font, the characters (as code points from ``ord_string``, minus
    ``exclude_chars``) are added to ``self.font_stats`` for each matching rule
    in *font_face_rules*. When *do_embed* is true, *font_usage_map* and
    *font_spec* are updated in place as well. Nothing is recorded for
    elements without text.
    """
    text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat)
    if not text:
        return

    def update_usage_for_embed(font, chars):
        if not do_embed:
            return
        ff = [icu_lower(x) for x in font.get('font-family', ())]
        if ff and ff[0] not in bad_fonts:
            # Usage is keyed on the font properties, using only the first
            # (lower-cased) family name.
            key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in font_keys))
            val = font_usage_map.get(key)
            if val is None:
                val = font_usage_map[key] = {'text': set()}
                for k in font_keys:
                    val[k] = font[k][0] if k == 'font-family' else font[k]
            val['text'] |= chars
        for ff in font.get('font-family', ()):
            if ff and icu_lower(ff) not in bad_fonts:
                font_spec.add(ff)

    def record(font, chars):
        # Shared tail of the three cases below.
        update_usage_for_embed(font, chars)
        for rule in get_matching_rules(font_face_rules, font):
            self.font_stats[rule['src']] |= chars

    font = get_font_dict(elem, resolve_property)
    record(font, frozenset(ord_string(text)) - exclude_chars)

    q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True)
    if q is not None:
        font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
        text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter')
        m = self.first_letter_pat.search(text.lstrip())
        if m is not None:
            record(font, frozenset(ord_string(m.group())) - exclude_chars)

    q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True)
    if q is not None:
        # The font must be resolved for the first-line pseudo-element itself;
        # this previously looked up 'first-letter' by mistake.
        font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-line')
        text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line')
        record(font, frozenset(ord_string(text)) - exclude_chars)
def get_font_usage(self, container, spine_name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed):
    """Run ``get_element_font_usage`` on every element inside the ``body``
    children of the parsed *spine_name* document, except elements whose tag
    is in ``skip_tags``."""
    root = container.parsed(spine_name)
    bodies = root.iterchildren(XHTML('body'))
    elements = (e for body in bodies for e in body.iter('*') if e.tag not in skip_tags)
    for elem in elements:
        self.get_element_font_usage(
            elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed,
            self.font_usage_map[spine_name], self.font_spec_map[spine_name])
def collect_font_stats(self, container, do_embed=False):
self.font_stats = {}
self.font_usage_map = {}
self.font_spec_map = {}
self.font_rule_map = {}
self.all_font_rules = {}
QTimer.singleShot(0, self.render_book)
processed_sheets = {}
for name, is_linear in container.spine_names:
self.font_rule_map[name] = font_face_rules = []
resolve_property, resolve_pseudo_property, select = resolve_styles(container, name, sheet_callback=partial(
self.collect_font_face_rules, container, processed_sheets, name))
if self.loop.exec_() == 1:
raise Exception('Failed to gather statistics from book, see log for details')
for rule in font_face_rules:
self.all_font_rules[rule['src']] = rule
if rule['src'] not in self.font_stats:
self.font_stats[rule['src']] = set()
def log_exception(self, *args):
    """Log the current exception via ``self.log.exception(*args)`` with the
    log's ``filter_level`` temporarily set to ``DEBUG``; the previous level
    is always restored."""
    log = self.log
    saved_level = log.filter_level
    try:
        log.filter_level = log.DEBUG
        log.exception(*args)
    finally:
        log.filter_level = saved_level
def render_book(self):
    """Render the next queued item, or exit the event loop once the queue is
    empty. Any failure is logged and exits the loop with code 1."""
    try:
        if self.render_queue:
            self.render_next()
        else:
            self.loop.exit()
    except:
        self.log_exception('Rendering failed')
        self.loop.exit(1)
def render_next(self):
    """Pop the first queued item, remember it as ``current_item`` and load it
    into ``self.view``."""
    next_item = self.render_queue.pop(0)
    self.current_item = item = unicode(next_item)
    load_html(item, self.view)
def collect(self, ok):
    """Handle the end of a page load.

    When *ok* is false, or when loading the JavaScript or collecting the
    statistics fails, the problem is logged and the event loop is exited
    with code 1. Otherwise rendering continues with ``render_book``.
    """
    if not ok:
        self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
        self.loop.exit(1)
        return
    try:
        self.page.load_js()
        self.collect_font_stats()
    except:
        self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
        self.loop.exit(1)
    else:
        self.render_book()
def href_to_name(self, href, warn_name):
    """Map a ``file://`` *href* to a container name.

    Returns None, after logging a warning that mentions *warn_name*, when
    *href* is not a ``file://`` URI or when the container has no entry for
    the resulting name.
    """
    scheme = 'file://'
    if not href.startswith(scheme):
        self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
        return None
    path = href[len(scheme):]
    # On windows a path of the form /C:... loses its leading slash.
    if iswindows and len(path) > 2 and path[0] == '/' and path[2] == ':':
        path = path[1:]
    path = unquote(path.replace('/', os.sep))
    name = self.container.abspath_to_name(path)
    if self.container.has_name(name):
        return name
    self.log.warn('Missing resource', href, 'in', warn_name,
                  'ignoring')
    return None
# Gather @font-face rules and font usage for the currently loaded item by
# calling the window.font_stats JavaScript helpers through self.page.evaljs
# and reading each result back from self.page.bridge_value.
# NOTE(review): indentation was lost in this view, so the nesting of the
# statements below has to be inferred -- confirm against the real file.
def collect_font_stats(self):
self.page.evaljs('window.font_stats.get_font_face_rules()')
font_face_rules = self.page.bridge_value
if not isinstance(font_face_rules, list):
raise Exception('Unknown error occurred while reading font-face rules')
# Weed out invalid font-face rules
rules = []
import tinycss
parser = tinycss.make_full_parser()
for rule in font_face_rules:
ff = rule.get('font-family', None)
if not ff:
continue
# self.parser parses the family list; the local tinycss parser is only
# used for the src declaration further down.
style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
ff = [x.value for x in
style.getProperty('font-family').propertyValue]
if not ff or ff[0] == 'inherit':
continue
rule['font-family'] = frozenset(icu_lower(f) for f in ff)
src = rule.get('src', None)
if not src:
continue
try:
tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
except Exception:
self.log.warn('Failed to parse @font-family src: %s' % src)
continue
# Use the first URI token that href_to_name can map to a container name.
for token in tokens:
if token.type == 'URI':
uv = token.value
if uv:
sn = self.href_to_name(uv, '@font-face rule')
if sn is not None:
rule['src'] = sn
break
else:
self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
continue
normalize_font_properties(rule)
rule['width'] = widths[rule['font-stretch']]
rule['weight'] = int(rule['font-weight'])
rules.append(rule)
# With no valid rules and no embedding requested there is nothing to record.
if not rules and not self.do_embed:
return
self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
for rule in rules:
self.all_font_rules[rule['src']] = rule
for rule in rules:
if rule['src'] not in self.font_stats:
self.font_stats[rule['src']] = set()
self.page.evaljs('window.font_stats.get_font_usage()')
font_usage = self.page.bridge_value
if not isinstance(font_usage, list):
raise Exception('Unknown error occurred while reading font usage')
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
pseudo_element_font_usage = self.page.bridge_value
if not isinstance(pseudo_element_font_usage, list):
raise Exception('Unknown error occurred while reading pseudo element font usage')
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
exclude = {'\n', '\r', '\t'}
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
for font in font_usage:
text = set()
# Apply text-transform and font-variant to each piece of text before its
# characters are added to the set.
for t in font['text']:
tt = (font['text-transform'] or '').lower()
if tt != 'none':
if tt == 'uppercase':
t = icu_upper(t)
elif tt == 'lowercase':
t = icu_lower(t)
elif tt == 'capitalize':
m = self.capitalize_pat.search(t)
if m is not None:
t += icu_upper(m.group())
fv = (font['font-variant'] or '').lower()
if fv in {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}:
t += icu_upper(t) # for renderers that try to fake small-caps by using small normal caps
text |= frozenset(t)
text.difference_update(exclude)
if not text:
continue
normalize_font_properties(font)
for rule in get_matching_rules(rules, font):
self.font_stats[rule['src']] |= text
if self.do_embed:
ff = [icu_lower(x) for x in font.get('font-family', [])]
if ff and ff[0] not in bad_fonts:
# Usage is keyed on the font properties, using only the first
# (lower-cased) family name.
keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
val = fu[key]
if not val:
val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
val['text'] = set()
val['text'] |= text
# Replace the defaultdict stored earlier with a plain dict.
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)
if self.do_embed:
self.page.evaljs('window.font_stats.get_font_families()')
font_families = self.page.bridge_value
if not isinstance(font_families, dict):
raise Exception('Unknown error occurred while reading font families')
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
# Families used by the pseudo-element rules are added to the same mapping.
for font_dict, text, pseudo in pseudo_element_font_usage:
font_families[font_dict['font-family']] = True
for raw in font_families.iterkeys():
for x in parse_font_families(self.parser, raw):
if x.lower() not in bad_fonts:
fs.add(x)
self.font_usage_map[name] = {}
self.font_spec_map[name] = set()
self.get_font_usage(container, name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed)
self.font_stats = {k:{safe_chr(x) for x in v} for k, v in self.font_stats.iteritems()}
for fum in self.font_usage_map.itervalues():
for v in fum.itervalues():
v['text'] = {safe_chr(x) for x in v['text']}
if __name__ == '__main__':
from calibre.ebooks.oeb.polish.container import get_container

View File

@ -12,6 +12,7 @@ from calibre.constants import iswindows
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS
from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name
from calibre.ebooks.oeb.polish.stats import StatsCollector, font_keys, normalize_font_properties, prepare_font_rule
from calibre.ebooks.oeb.polish.tests.base import BaseTest
from calibre.utils.logging import Log, Stream
@ -45,6 +46,12 @@ class VirtualContainer(ContainerBase):
self.parsed_cache[name] = self.files[name]
return self.parsed_cache[name]
@property
def spine_names(self):
    """Yield ``(name, True)`` for every name in ``mime_map`` whose MIME type
    is in ``OEB_DOCS``, in sorted name order."""
    mime_map = self.mime_map
    for name in sorted(mime_map):
        if mime_map[name] in OEB_DOCS:
            yield name, True
class CascadeTest(BaseTest):
def test_iterrules(self):
@ -131,3 +138,65 @@ class CascadeTest(BaseTest):
t('p', 'before', 'font-weight', 'bold')
t('p', 'first-letter', 'content')
t('p', 'first-letter', 'content', abort_on_missing=True)
def test_font_stats(self):
    """Check the statistics StatsCollector gathers for small in-memory books."""
    embeds = '@font-face { font-family: X; src: url(X.otf) }\n@font-face { font-family: X; src: url(XB.otf); font-weight: bold }'

    def get_stats(html, *fonts):
        # Build a virtual book with one HTML file, one stylesheet and the
        # font files referenced by the @font-face rules, then collect its
        # statistics with embedding enabled.
        styles = []
        html = '<html><head><link href="styles.css"></head><body>{}</body></html>'.format(html)
        files = {'index.html':html, 'X.otf':b'xxx', 'XB.otf': b'xbxb'}
        for font in fonts:
            styles.append('@font-face {')
            for k, v in font.iteritems():
                if k == 'src':
                    files[v] = b'xxx'
                    v = 'url(%s)' % v
                styles.append('%s : %s;' % (k, v))
            styles.append('}\n')
        files['styles.css'] = embeds + '\n'.join(styles)
        c = VirtualContainer(files)
        return StatsCollector(c, do_embed=True)

    def font(family, weight=None, style=None):
        # Normalized font description with a single family name.
        f = {}
        if weight is not None:
            f['font-weight'] = weight
        if style is not None:
            f['font-style'] = style
        f = normalize_font_properties(f)
        f['font-family'] = [family]
        return f

    def font_rule(src, *args, **kw):
        # Expected form of a collected @font-face rule.
        ans = font(*args, **kw)
        ans['font-family'] = list(map(icu_lower, ans['font-family']))
        prepare_font_rule(ans)
        ans['src'] = src
        return ans

    def fkey(*args, **kw):
        # Expected key of a font_usage_map entry.
        f = font(*args, **kw)
        f['font-family'] = icu_lower(f['font-family'][0])
        return frozenset((k, v) for k, v in f.iteritems() if k in font_keys)

    def fu(text, *args, **kw):
        # Expected (key, value) pair of a font_usage_map entry.
        key = fkey(*args, **kw)
        val = font(*args, **kw)
        val['text'] = set(text)
        val['font-family'] = val['font-family'][0]
        return key, val

    s = get_stats('<p style="font-family: X">abc<b>d\nef</b><i>ghi</i></p><p style="font-family: U">u</p>')
    # The normal font must include ghi as it will be used to simulate
    # italic by most rendering engines when the italic font is missing
    self.assertEqual(s.font_stats, {'XB.otf':set('def'), 'X.otf':set('abcghi')})
    self.assertEqual(s.font_spec_map, {'index.html':set('XU')})
    self.assertEqual(s.all_font_rules, {'X.otf':font_rule('X.otf', 'X'), 'XB.otf':font_rule('XB.otf', 'X', 'bold')})
    self.assertEqual(set(s.font_rule_map), {'index.html'})
    self.assertEqual(s.font_rule_map['index.html'], [font_rule('X.otf', 'X'), font_rule('XB.otf', 'X', 'bold')])
    self.assertEqual(set(s.font_usage_map), {'index.html'})
    self.assertEqual(s.font_usage_map['index.html'], dict([fu('abc', 'X'), fu('def', 'X', weight='bold'), fu('ghi', 'X', style='italic'), fu('u', 'U')]))

    s = get_stats('<p style="font-family: X; text-transform:uppercase">abc</p><b style="font-family: X; font-variant: small-caps">d\nef</b>')
    self.assertEqual(s.font_stats, {'XB.otf':set('defDEF'), 'X.otf':set('ABC')})