mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Stop using Qt WebKit to calculate font usage statistics
This commit is contained in:
parent
9a2dc518ad
commit
33e23e50ab
@ -1,127 +0,0 @@
|
|||||||
#!/usr/bin/env coffee
|
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
|
|
||||||
###
|
|
||||||
Copyright 2013, Kovid Goyal <kovid at kovidgoyal.net>
|
|
||||||
Released under the GPLv3 License
|
|
||||||
###
|
|
||||||
|
|
||||||
|
|
||||||
if window?.calibre_utils
|
|
||||||
log = window.calibre_utils.log
|
|
||||||
|
|
||||||
font_dict = (style, computed=false) ->
|
|
||||||
if computed
|
|
||||||
fams = []
|
|
||||||
family = style.getPropertyCSSValue('font-family')
|
|
||||||
if family.cssValueType == CSSValue.CSS_PRIMITIVE_VALUE
|
|
||||||
fams.push(family.getStringValue())
|
|
||||||
else
|
|
||||||
for f in family
|
|
||||||
fams.push(f.getStringValue())
|
|
||||||
else
|
|
||||||
fams = style.getPropertyValue('font-family')
|
|
||||||
return {
|
|
||||||
'font-family':fams,
|
|
||||||
'font-weight':style.getPropertyValue('font-weight'),
|
|
||||||
'font-style':style.getPropertyValue('font-style'),
|
|
||||||
'font-stretch':style.getPropertyValue('font-stretch'),
|
|
||||||
'text-transform':style.getPropertyValue('text-transform'),
|
|
||||||
'font-variant':style.getPropertyValue('font-variant'),
|
|
||||||
}
|
|
||||||
|
|
||||||
font_usage = (node) ->
|
|
||||||
style = window.getComputedStyle(node, null)
|
|
||||||
ans = font_dict(style, true)
|
|
||||||
text = []
|
|
||||||
for child in node.childNodes
|
|
||||||
if child.nodeType == Node.TEXT_NODE
|
|
||||||
text.push(child.nodeValue)
|
|
||||||
ans['text'] = text
|
|
||||||
return ans
|
|
||||||
|
|
||||||
process_sheet = (sheet, font_faces) ->
|
|
||||||
for rule in sheet.cssRules
|
|
||||||
if rule.type == rule.FONT_FACE_RULE
|
|
||||||
process_font_face_rule(rule, font_faces)
|
|
||||||
else if rule.type == rule.IMPORT_RULE and rule.styleSheet
|
|
||||||
process_sheet(rule.styleSheet, font_faces)
|
|
||||||
|
|
||||||
process_font_face_rule = (rule, font_faces) ->
|
|
||||||
fd = font_dict(rule.style)
|
|
||||||
fd['src'] = rule.style.getPropertyValue('src')
|
|
||||||
font_faces.push(fd)
|
|
||||||
|
|
||||||
fl_pat = /:{1,2}(first-letter|first-line)/i
|
|
||||||
|
|
||||||
process_sheet_for_pseudo = (sheet, rules) ->
|
|
||||||
for rule in sheet.cssRules
|
|
||||||
if rule.type == rule.STYLE_RULE
|
|
||||||
st = rule.selectorText
|
|
||||||
m = fl_pat.exec(st)
|
|
||||||
if m
|
|
||||||
pseudo = m[1].toLowerCase()
|
|
||||||
ff = rule.style.getPropertyValue('font-family')
|
|
||||||
if ff
|
|
||||||
process_style_rule(st, rule.style, rules, pseudo)
|
|
||||||
else if rule.type == rule.IMPORT_RULE and rule.styleSheet
|
|
||||||
process_sheet_for_pseudo(rule.styleSheet, rules)
|
|
||||||
|
|
||||||
process_style_rule = (selector_text, style, rules, pseudo) ->
|
|
||||||
selector_text = selector_text.replace(fl_pat, '')
|
|
||||||
fd = font_dict(style)
|
|
||||||
for element in document.querySelectorAll(selector_text)
|
|
||||||
text = element.innerText
|
|
||||||
if text
|
|
||||||
rules.push([fd, text, pseudo])
|
|
||||||
|
|
||||||
class FontStats
|
|
||||||
# This class is a namespace to expose functions via the
|
|
||||||
# window.font_stats object.
|
|
||||||
|
|
||||||
constructor: () ->
|
|
||||||
if not this instanceof arguments.callee
|
|
||||||
throw new Error('FontStats constructor called as function')
|
|
||||||
|
|
||||||
get_font_face_rules: () ->
|
|
||||||
font_faces = []
|
|
||||||
for sheet in document.styleSheets
|
|
||||||
process_sheet(sheet, font_faces)
|
|
||||||
py_bridge.value = font_faces
|
|
||||||
|
|
||||||
get_font_usage: () ->
|
|
||||||
ans = []
|
|
||||||
busage = font_usage(document.body)
|
|
||||||
if busage != null
|
|
||||||
ans.push(busage)
|
|
||||||
for node in document.body.getElementsByTagName('*')
|
|
||||||
usage = font_usage(node)
|
|
||||||
if usage != null
|
|
||||||
ans.push(usage)
|
|
||||||
py_bridge.value = ans
|
|
||||||
|
|
||||||
get_pseudo_element_font_usage: () ->
|
|
||||||
ans = []
|
|
||||||
for sheet in document.styleSheets
|
|
||||||
process_sheet_for_pseudo(sheet, ans)
|
|
||||||
py_bridge.value = ans
|
|
||||||
|
|
||||||
get_font_families: () ->
|
|
||||||
ans = {}
|
|
||||||
for node in document.getElementsByTagName('*')
|
|
||||||
rules = document.defaultView.getMatchedCSSRules(node, '')
|
|
||||||
if rules
|
|
||||||
for rule in rules
|
|
||||||
style = rule.style
|
|
||||||
family = style.getPropertyValue('font-family')
|
|
||||||
if family
|
|
||||||
ans[family] = true
|
|
||||||
if node.getAttribute('style')
|
|
||||||
family = node.style.getPropertyValue('font-family')
|
|
||||||
if family
|
|
||||||
ans[family] = true
|
|
||||||
py_bridge.value = ans
|
|
||||||
|
|
||||||
if window?
|
|
||||||
window.font_stats = new FontStats()
|
|
||||||
|
|
@ -7,19 +7,15 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import json, sys, os, logging
|
import sys
|
||||||
from urllib import unquote
|
from functools import partial
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
|
from lxml.etree import tostring
|
||||||
import regex
|
import regex
|
||||||
from cssutils import CSSParser
|
|
||||||
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
|
|
||||||
pyqtSlot)
|
|
||||||
from PyQt5.QtWebKitWidgets import QWebPage, QWebView
|
|
||||||
|
|
||||||
from calibre.constants import iswindows
|
from calibre.ebooks.oeb.base import XHTML
|
||||||
from calibre.ebooks.oeb.display.webview import load_html
|
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, iterdeclaration
|
||||||
from calibre.gui2 import must_use_qt
|
from calibre.utils.icu import ord_string, safe_chr
|
||||||
|
|
||||||
def normalize_font_properties(font):
|
def normalize_font_properties(font):
|
||||||
w = font.get('font-weight', None)
|
w = font.get('font-weight', None)
|
||||||
@ -102,286 +98,200 @@ def get_matching_rules(rules, font):
|
|||||||
return m
|
return m
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def parse_font_families(parser, raw):
|
def get_css_text(elem, resolve_pseudo_property, which='before'):
|
||||||
style = parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
|
text = resolve_pseudo_property(elem, which, 'content')[0].value
|
||||||
for x in style.propertyValue:
|
if text and len(text) > 2 and text[0] == '"' and text[-1] == '"':
|
||||||
x = x.value
|
return text[1:-1]
|
||||||
if x:
|
return ''
|
||||||
yield x
|
|
||||||
|
|
||||||
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
|
caps_variants = {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}
|
||||||
|
|
||||||
|
def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize_pat, for_pseudo=None):
|
||||||
ans = []
|
ans = []
|
||||||
for font_dict, text, pseudo in pseudo_element_font_usage:
|
before = get_css_text(elem, resolve_pseudo_property)
|
||||||
text = text.strip()
|
if before:
|
||||||
if pseudo == 'first-letter':
|
ans.append(before)
|
||||||
prefix = first_letter_pat.match(text)
|
if for_pseudo is not None:
|
||||||
if prefix is not None:
|
ans.append(tostring(elem, method='text', encoding=unicode, with_tail=False))
|
||||||
text = prefix + text[len(prefix):].lstrip()[:1]
|
else:
|
||||||
else:
|
if elem.text:
|
||||||
text = text[:1]
|
ans.append(elem.text)
|
||||||
if text:
|
for child in elem.iterchildren():
|
||||||
font = font_dict.copy()
|
t = getattr(child, 'tail', '')
|
||||||
font['text'] = text
|
if t:
|
||||||
font['font-family'] = list(parse_font_families(parser, font['font-family']))
|
ans.append(t)
|
||||||
ans.append(font)
|
after = get_css_text(elem, resolve_pseudo_property, 'after')
|
||||||
|
if after:
|
||||||
|
ans.append(after)
|
||||||
|
ans = ''.join(ans)
|
||||||
|
if for_pseudo is not None:
|
||||||
|
tt = resolve_pseudo_property(elem, for_pseudo, 'text-transform')[0].value
|
||||||
|
fv = resolve_pseudo_property(elem, for_pseudo, 'font-variant')[0].value
|
||||||
|
else:
|
||||||
|
tt = resolve_property(elem, 'text-transform')[0].value
|
||||||
|
fv = resolve_property(elem, 'font-variant')[0].value
|
||||||
|
if fv in caps_variants:
|
||||||
|
ans += icu_upper(ans)
|
||||||
|
if tt != 'none':
|
||||||
|
if tt == 'uppercase':
|
||||||
|
ans = icu_upper(ans)
|
||||||
|
elif tt == 'lowercase':
|
||||||
|
ans = icu_lower(ans)
|
||||||
|
elif tt == 'capitalize':
|
||||||
|
m = capitalize_pat.search(ans)
|
||||||
|
if m is not None:
|
||||||
|
ans += icu_upper(m.group())
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
class Page(QWebPage): # {{{
|
def get_font_dict(elem, resolve_property, pseudo=None):
|
||||||
|
ans = {}
|
||||||
|
if pseudo is None:
|
||||||
|
ff = resolve_property(elem, 'font-family')
|
||||||
|
else:
|
||||||
|
ff = resolve_property(elem, pseudo, 'font-family')
|
||||||
|
ans['font-family'] = tuple(x.value for x in ff)
|
||||||
|
for p in 'weight', 'style', 'stretch':
|
||||||
|
p = 'font-' + p
|
||||||
|
rp = resolve_property(elem, p) if pseudo is None else resolve_property(elem, pseudo, p)
|
||||||
|
ans[p] = type('')(rp[0].value)
|
||||||
|
normalize_font_properties(ans)
|
||||||
|
return ans
|
||||||
|
|
||||||
def __init__(self, log):
|
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
||||||
self.log = log
|
exclude_chars = frozenset(ord_string('\n\r\t'))
|
||||||
QWebPage.__init__(self)
|
skip_tags = {XHTML(x) for x in 'script style title meta link'.split()}
|
||||||
self.js = None
|
font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
|
||||||
self.evaljs = self.mainFrame().evaluateJavaScript
|
|
||||||
self.bridge_value = None
|
|
||||||
nam = self.networkAccessManager()
|
|
||||||
nam.setNetworkAccessible(nam.NotAccessible)
|
|
||||||
self.longjs_counter = 0
|
|
||||||
|
|
||||||
def javaScriptConsoleMessage(self, msg, lineno, msgid):
|
def prepare_font_rule(cssdict):
|
||||||
self.log(u'JS:', unicode(msg))
|
cssdict['font-family'] = frozenset(cssdict['font-family'][:1])
|
||||||
|
cssdict['width'] = widths[cssdict['font-stretch']]
|
||||||
def javaScriptAlert(self, frame, msg):
|
cssdict['weight'] = int(cssdict['font-weight'])
|
||||||
self.log(unicode(msg))
|
|
||||||
|
|
||||||
@pyqtSlot(result=bool)
|
|
||||||
def shouldInterruptJavaScript(self):
|
|
||||||
if self.longjs_counter < 5:
|
|
||||||
self.log('Long running javascript, letting it proceed')
|
|
||||||
self.longjs_counter += 1
|
|
||||||
return False
|
|
||||||
self.log.warn('Long running javascript, aborting it')
|
|
||||||
return True
|
|
||||||
|
|
||||||
def _pass_json_value_getter(self):
|
|
||||||
val = json.dumps(self.bridge_value)
|
|
||||||
return val
|
|
||||||
|
|
||||||
def _pass_json_value_setter(self, value):
|
|
||||||
# Qt WebKit in Qt 4.x adds extra null bytes to the end of the string
|
|
||||||
# if the JSON contains non-BMP characters
|
|
||||||
self.bridge_value = json.loads(unicode(value).rstrip('\0'))
|
|
||||||
|
|
||||||
_pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter,
|
|
||||||
fset=_pass_json_value_setter)
|
|
||||||
|
|
||||||
def load_js(self):
|
|
||||||
self.longjs_counter = 0
|
|
||||||
if self.js is None:
|
|
||||||
from calibre.utils.resources import compiled_coffeescript
|
|
||||||
self.js = compiled_coffeescript('ebooks.oeb.display.utils')
|
|
||||||
self.js += compiled_coffeescript('ebooks.oeb.polish.font_stats')
|
|
||||||
self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
|
|
||||||
self.evaljs(self.js)
|
|
||||||
self.evaljs('''
|
|
||||||
Object.defineProperty(py_bridge, 'value', {
|
|
||||||
get : function() { return JSON.parse(this._pass_json_value); },
|
|
||||||
set : function(val) { this._pass_json_value = JSON.stringify(val); }
|
|
||||||
});
|
|
||||||
''')
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class StatsCollector(object):
|
class StatsCollector(object):
|
||||||
|
|
||||||
|
first_letter_pat = capitalize_pat = None
|
||||||
|
|
||||||
def __init__(self, container, do_embed=False):
|
def __init__(self, container, do_embed=False):
|
||||||
self.container = container
|
if self.first_letter_pat is None:
|
||||||
self.log = self.logger = container.log
|
StatsCollector.first_letter_pat = self.first_letter_pat = regex.compile(
|
||||||
self.do_embed = do_embed
|
r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
|
||||||
must_use_qt()
|
StatsCollector.capitalize_pat = self.capitalize_pat = regex.compile(
|
||||||
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
|
r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)
|
||||||
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
|
|
||||||
self.capitalize_pat = regex.compile(r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)
|
|
||||||
|
|
||||||
self.loop = QEventLoop()
|
self.collect_font_stats(container, do_embed)
|
||||||
self.view = QWebView()
|
|
||||||
self.page = Page(self.log)
|
|
||||||
self.view.setPage(self.page)
|
|
||||||
self.page.setViewportSize(QSize(1200, 1600))
|
|
||||||
|
|
||||||
self.view.loadFinished.connect(self.collect,
|
def collect_font_face_rules(self, container, processed, spine_name, sheet, sheet_name):
|
||||||
type=Qt.QueuedConnection)
|
if sheet_name in processed:
|
||||||
|
sheet_rules = processed[sheet_name]
|
||||||
|
else:
|
||||||
|
sheet_rules = []
|
||||||
|
if sheet_name != spine_name:
|
||||||
|
processed[sheet_name] = sheet_rules
|
||||||
|
for rule, base_name, rule_index in iterrules(container, sheet_name, rules=sheet, rule_type='FONT_FACE_RULE'):
|
||||||
|
cssdict = {}
|
||||||
|
for prop in iterdeclaration(rule.style):
|
||||||
|
if prop.name == 'font-family':
|
||||||
|
cssdict['font-family'] = [icu_lower(x.value) for x in prop.propertyValue]
|
||||||
|
elif prop.name.startswith('font-'):
|
||||||
|
cssdict[prop.name] = prop.propertyValue[0].value
|
||||||
|
elif prop.name == 'src':
|
||||||
|
for val in prop.propertyValue:
|
||||||
|
x = val.value
|
||||||
|
fname = container.href_to_name(x, sheet_name)
|
||||||
|
if container.has_name(fname):
|
||||||
|
cssdict['src'] = fname
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
container.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % prop.propertyValue.cssText)
|
||||||
|
if 'src' not in cssdict:
|
||||||
|
continue
|
||||||
|
ff = cssdict.get('font-family')
|
||||||
|
if not ff or ff[0] in bad_fonts:
|
||||||
|
continue
|
||||||
|
normalize_font_properties(cssdict)
|
||||||
|
prepare_font_rule(cssdict)
|
||||||
|
sheet_rules.append(cssdict)
|
||||||
|
self.font_rule_map[spine_name].extend(sheet_rules)
|
||||||
|
|
||||||
self.render_queue = list(container.spine_items)
|
def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec):
|
||||||
|
text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat)
|
||||||
|
if not text:
|
||||||
|
return
|
||||||
|
|
||||||
|
def update_usage_for_embed(font, chars):
|
||||||
|
if not do_embed:
|
||||||
|
return
|
||||||
|
ff = [icu_lower(x) for x in font.get('font-family', ())]
|
||||||
|
if ff and ff[0] not in bad_fonts:
|
||||||
|
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in font_keys))
|
||||||
|
val = font_usage_map.get(key)
|
||||||
|
if val is None:
|
||||||
|
val = font_usage_map[key] = {'text': set()}
|
||||||
|
for k in font_keys:
|
||||||
|
val[k] = font[k][0] if k == 'font-family' else font[k]
|
||||||
|
val['text'] |= chars
|
||||||
|
for ff in font.get('font-family', ()):
|
||||||
|
if ff and icu_lower(ff) not in bad_fonts:
|
||||||
|
font_spec.add(ff)
|
||||||
|
|
||||||
|
font = get_font_dict(elem, resolve_property)
|
||||||
|
chars = frozenset(ord_string(text)) - exclude_chars
|
||||||
|
update_usage_for_embed(font, chars)
|
||||||
|
for rule in get_matching_rules(font_face_rules, font):
|
||||||
|
self.font_stats[rule['src']] |= chars
|
||||||
|
q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True)
|
||||||
|
if q is not None:
|
||||||
|
font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
|
||||||
|
text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter')
|
||||||
|
m = self.first_letter_pat.search(text.lstrip())
|
||||||
|
if m is not None:
|
||||||
|
chars = frozenset(ord_string(m.group())) - exclude_chars
|
||||||
|
update_usage_for_embed(font, chars)
|
||||||
|
for rule in get_matching_rules(font_face_rules, font):
|
||||||
|
self.font_stats[rule['src']] |= chars
|
||||||
|
q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True)
|
||||||
|
if q is not None:
|
||||||
|
font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
|
||||||
|
text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line')
|
||||||
|
chars = frozenset(ord_string(text)) - exclude_chars
|
||||||
|
update_usage_for_embed(font, chars)
|
||||||
|
for rule in get_matching_rules(font_face_rules, font):
|
||||||
|
self.font_stats[rule['src']] |= chars
|
||||||
|
|
||||||
|
def get_font_usage(self, container, spine_name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed):
|
||||||
|
root = container.parsed(spine_name)
|
||||||
|
for body in root.iterchildren(XHTML('body')):
|
||||||
|
for elem in body.iter('*'):
|
||||||
|
if elem.tag not in skip_tags:
|
||||||
|
self.get_element_font_usage(
|
||||||
|
elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed,
|
||||||
|
self.font_usage_map[spine_name], self.font_spec_map[spine_name])
|
||||||
|
|
||||||
|
def collect_font_stats(self, container, do_embed=False):
|
||||||
self.font_stats = {}
|
self.font_stats = {}
|
||||||
self.font_usage_map = {}
|
self.font_usage_map = {}
|
||||||
self.font_spec_map = {}
|
self.font_spec_map = {}
|
||||||
self.font_rule_map = {}
|
self.font_rule_map = {}
|
||||||
self.all_font_rules = {}
|
self.all_font_rules = {}
|
||||||
|
|
||||||
QTimer.singleShot(0, self.render_book)
|
processed_sheets = {}
|
||||||
|
for name, is_linear in container.spine_names:
|
||||||
|
self.font_rule_map[name] = font_face_rules = []
|
||||||
|
resolve_property, resolve_pseudo_property, select = resolve_styles(container, name, sheet_callback=partial(
|
||||||
|
self.collect_font_face_rules, container, processed_sheets, name))
|
||||||
|
|
||||||
if self.loop.exec_() == 1:
|
for rule in font_face_rules:
|
||||||
raise Exception('Failed to gather statistics from book, see log for details')
|
self.all_font_rules[rule['src']] = rule
|
||||||
|
if rule['src'] not in self.font_stats:
|
||||||
|
self.font_stats[rule['src']] = set()
|
||||||
|
|
||||||
def log_exception(self, *args):
|
self.font_usage_map[name] = {}
|
||||||
orig = self.log.filter_level
|
self.font_spec_map[name] = set()
|
||||||
try:
|
self.get_font_usage(container, name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed)
|
||||||
self.log.filter_level = self.log.DEBUG
|
self.font_stats = {k:{safe_chr(x) for x in v} for k, v in self.font_stats.iteritems()}
|
||||||
self.log.exception(*args)
|
for fum in self.font_usage_map.itervalues():
|
||||||
finally:
|
for v in fum.itervalues():
|
||||||
self.log.filter_level = orig
|
v['text'] = {safe_chr(x) for x in v['text']}
|
||||||
|
|
||||||
def render_book(self):
|
|
||||||
try:
|
|
||||||
if not self.render_queue:
|
|
||||||
self.loop.exit()
|
|
||||||
else:
|
|
||||||
self.render_next()
|
|
||||||
except:
|
|
||||||
self.log_exception('Rendering failed')
|
|
||||||
self.loop.exit(1)
|
|
||||||
|
|
||||||
def render_next(self):
|
|
||||||
item = unicode(self.render_queue.pop(0))
|
|
||||||
self.current_item = item
|
|
||||||
load_html(item, self.view)
|
|
||||||
|
|
||||||
def collect(self, ok):
|
|
||||||
if not ok:
|
|
||||||
self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
|
|
||||||
self.loop.exit(1)
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
self.page.load_js()
|
|
||||||
self.collect_font_stats()
|
|
||||||
except:
|
|
||||||
self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
|
|
||||||
self.loop.exit(1)
|
|
||||||
return
|
|
||||||
|
|
||||||
self.render_book()
|
|
||||||
|
|
||||||
def href_to_name(self, href, warn_name):
|
|
||||||
if not href.startswith('file://'):
|
|
||||||
self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
|
|
||||||
return None
|
|
||||||
src = href[len('file://'):]
|
|
||||||
if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
|
|
||||||
src = src[1:]
|
|
||||||
src = src.replace('/', os.sep)
|
|
||||||
src = unquote(src)
|
|
||||||
name = self.container.abspath_to_name(src)
|
|
||||||
if not self.container.has_name(name):
|
|
||||||
self.log.warn('Missing resource', href, 'in', warn_name,
|
|
||||||
'ignoring')
|
|
||||||
return None
|
|
||||||
return name
|
|
||||||
|
|
||||||
def collect_font_stats(self):
|
|
||||||
self.page.evaljs('window.font_stats.get_font_face_rules()')
|
|
||||||
font_face_rules = self.page.bridge_value
|
|
||||||
if not isinstance(font_face_rules, list):
|
|
||||||
raise Exception('Unknown error occurred while reading font-face rules')
|
|
||||||
|
|
||||||
# Weed out invalid font-face rules
|
|
||||||
rules = []
|
|
||||||
import tinycss
|
|
||||||
parser = tinycss.make_full_parser()
|
|
||||||
for rule in font_face_rules:
|
|
||||||
ff = rule.get('font-family', None)
|
|
||||||
if not ff:
|
|
||||||
continue
|
|
||||||
style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
|
|
||||||
ff = [x.value for x in
|
|
||||||
style.getProperty('font-family').propertyValue]
|
|
||||||
if not ff or ff[0] == 'inherit':
|
|
||||||
continue
|
|
||||||
rule['font-family'] = frozenset(icu_lower(f) for f in ff)
|
|
||||||
src = rule.get('src', None)
|
|
||||||
if not src:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
|
|
||||||
except Exception:
|
|
||||||
self.log.warn('Failed to parse @font-family src: %s' % src)
|
|
||||||
continue
|
|
||||||
for token in tokens:
|
|
||||||
if token.type == 'URI':
|
|
||||||
uv = token.value
|
|
||||||
if uv:
|
|
||||||
sn = self.href_to_name(uv, '@font-face rule')
|
|
||||||
if sn is not None:
|
|
||||||
rule['src'] = sn
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
|
|
||||||
continue
|
|
||||||
normalize_font_properties(rule)
|
|
||||||
rule['width'] = widths[rule['font-stretch']]
|
|
||||||
rule['weight'] = int(rule['font-weight'])
|
|
||||||
rules.append(rule)
|
|
||||||
|
|
||||||
if not rules and not self.do_embed:
|
|
||||||
return
|
|
||||||
|
|
||||||
self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
|
|
||||||
for rule in rules:
|
|
||||||
self.all_font_rules[rule['src']] = rule
|
|
||||||
|
|
||||||
for rule in rules:
|
|
||||||
if rule['src'] not in self.font_stats:
|
|
||||||
self.font_stats[rule['src']] = set()
|
|
||||||
|
|
||||||
self.page.evaljs('window.font_stats.get_font_usage()')
|
|
||||||
font_usage = self.page.bridge_value
|
|
||||||
if not isinstance(font_usage, list):
|
|
||||||
raise Exception('Unknown error occurred while reading font usage')
|
|
||||||
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
|
|
||||||
pseudo_element_font_usage = self.page.bridge_value
|
|
||||||
if not isinstance(pseudo_element_font_usage, list):
|
|
||||||
raise Exception('Unknown error occurred while reading pseudo element font usage')
|
|
||||||
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
|
|
||||||
exclude = {'\n', '\r', '\t'}
|
|
||||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
|
|
||||||
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
|
||||||
for font in font_usage:
|
|
||||||
text = set()
|
|
||||||
for t in font['text']:
|
|
||||||
tt = (font['text-transform'] or '').lower()
|
|
||||||
if tt != 'none':
|
|
||||||
if tt == 'uppercase':
|
|
||||||
t = icu_upper(t)
|
|
||||||
elif tt == 'lowercase':
|
|
||||||
t = icu_lower(t)
|
|
||||||
elif tt == 'capitalize':
|
|
||||||
m = self.capitalize_pat.search(t)
|
|
||||||
if m is not None:
|
|
||||||
t += icu_upper(m.group())
|
|
||||||
fv = (font['font-variant'] or '').lower()
|
|
||||||
if fv in {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}:
|
|
||||||
t += icu_upper(t) # for renderers that try to fake small-caps by using small normal caps
|
|
||||||
text |= frozenset(t)
|
|
||||||
text.difference_update(exclude)
|
|
||||||
if not text:
|
|
||||||
continue
|
|
||||||
normalize_font_properties(font)
|
|
||||||
for rule in get_matching_rules(rules, font):
|
|
||||||
self.font_stats[rule['src']] |= text
|
|
||||||
if self.do_embed:
|
|
||||||
ff = [icu_lower(x) for x in font.get('font-family', [])]
|
|
||||||
if ff and ff[0] not in bad_fonts:
|
|
||||||
keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
|
|
||||||
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
|
|
||||||
val = fu[key]
|
|
||||||
if not val:
|
|
||||||
val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
|
|
||||||
val['text'] = set()
|
|
||||||
val['text'] |= text
|
|
||||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)
|
|
||||||
|
|
||||||
if self.do_embed:
|
|
||||||
self.page.evaljs('window.font_stats.get_font_families()')
|
|
||||||
font_families = self.page.bridge_value
|
|
||||||
if not isinstance(font_families, dict):
|
|
||||||
raise Exception('Unknown error occurred while reading font families')
|
|
||||||
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
|
|
||||||
for font_dict, text, pseudo in pseudo_element_font_usage:
|
|
||||||
font_families[font_dict['font-family']] = True
|
|
||||||
for raw in font_families.iterkeys():
|
|
||||||
for x in parse_font_families(self.parser, raw):
|
|
||||||
if x.lower() not in bad_fonts:
|
|
||||||
fs.add(x)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from calibre.ebooks.oeb.polish.container import get_container
|
from calibre.ebooks.oeb.polish.container import get_container
|
||||||
|
@ -12,6 +12,7 @@ from calibre.constants import iswindows
|
|||||||
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
|
||||||
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS
|
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS
|
||||||
from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name
|
from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name
|
||||||
|
from calibre.ebooks.oeb.polish.stats import StatsCollector, font_keys, normalize_font_properties, prepare_font_rule
|
||||||
from calibre.ebooks.oeb.polish.tests.base import BaseTest
|
from calibre.ebooks.oeb.polish.tests.base import BaseTest
|
||||||
from calibre.utils.logging import Log, Stream
|
from calibre.utils.logging import Log, Stream
|
||||||
|
|
||||||
@ -45,6 +46,12 @@ class VirtualContainer(ContainerBase):
|
|||||||
self.parsed_cache[name] = self.files[name]
|
self.parsed_cache[name] = self.files[name]
|
||||||
return self.parsed_cache[name]
|
return self.parsed_cache[name]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def spine_names(self):
|
||||||
|
for name in sorted(self.mime_map):
|
||||||
|
if self.mime_map[name] in OEB_DOCS:
|
||||||
|
yield name, True
|
||||||
|
|
||||||
class CascadeTest(BaseTest):
|
class CascadeTest(BaseTest):
|
||||||
|
|
||||||
def test_iterrules(self):
|
def test_iterrules(self):
|
||||||
@ -131,3 +138,65 @@ class CascadeTest(BaseTest):
|
|||||||
t('p', 'before', 'font-weight', 'bold')
|
t('p', 'before', 'font-weight', 'bold')
|
||||||
t('p', 'first-letter', 'content')
|
t('p', 'first-letter', 'content')
|
||||||
t('p', 'first-letter', 'content', abort_on_missing=True)
|
t('p', 'first-letter', 'content', abort_on_missing=True)
|
||||||
|
|
||||||
|
def test_font_stats(self):
|
||||||
|
embeds = '@font-face { font-family: X; src: url(X.otf) }\n@font-face { font-family: X; src: url(XB.otf); font-weight: bold }'
|
||||||
|
def get_stats(html, *fonts):
|
||||||
|
styles = []
|
||||||
|
html = '<html><head><link href="styles.css"></head><body>{}</body></html>'.format(html)
|
||||||
|
files = {'index.html':html, 'X.otf':b'xxx', 'XB.otf': b'xbxb'}
|
||||||
|
for font in fonts:
|
||||||
|
styles.append('@font-face {')
|
||||||
|
for k, v in font.iteritems():
|
||||||
|
if k == 'src':
|
||||||
|
files[v] = b'xxx'
|
||||||
|
v = 'url(%s)' % v
|
||||||
|
styles.append('%s : %s;' % (k, v))
|
||||||
|
styles.append('}\n')
|
||||||
|
html = '<html><head><link href="styles.css"></head><body>{}</body></html>'.format(html)
|
||||||
|
files['styles.css'] = embeds + '\n'.join(styles)
|
||||||
|
c = VirtualContainer(files)
|
||||||
|
return StatsCollector(c, do_embed=True)
|
||||||
|
|
||||||
|
def font(family, weight=None, style=None):
|
||||||
|
f = {}
|
||||||
|
if weight is not None:
|
||||||
|
f['font-weight'] = weight
|
||||||
|
if style is not None:
|
||||||
|
f['font-style'] = style
|
||||||
|
f = normalize_font_properties(f)
|
||||||
|
f['font-family'] = [family]
|
||||||
|
return f
|
||||||
|
|
||||||
|
def font_rule(src, *args, **kw):
|
||||||
|
ans = font(*args, **kw)
|
||||||
|
ans['font-family'] = list(map(icu_lower, ans['font-family']))
|
||||||
|
prepare_font_rule(ans)
|
||||||
|
ans['src'] = src
|
||||||
|
return ans
|
||||||
|
|
||||||
|
def fkey(*args, **kw):
|
||||||
|
f = font(*args, **kw)
|
||||||
|
f['font-family'] = icu_lower(f['font-family'][0])
|
||||||
|
return frozenset((k, v) for k, v in f.iteritems() if k in font_keys)
|
||||||
|
|
||||||
|
def fu(text, *args, **kw):
|
||||||
|
key = fkey(*args, **kw)
|
||||||
|
val = font(*args, **kw)
|
||||||
|
val['text'] = set(text)
|
||||||
|
val['font-family'] = val['font-family'][0]
|
||||||
|
return key, val
|
||||||
|
|
||||||
|
s = get_stats('<p style="font-family: X">abc<b>d\nef</b><i>ghi</i></p><p style="font-family: U">u</p>')
|
||||||
|
# The normal font must include ghi as it will be used to simulate
|
||||||
|
# italic by most rendering engines when the italic font is missing
|
||||||
|
self.assertEqual(s.font_stats, {'XB.otf':set('def'), 'X.otf':set('abcghi')})
|
||||||
|
self.assertEqual(s.font_spec_map, {'index.html':set('XU')})
|
||||||
|
self.assertEqual(s.all_font_rules, {'X.otf':font_rule('X.otf', 'X'), 'XB.otf':font_rule('XB.otf', 'X', 'bold')})
|
||||||
|
self.assertEqual(set(s.font_rule_map), {'index.html'})
|
||||||
|
self.assertEqual(s.font_rule_map['index.html'], [font_rule('X.otf', 'X'), font_rule('XB.otf', 'X', 'bold')])
|
||||||
|
self.assertEqual(set(s.font_usage_map), {'index.html'})
|
||||||
|
self.assertEqual(s.font_usage_map['index.html'], dict([fu('abc', 'X'), fu('def', 'X', weight='bold'), fu('ghi', 'X', style='italic'), fu('u', 'U')]))
|
||||||
|
|
||||||
|
s = get_stats('<p style="font-family: X; text-transform:uppercase">abc</p><b style="font-family: X; font-variant: small-caps">d\nef</b>')
|
||||||
|
self.assertEqual(s.font_stats, {'XB.otf':set('defDEF'), 'X.otf':set('ABC')})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user