mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Stop using Qt WebKit to calculate font usage statistics
This commit is contained in:
parent
9a2dc518ad
commit
33e23e50ab
@ -1,127 +0,0 @@
|
||||
#!/usr/bin/env coffee
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
###
|
||||
Copyright 2013, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
Released under the GPLv3 License
|
||||
###
|
||||
|
||||
|
||||
if window?.calibre_utils
|
||||
log = window.calibre_utils.log
|
||||
|
||||
font_dict = (style, computed=false) ->
|
||||
if computed
|
||||
fams = []
|
||||
family = style.getPropertyCSSValue('font-family')
|
||||
if family.cssValueType == CSSValue.CSS_PRIMITIVE_VALUE
|
||||
fams.push(family.getStringValue())
|
||||
else
|
||||
for f in family
|
||||
fams.push(f.getStringValue())
|
||||
else
|
||||
fams = style.getPropertyValue('font-family')
|
||||
return {
|
||||
'font-family':fams,
|
||||
'font-weight':style.getPropertyValue('font-weight'),
|
||||
'font-style':style.getPropertyValue('font-style'),
|
||||
'font-stretch':style.getPropertyValue('font-stretch'),
|
||||
'text-transform':style.getPropertyValue('text-transform'),
|
||||
'font-variant':style.getPropertyValue('font-variant'),
|
||||
}
|
||||
|
||||
font_usage = (node) ->
|
||||
style = window.getComputedStyle(node, null)
|
||||
ans = font_dict(style, true)
|
||||
text = []
|
||||
for child in node.childNodes
|
||||
if child.nodeType == Node.TEXT_NODE
|
||||
text.push(child.nodeValue)
|
||||
ans['text'] = text
|
||||
return ans
|
||||
|
||||
process_sheet = (sheet, font_faces) ->
|
||||
for rule in sheet.cssRules
|
||||
if rule.type == rule.FONT_FACE_RULE
|
||||
process_font_face_rule(rule, font_faces)
|
||||
else if rule.type == rule.IMPORT_RULE and rule.styleSheet
|
||||
process_sheet(rule.styleSheet, font_faces)
|
||||
|
||||
process_font_face_rule = (rule, font_faces) ->
|
||||
fd = font_dict(rule.style)
|
||||
fd['src'] = rule.style.getPropertyValue('src')
|
||||
font_faces.push(fd)
|
||||
|
||||
fl_pat = /:{1,2}(first-letter|first-line)/i
|
||||
|
||||
process_sheet_for_pseudo = (sheet, rules) ->
|
||||
for rule in sheet.cssRules
|
||||
if rule.type == rule.STYLE_RULE
|
||||
st = rule.selectorText
|
||||
m = fl_pat.exec(st)
|
||||
if m
|
||||
pseudo = m[1].toLowerCase()
|
||||
ff = rule.style.getPropertyValue('font-family')
|
||||
if ff
|
||||
process_style_rule(st, rule.style, rules, pseudo)
|
||||
else if rule.type == rule.IMPORT_RULE and rule.styleSheet
|
||||
process_sheet_for_pseudo(rule.styleSheet, rules)
|
||||
|
||||
process_style_rule = (selector_text, style, rules, pseudo) ->
|
||||
selector_text = selector_text.replace(fl_pat, '')
|
||||
fd = font_dict(style)
|
||||
for element in document.querySelectorAll(selector_text)
|
||||
text = element.innerText
|
||||
if text
|
||||
rules.push([fd, text, pseudo])
|
||||
|
||||
class FontStats
|
||||
# This class is a namespace to expose functions via the
|
||||
# window.font_stats object.
|
||||
|
||||
constructor: () ->
|
||||
if not this instanceof arguments.callee
|
||||
throw new Error('FontStats constructor called as function')
|
||||
|
||||
get_font_face_rules: () ->
|
||||
font_faces = []
|
||||
for sheet in document.styleSheets
|
||||
process_sheet(sheet, font_faces)
|
||||
py_bridge.value = font_faces
|
||||
|
||||
get_font_usage: () ->
|
||||
ans = []
|
||||
busage = font_usage(document.body)
|
||||
if busage != null
|
||||
ans.push(busage)
|
||||
for node in document.body.getElementsByTagName('*')
|
||||
usage = font_usage(node)
|
||||
if usage != null
|
||||
ans.push(usage)
|
||||
py_bridge.value = ans
|
||||
|
||||
get_pseudo_element_font_usage: () ->
|
||||
ans = []
|
||||
for sheet in document.styleSheets
|
||||
process_sheet_for_pseudo(sheet, ans)
|
||||
py_bridge.value = ans
|
||||
|
||||
get_font_families: () ->
|
||||
ans = {}
|
||||
for node in document.getElementsByTagName('*')
|
||||
rules = document.defaultView.getMatchedCSSRules(node, '')
|
||||
if rules
|
||||
for rule in rules
|
||||
style = rule.style
|
||||
family = style.getPropertyValue('font-family')
|
||||
if family
|
||||
ans[family] = true
|
||||
if node.getAttribute('style')
|
||||
family = node.style.getPropertyValue('font-family')
|
||||
if family
|
||||
ans[family] = true
|
||||
py_bridge.value = ans
|
||||
|
||||
if window?
|
||||
window.font_stats = new FontStats()
|
||||
|
@ -7,19 +7,15 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import json, sys, os, logging
|
||||
from urllib import unquote
|
||||
from collections import defaultdict
|
||||
import sys
|
||||
from functools import partial
|
||||
|
||||
from lxml.etree import tostring
|
||||
import regex
|
||||
from cssutils import CSSParser
|
||||
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
|
||||
pyqtSlot)
|
||||
from PyQt5.QtWebKitWidgets import QWebPage, QWebView
|
||||
|
||||
from calibre.constants import iswindows
|
||||
from calibre.ebooks.oeb.display.webview import load_html
|
||||
from calibre.gui2 import must_use_qt
|
||||
from calibre.ebooks.oeb.base import XHTML
|
||||
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, iterdeclaration
|
||||
from calibre.utils.icu import ord_string, safe_chr
|
||||
|
||||
def normalize_font_properties(font):
|
||||
w = font.get('font-weight', None)
|
||||
@ -102,286 +98,200 @@ def get_matching_rules(rules, font):
|
||||
return m
|
||||
return []
|
||||
|
||||
def parse_font_families(parser, raw):
|
||||
style = parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
|
||||
for x in style.propertyValue:
|
||||
x = x.value
|
||||
if x:
|
||||
yield x
|
||||
def get_css_text(elem, resolve_pseudo_property, which='before'):
|
||||
text = resolve_pseudo_property(elem, which, 'content')[0].value
|
||||
if text and len(text) > 2 and text[0] == '"' and text[-1] == '"':
|
||||
return text[1:-1]
|
||||
return ''
|
||||
|
||||
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
|
||||
caps_variants = {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}
|
||||
|
||||
def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize_pat, for_pseudo=None):
|
||||
ans = []
|
||||
for font_dict, text, pseudo in pseudo_element_font_usage:
|
||||
text = text.strip()
|
||||
if pseudo == 'first-letter':
|
||||
prefix = first_letter_pat.match(text)
|
||||
if prefix is not None:
|
||||
text = prefix + text[len(prefix):].lstrip()[:1]
|
||||
else:
|
||||
text = text[:1]
|
||||
if text:
|
||||
font = font_dict.copy()
|
||||
font['text'] = text
|
||||
font['font-family'] = list(parse_font_families(parser, font['font-family']))
|
||||
ans.append(font)
|
||||
|
||||
before = get_css_text(elem, resolve_pseudo_property)
|
||||
if before:
|
||||
ans.append(before)
|
||||
if for_pseudo is not None:
|
||||
ans.append(tostring(elem, method='text', encoding=unicode, with_tail=False))
|
||||
else:
|
||||
if elem.text:
|
||||
ans.append(elem.text)
|
||||
for child in elem.iterchildren():
|
||||
t = getattr(child, 'tail', '')
|
||||
if t:
|
||||
ans.append(t)
|
||||
after = get_css_text(elem, resolve_pseudo_property, 'after')
|
||||
if after:
|
||||
ans.append(after)
|
||||
ans = ''.join(ans)
|
||||
if for_pseudo is not None:
|
||||
tt = resolve_pseudo_property(elem, for_pseudo, 'text-transform')[0].value
|
||||
fv = resolve_pseudo_property(elem, for_pseudo, 'font-variant')[0].value
|
||||
else:
|
||||
tt = resolve_property(elem, 'text-transform')[0].value
|
||||
fv = resolve_property(elem, 'font-variant')[0].value
|
||||
if fv in caps_variants:
|
||||
ans += icu_upper(ans)
|
||||
if tt != 'none':
|
||||
if tt == 'uppercase':
|
||||
ans = icu_upper(ans)
|
||||
elif tt == 'lowercase':
|
||||
ans = icu_lower(ans)
|
||||
elif tt == 'capitalize':
|
||||
m = capitalize_pat.search(ans)
|
||||
if m is not None:
|
||||
ans += icu_upper(m.group())
|
||||
return ans
|
||||
|
||||
class Page(QWebPage): # {{{
|
||||
def get_font_dict(elem, resolve_property, pseudo=None):
|
||||
ans = {}
|
||||
if pseudo is None:
|
||||
ff = resolve_property(elem, 'font-family')
|
||||
else:
|
||||
ff = resolve_property(elem, pseudo, 'font-family')
|
||||
ans['font-family'] = tuple(x.value for x in ff)
|
||||
for p in 'weight', 'style', 'stretch':
|
||||
p = 'font-' + p
|
||||
rp = resolve_property(elem, p) if pseudo is None else resolve_property(elem, pseudo, p)
|
||||
ans[p] = type('')(rp[0].value)
|
||||
normalize_font_properties(ans)
|
||||
return ans
|
||||
|
||||
def __init__(self, log):
|
||||
self.log = log
|
||||
QWebPage.__init__(self)
|
||||
self.js = None
|
||||
self.evaljs = self.mainFrame().evaluateJavaScript
|
||||
self.bridge_value = None
|
||||
nam = self.networkAccessManager()
|
||||
nam.setNetworkAccessible(nam.NotAccessible)
|
||||
self.longjs_counter = 0
|
||||
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
||||
exclude_chars = frozenset(ord_string('\n\r\t'))
|
||||
skip_tags = {XHTML(x) for x in 'script style title meta link'.split()}
|
||||
font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
|
||||
|
||||
def javaScriptConsoleMessage(self, msg, lineno, msgid):
|
||||
self.log(u'JS:', unicode(msg))
|
||||
|
||||
def javaScriptAlert(self, frame, msg):
|
||||
self.log(unicode(msg))
|
||||
|
||||
@pyqtSlot(result=bool)
|
||||
def shouldInterruptJavaScript(self):
|
||||
if self.longjs_counter < 5:
|
||||
self.log('Long running javascript, letting it proceed')
|
||||
self.longjs_counter += 1
|
||||
return False
|
||||
self.log.warn('Long running javascript, aborting it')
|
||||
return True
|
||||
|
||||
def _pass_json_value_getter(self):
|
||||
val = json.dumps(self.bridge_value)
|
||||
return val
|
||||
|
||||
def _pass_json_value_setter(self, value):
|
||||
# Qt WebKit in Qt 4.x adds extra null bytes to the end of the string
|
||||
# if the JSON contains non-BMP characters
|
||||
self.bridge_value = json.loads(unicode(value).rstrip('\0'))
|
||||
|
||||
_pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter,
|
||||
fset=_pass_json_value_setter)
|
||||
|
||||
def load_js(self):
|
||||
self.longjs_counter = 0
|
||||
if self.js is None:
|
||||
from calibre.utils.resources import compiled_coffeescript
|
||||
self.js = compiled_coffeescript('ebooks.oeb.display.utils')
|
||||
self.js += compiled_coffeescript('ebooks.oeb.polish.font_stats')
|
||||
self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
|
||||
self.evaljs(self.js)
|
||||
self.evaljs('''
|
||||
Object.defineProperty(py_bridge, 'value', {
|
||||
get : function() { return JSON.parse(this._pass_json_value); },
|
||||
set : function(val) { this._pass_json_value = JSON.stringify(val); }
|
||||
});
|
||||
''')
|
||||
# }}}
|
||||
def prepare_font_rule(cssdict):
|
||||
cssdict['font-family'] = frozenset(cssdict['font-family'][:1])
|
||||
cssdict['width'] = widths[cssdict['font-stretch']]
|
||||
cssdict['weight'] = int(cssdict['font-weight'])
|
||||
|
||||
class StatsCollector(object):
|
||||
|
||||
first_letter_pat = capitalize_pat = None
|
||||
|
||||
def __init__(self, container, do_embed=False):
|
||||
self.container = container
|
||||
self.log = self.logger = container.log
|
||||
self.do_embed = do_embed
|
||||
must_use_qt()
|
||||
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
|
||||
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
|
||||
self.capitalize_pat = regex.compile(r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)
|
||||
if self.first_letter_pat is None:
|
||||
StatsCollector.first_letter_pat = self.first_letter_pat = regex.compile(
|
||||
r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
|
||||
StatsCollector.capitalize_pat = self.capitalize_pat = regex.compile(
|
||||
r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)
|
||||
|
||||
self.loop = QEventLoop()
|
||||
self.view = QWebView()
|
||||
self.page = Page(self.log)
|
||||
self.view.setPage(self.page)
|
||||
self.page.setViewportSize(QSize(1200, 1600))
|
||||
self.collect_font_stats(container, do_embed)
|
||||
|
||||
self.view.loadFinished.connect(self.collect,
|
||||
type=Qt.QueuedConnection)
|
||||
def collect_font_face_rules(self, container, processed, spine_name, sheet, sheet_name):
|
||||
if sheet_name in processed:
|
||||
sheet_rules = processed[sheet_name]
|
||||
else:
|
||||
sheet_rules = []
|
||||
if sheet_name != spine_name:
|
||||
processed[sheet_name] = sheet_rules
|
||||
for rule, base_name, rule_index in iterrules(container, sheet_name, rules=sheet, rule_type='FONT_FACE_RULE'):
|
||||
cssdict = {}
|
||||
for prop in iterdeclaration(rule.style):
|
||||
if prop.name == 'font-family':
|
||||
cssdict['font-family'] = [icu_lower(x.value) for x in prop.propertyValue]
|
||||
elif prop.name.startswith('font-'):
|
||||
cssdict[prop.name] = prop.propertyValue[0].value
|
||||
elif prop.name == 'src':
|
||||
for val in prop.propertyValue:
|
||||
x = val.value
|
||||
fname = container.href_to_name(x, sheet_name)
|
||||
if container.has_name(fname):
|
||||
cssdict['src'] = fname
|
||||
break
|
||||
else:
|
||||
container.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % prop.propertyValue.cssText)
|
||||
if 'src' not in cssdict:
|
||||
continue
|
||||
ff = cssdict.get('font-family')
|
||||
if not ff or ff[0] in bad_fonts:
|
||||
continue
|
||||
normalize_font_properties(cssdict)
|
||||
prepare_font_rule(cssdict)
|
||||
sheet_rules.append(cssdict)
|
||||
self.font_rule_map[spine_name].extend(sheet_rules)
|
||||
|
||||
self.render_queue = list(container.spine_items)
|
||||
def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec):
|
||||
text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat)
|
||||
if not text:
|
||||
return
|
||||
|
||||
def update_usage_for_embed(font, chars):
|
||||
if not do_embed:
|
||||
return
|
||||
ff = [icu_lower(x) for x in font.get('font-family', ())]
|
||||
if ff and ff[0] not in bad_fonts:
|
||||
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in font_keys))
|
||||
val = font_usage_map.get(key)
|
||||
if val is None:
|
||||
val = font_usage_map[key] = {'text': set()}
|
||||
for k in font_keys:
|
||||
val[k] = font[k][0] if k == 'font-family' else font[k]
|
||||
val['text'] |= chars
|
||||
for ff in font.get('font-family', ()):
|
||||
if ff and icu_lower(ff) not in bad_fonts:
|
||||
font_spec.add(ff)
|
||||
|
||||
font = get_font_dict(elem, resolve_property)
|
||||
chars = frozenset(ord_string(text)) - exclude_chars
|
||||
update_usage_for_embed(font, chars)
|
||||
for rule in get_matching_rules(font_face_rules, font):
|
||||
self.font_stats[rule['src']] |= chars
|
||||
q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True)
|
||||
if q is not None:
|
||||
font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
|
||||
text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter')
|
||||
m = self.first_letter_pat.search(text.lstrip())
|
||||
if m is not None:
|
||||
chars = frozenset(ord_string(m.group())) - exclude_chars
|
||||
update_usage_for_embed(font, chars)
|
||||
for rule in get_matching_rules(font_face_rules, font):
|
||||
self.font_stats[rule['src']] |= chars
|
||||
q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True)
|
||||
if q is not None:
|
||||
font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
|
||||
text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line')
|
||||
chars = frozenset(ord_string(text)) - exclude_chars
|
||||
update_usage_for_embed(font, chars)
|
||||
for rule in get_matching_rules(font_face_rules, font):
|
||||
self.font_stats[rule['src']] |= chars
|
||||
|
||||
def get_font_usage(self, container, spine_name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed):
|
||||
root = container.parsed(spine_name)
|
||||
for body in root.iterchildren(XHTML('body')):
|
||||
for elem in body.iter('*'):
|
||||
if elem.tag not in skip_tags:
|
||||
self.get_element_font_usage(
|
||||
elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed,
|
||||
self.font_usage_map[spine_name], self.font_spec_map[spine_name])
|
||||
|
||||
def collect_font_stats(self, container, do_embed=False):
|
||||
self.font_stats = {}
|
||||
self.font_usage_map = {}
|
||||
self.font_spec_map = {}
|
||||
self.font_rule_map = {}
|
||||
self.all_font_rules = {}
|
||||
|
||||
QTimer.singleShot(0, self.render_book)
|
||||
processed_sheets = {}
|
||||
for name, is_linear in container.spine_names:
|
||||
self.font_rule_map[name] = font_face_rules = []
|
||||
resolve_property, resolve_pseudo_property, select = resolve_styles(container, name, sheet_callback=partial(
|
||||
self.collect_font_face_rules, container, processed_sheets, name))
|
||||
|
||||
if self.loop.exec_() == 1:
|
||||
raise Exception('Failed to gather statistics from book, see log for details')
|
||||
for rule in font_face_rules:
|
||||
self.all_font_rules[rule['src']] = rule
|
||||
if rule['src'] not in self.font_stats:
|
||||
self.font_stats[rule['src']] = set()
|
||||
|
||||
def log_exception(self, *args):
|
||||
orig = self.log.filter_level
|
||||
try:
|
||||
self.log.filter_level = self.log.DEBUG
|
||||
self.log.exception(*args)
|
||||
finally:
|
||||
self.log.filter_level = orig
|
||||
|
||||
def render_book(self):
|
||||
try:
|
||||
if not self.render_queue:
|
||||
self.loop.exit()
|
||||
else:
|
||||
self.render_next()
|
||||
except:
|
||||
self.log_exception('Rendering failed')
|
||||
self.loop.exit(1)
|
||||
|
||||
def render_next(self):
|
||||
item = unicode(self.render_queue.pop(0))
|
||||
self.current_item = item
|
||||
load_html(item, self.view)
|
||||
|
||||
def collect(self, ok):
|
||||
if not ok:
|
||||
self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
|
||||
self.loop.exit(1)
|
||||
return
|
||||
try:
|
||||
self.page.load_js()
|
||||
self.collect_font_stats()
|
||||
except:
|
||||
self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
|
||||
self.loop.exit(1)
|
||||
return
|
||||
|
||||
self.render_book()
|
||||
|
||||
def href_to_name(self, href, warn_name):
|
||||
if not href.startswith('file://'):
|
||||
self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
|
||||
return None
|
||||
src = href[len('file://'):]
|
||||
if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
|
||||
src = src[1:]
|
||||
src = src.replace('/', os.sep)
|
||||
src = unquote(src)
|
||||
name = self.container.abspath_to_name(src)
|
||||
if not self.container.has_name(name):
|
||||
self.log.warn('Missing resource', href, 'in', warn_name,
|
||||
'ignoring')
|
||||
return None
|
||||
return name
|
||||
|
||||
def collect_font_stats(self):
|
||||
self.page.evaljs('window.font_stats.get_font_face_rules()')
|
||||
font_face_rules = self.page.bridge_value
|
||||
if not isinstance(font_face_rules, list):
|
||||
raise Exception('Unknown error occurred while reading font-face rules')
|
||||
|
||||
# Weed out invalid font-face rules
|
||||
rules = []
|
||||
import tinycss
|
||||
parser = tinycss.make_full_parser()
|
||||
for rule in font_face_rules:
|
||||
ff = rule.get('font-family', None)
|
||||
if not ff:
|
||||
continue
|
||||
style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
|
||||
ff = [x.value for x in
|
||||
style.getProperty('font-family').propertyValue]
|
||||
if not ff or ff[0] == 'inherit':
|
||||
continue
|
||||
rule['font-family'] = frozenset(icu_lower(f) for f in ff)
|
||||
src = rule.get('src', None)
|
||||
if not src:
|
||||
continue
|
||||
try:
|
||||
tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
|
||||
except Exception:
|
||||
self.log.warn('Failed to parse @font-family src: %s' % src)
|
||||
continue
|
||||
for token in tokens:
|
||||
if token.type == 'URI':
|
||||
uv = token.value
|
||||
if uv:
|
||||
sn = self.href_to_name(uv, '@font-face rule')
|
||||
if sn is not None:
|
||||
rule['src'] = sn
|
||||
break
|
||||
else:
|
||||
self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
|
||||
continue
|
||||
normalize_font_properties(rule)
|
||||
rule['width'] = widths[rule['font-stretch']]
|
||||
rule['weight'] = int(rule['font-weight'])
|
||||
rules.append(rule)
|
||||
|
||||
if not rules and not self.do_embed:
|
||||
return
|
||||
|
||||
self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
|
||||
for rule in rules:
|
||||
self.all_font_rules[rule['src']] = rule
|
||||
|
||||
for rule in rules:
|
||||
if rule['src'] not in self.font_stats:
|
||||
self.font_stats[rule['src']] = set()
|
||||
|
||||
self.page.evaljs('window.font_stats.get_font_usage()')
|
||||
font_usage = self.page.bridge_value
|
||||
if not isinstance(font_usage, list):
|
||||
raise Exception('Unknown error occurred while reading font usage')
|
||||
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
|
||||
pseudo_element_font_usage = self.page.bridge_value
|
||||
if not isinstance(pseudo_element_font_usage, list):
|
||||
raise Exception('Unknown error occurred while reading pseudo element font usage')
|
||||
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
|
||||
exclude = {'\n', '\r', '\t'}
|
||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
|
||||
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
||||
for font in font_usage:
|
||||
text = set()
|
||||
for t in font['text']:
|
||||
tt = (font['text-transform'] or '').lower()
|
||||
if tt != 'none':
|
||||
if tt == 'uppercase':
|
||||
t = icu_upper(t)
|
||||
elif tt == 'lowercase':
|
||||
t = icu_lower(t)
|
||||
elif tt == 'capitalize':
|
||||
m = self.capitalize_pat.search(t)
|
||||
if m is not None:
|
||||
t += icu_upper(m.group())
|
||||
fv = (font['font-variant'] or '').lower()
|
||||
if fv in {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}:
|
||||
t += icu_upper(t) # for renderers that try to fake small-caps by using small normal caps
|
||||
text |= frozenset(t)
|
||||
text.difference_update(exclude)
|
||||
if not text:
|
||||
continue
|
||||
normalize_font_properties(font)
|
||||
for rule in get_matching_rules(rules, font):
|
||||
self.font_stats[rule['src']] |= text
|
||||
if self.do_embed:
|
||||
ff = [icu_lower(x) for x in font.get('font-family', [])]
|
||||
if ff and ff[0] not in bad_fonts:
|
||||
keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
|
||||
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
|
||||
val = fu[key]
|
||||
if not val:
|
||||
val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
|
||||
val['text'] = set()
|
||||
val['text'] |= text
|
||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)
|
||||
|
||||
if self.do_embed:
|
||||
self.page.evaljs('window.font_stats.get_font_families()')
|
||||
font_families = self.page.bridge_value
|
||||
if not isinstance(font_families, dict):
|
||||
raise Exception('Unknown error occurred while reading font families')
|
||||
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
|
||||
for font_dict, text, pseudo in pseudo_element_font_usage:
|
||||
font_families[font_dict['font-family']] = True
|
||||
for raw in font_families.iterkeys():
|
||||
for x in parse_font_families(self.parser, raw):
|
||||
if x.lower() not in bad_fonts:
|
||||
fs.add(x)
|
||||
self.font_usage_map[name] = {}
|
||||
self.font_spec_map[name] = set()
|
||||
self.get_font_usage(container, name, resolve_property, resolve_pseudo_property, font_face_rules, do_embed)
|
||||
self.font_stats = {k:{safe_chr(x) for x in v} for k, v in self.font_stats.iteritems()}
|
||||
for fum in self.font_usage_map.itervalues():
|
||||
for v in fum.itervalues():
|
||||
v['text'] = {safe_chr(x) for x in v['text']}
|
||||
|
||||
if __name__ == '__main__':
|
||||
from calibre.ebooks.oeb.polish.container import get_container
|
||||
|
@ -12,6 +12,7 @@ from calibre.constants import iswindows
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
|
||||
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS
|
||||
from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name
|
||||
from calibre.ebooks.oeb.polish.stats import StatsCollector, font_keys, normalize_font_properties, prepare_font_rule
|
||||
from calibre.ebooks.oeb.polish.tests.base import BaseTest
|
||||
from calibre.utils.logging import Log, Stream
|
||||
|
||||
@ -45,6 +46,12 @@ class VirtualContainer(ContainerBase):
|
||||
self.parsed_cache[name] = self.files[name]
|
||||
return self.parsed_cache[name]
|
||||
|
||||
@property
|
||||
def spine_names(self):
|
||||
for name in sorted(self.mime_map):
|
||||
if self.mime_map[name] in OEB_DOCS:
|
||||
yield name, True
|
||||
|
||||
class CascadeTest(BaseTest):
|
||||
|
||||
def test_iterrules(self):
|
||||
@ -131,3 +138,65 @@ class CascadeTest(BaseTest):
|
||||
t('p', 'before', 'font-weight', 'bold')
|
||||
t('p', 'first-letter', 'content')
|
||||
t('p', 'first-letter', 'content', abort_on_missing=True)
|
||||
|
||||
def test_font_stats(self):
|
||||
embeds = '@font-face { font-family: X; src: url(X.otf) }\n@font-face { font-family: X; src: url(XB.otf); font-weight: bold }'
|
||||
def get_stats(html, *fonts):
|
||||
styles = []
|
||||
html = '<html><head><link href="styles.css"></head><body>{}</body></html>'.format(html)
|
||||
files = {'index.html':html, 'X.otf':b'xxx', 'XB.otf': b'xbxb'}
|
||||
for font in fonts:
|
||||
styles.append('@font-face {')
|
||||
for k, v in font.iteritems():
|
||||
if k == 'src':
|
||||
files[v] = b'xxx'
|
||||
v = 'url(%s)' % v
|
||||
styles.append('%s : %s;' % (k, v))
|
||||
styles.append('}\n')
|
||||
html = '<html><head><link href="styles.css"></head><body>{}</body></html>'.format(html)
|
||||
files['styles.css'] = embeds + '\n'.join(styles)
|
||||
c = VirtualContainer(files)
|
||||
return StatsCollector(c, do_embed=True)
|
||||
|
||||
def font(family, weight=None, style=None):
|
||||
f = {}
|
||||
if weight is not None:
|
||||
f['font-weight'] = weight
|
||||
if style is not None:
|
||||
f['font-style'] = style
|
||||
f = normalize_font_properties(f)
|
||||
f['font-family'] = [family]
|
||||
return f
|
||||
|
||||
def font_rule(src, *args, **kw):
|
||||
ans = font(*args, **kw)
|
||||
ans['font-family'] = list(map(icu_lower, ans['font-family']))
|
||||
prepare_font_rule(ans)
|
||||
ans['src'] = src
|
||||
return ans
|
||||
|
||||
def fkey(*args, **kw):
|
||||
f = font(*args, **kw)
|
||||
f['font-family'] = icu_lower(f['font-family'][0])
|
||||
return frozenset((k, v) for k, v in f.iteritems() if k in font_keys)
|
||||
|
||||
def fu(text, *args, **kw):
|
||||
key = fkey(*args, **kw)
|
||||
val = font(*args, **kw)
|
||||
val['text'] = set(text)
|
||||
val['font-family'] = val['font-family'][0]
|
||||
return key, val
|
||||
|
||||
s = get_stats('<p style="font-family: X">abc<b>d\nef</b><i>ghi</i></p><p style="font-family: U">u</p>')
|
||||
# The normal font must include ghi as it will be used to simulate
|
||||
# italic by most rendering engines when the italic font is missing
|
||||
self.assertEqual(s.font_stats, {'XB.otf':set('def'), 'X.otf':set('abcghi')})
|
||||
self.assertEqual(s.font_spec_map, {'index.html':set('XU')})
|
||||
self.assertEqual(s.all_font_rules, {'X.otf':font_rule('X.otf', 'X'), 'XB.otf':font_rule('XB.otf', 'X', 'bold')})
|
||||
self.assertEqual(set(s.font_rule_map), {'index.html'})
|
||||
self.assertEqual(s.font_rule_map['index.html'], [font_rule('X.otf', 'X'), font_rule('XB.otf', 'X', 'bold')])
|
||||
self.assertEqual(set(s.font_usage_map), {'index.html'})
|
||||
self.assertEqual(s.font_usage_map['index.html'], dict([fu('abc', 'X'), fu('def', 'X', weight='bold'), fu('ghi', 'X', style='italic'), fu('u', 'U')]))
|
||||
|
||||
s = get_stats('<p style="font-family: X; text-transform:uppercase">abc</p><b style="font-family: X; font-variant: small-caps">d\nef</b>')
|
||||
self.assertEqual(s.font_stats, {'XB.otf':set('defDEF'), 'X.otf':set('ABC')})
|
||||
|
Loading…
x
Reference in New Issue
Block a user