Edit book/Book polishing: When embedding fonts or subsetting embedded fonts, handle the :first-letter and :first-line pseudo-element style rules. Fixes #1373649 [Book Editor - Font Subsetting does not recognize pseudo-element declarations](https://bugs.launchpad.net/calibre/+bug/1373649)

This commit is contained in:
Kovid Goyal 2014-10-02 12:00:45 +05:30
parent 06c4255b4a
commit 0f660259d3
3 changed files with 75 additions and 8 deletions

Binary file not shown.

View File

@ -50,6 +50,29 @@ process_font_face_rule = (rule, font_faces) ->
fd['src'] = rule.style.getPropertyValue('src') fd['src'] = rule.style.getPropertyValue('src')
font_faces.push(fd) font_faces.push(fd)
# Matches a :first-letter / :first-line pseudo-element written with one or two
# leading colons, case-insensitively. Capture group 1 is the pseudo-element name.
fl_pat = /:{1,2}(first-letter|first-line)/i

# Walk the rules of `sheet`. Every style rule whose selector contains a
# first-letter/first-line pseudo-element and which declares a font-family is
# passed to process_style_rule together with the lower-cased pseudo-element
# name. Stylesheets pulled in through @import rules are walked recursively.
# Results are accumulated into the `rules` array.
process_sheet_for_pseudo = (sheet, rules) ->
    for css_rule in sheet.cssRules
        rule_kind = css_rule.type
        if rule_kind == css_rule.STYLE_RULE
            rule_selector = css_rule.selectorText
            pseudo_match = fl_pat.exec(rule_selector)
            # Only rules that actually set a font family are of interest
            if pseudo_match and css_rule.style.getPropertyValue('font-family')
                process_style_rule(rule_selector, css_rule.style, rules, pseudo_match[1].toLowerCase())
        else if rule_kind == css_rule.IMPORT_RULE and css_rule.styleSheet
            process_sheet_for_pseudo(css_rule.styleSheet, rules)
# For a style rule that targets a first-letter/first-line pseudo-element,
# record which text it applies to. The pseudo-element is removed from the
# selector so the selector can be used to look up the real elements; for each
# matched element with non-empty innerText a [font dict, text, pseudo] triple
# is appended to `rules`.
# NOTE: fl_pat has no global flag, so only the first pseudo-element occurrence
# in the selector text is removed.
process_style_rule = (selector_text, style, rules, pseudo) ->
    element_selector = selector_text.replace(fl_pat, '')
    fd = font_dict(style)
    for matched_node in document.querySelectorAll(element_selector)
        node_text = matched_node.innerText
        rules.push([fd, node_text, pseudo]) if node_text
class FontStats class FontStats
# This class is a namespace to expose functions via the # This class is a namespace to expose functions via the
# window.font_stats object. # window.font_stats object.
@ -75,6 +98,12 @@ class FontStats
ans.push(usage) ans.push(usage)
py_bridge.value = ans py_bridge.value = ans
# Collect [font dict, text, pseudo] triples for every first-letter/first-line
# style rule in all of the document's stylesheets and hand the resulting array
# to the host application through py_bridge.value.
get_pseudo_element_font_usage: () ->
    collected_usage = []
    process_sheet_for_pseudo(style_sheet, collected_usage) for style_sheet in document.styleSheets
    py_bridge.value = collected_usage
get_font_families: () -> get_font_families: () ->
ans = {} ans = {}
for node in document.getElementsByTagName('*') for node in document.getElementsByTagName('*')

View File

@ -11,6 +11,7 @@ import json, sys, os, logging
from urllib import unquote from urllib import unquote
from collections import defaultdict from collections import defaultdict
import regex
from cssutils import CSSParser from cssutils import CSSParser
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer, from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
pyqtSlot) pyqtSlot)
@ -101,6 +102,31 @@ def get_matching_rules(rules, font):
return m return m
return [] return []
def parse_font_families(parser, raw):
    """Yield each non-empty family name from a raw ``font-family`` value.

    ``raw`` is wrapped in a ``font-family:`` declaration and parsed with
    ``parser.parseStyle`` (validation disabled); the ``value`` of every entry
    in the resulting property value is yielded, skipping falsy ones.

    This is a generator, so nothing is parsed until iteration starts.
    """
    declaration = parser.parseStyle('font-family:' + raw, validate=False)
    family_property = declaration.getProperty('font-family')
    for entry in family_property.propertyValue:
        family = entry.value
        if not family:
            continue
        yield family
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
    """Turn pseudo-element usage records into font usage dictionaries.

    :param pseudo_element_font_usage: iterable of ``(font_dict, text, pseudo)``
        triples, where ``pseudo`` is the pseudo-element name
        (``'first-letter'`` is handled specially, anything else keeps the
        whole text).
    :param first_letter_pat: compiled pattern whose ``match()`` finds the
        leading run of characters (punctuation, in the caller's pattern) that
        is styled together with the first letter.
    :param parser: CSS parser handed on to ``parse_font_families``.
    :return: list of copies of ``font_dict`` with ``'text'`` set to the text
        the pseudo-element applies to and ``'font-family'`` replaced by the
        list of parsed family names. Records whose text ends up empty are
        dropped.
    """
    ans = []
    for font_dict, text, pseudo in pseudo_element_font_usage:
        text = text.strip()
        if pseudo == 'first-letter':
            match = first_letter_pat.match(text)
            if match is None:
                text = text[:1]
            else:
                # match() returns a match object, not a string: extract the
                # matched punctuation before using it in string operations.
                prefix = match.group()
                # Leading punctuation plus the first character that follows
                # it (ignoring whitespace in between).
                text = prefix + text[len(prefix):].lstrip()[:1]
        if text:
            # Copy so the record received from the caller is not mutated
            font = font_dict.copy()
            font['text'] = text
            font['font-family'] = list(parse_font_families(parser, font['font-family']))
            ans.append(font)
    return ans
class Page(QWebPage): # {{{ class Page(QWebPage): # {{{
def __init__(self, log): def __init__(self, log):
@ -164,6 +190,7 @@ class StatsCollector(object):
self.do_embed = do_embed self.do_embed = do_embed
must_use_qt() must_use_qt()
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
self.loop = QEventLoop() self.loop = QEventLoop()
self.view = QWebView() self.view = QWebView()
@ -186,6 +213,14 @@ class StatsCollector(object):
if self.loop.exec_() == 1: if self.loop.exec_() == 1:
raise Exception('Failed to gather statistics from book, see log for details') raise Exception('Failed to gather statistics from book, see log for details')
def log_exception(self, *args):
    """Log the current exception via ``self.log.exception(*args)``.

    The log's ``filter_level`` is switched to ``DEBUG`` for the duration of
    the call (presumably so the message is not filtered out) and is always
    restored to its previous value afterwards, even if logging itself fails.
    """
    log = self.log
    previous_level = log.filter_level
    try:
        log.filter_level = log.DEBUG
        log.exception(*args)
    finally:
        log.filter_level = previous_level
def render_book(self): def render_book(self):
try: try:
if not self.render_queue: if not self.render_queue:
@ -193,7 +228,7 @@ class StatsCollector(object):
else: else:
self.render_next() self.render_next()
except: except:
self.logger.exception('Rendering failed') self.log_exception('Rendering failed')
self.loop.exit(1) self.loop.exit(1)
def render_next(self): def render_next(self):
@ -210,7 +245,7 @@ class StatsCollector(object):
self.page.load_js() self.page.load_js()
self.collect_font_stats() self.collect_font_stats()
except: except:
self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item)) self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
self.loop.exit(1) self.loop.exit(1)
return return
@ -283,6 +318,11 @@ class StatsCollector(object):
font_usage = self.page.bridge_value font_usage = self.page.bridge_value
if not isinstance(font_usage, list): if not isinstance(font_usage, list):
raise Exception('Unknown error occurred while reading font usage') raise Exception('Unknown error occurred while reading font usage')
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
pseudo_element_font_usage = self.page.bridge_value
if not isinstance(pseudo_element_font_usage, list):
raise Exception('Unknown error occurred while reading pseudo element font usage')
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
exclude = {'\n', '\r', '\t'} exclude = {'\n', '\r', '\t'}
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
@ -314,11 +354,11 @@ class StatsCollector(object):
if not isinstance(font_families, dict): if not isinstance(font_families, dict):
raise Exception('Unknown error occurred while reading font families') raise Exception('Unknown error occurred while reading font families')
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
for font_dict, text, pseudo in pseudo_element_font_usage:
font_families[font_dict['font-family']] = True
for raw in font_families.iterkeys(): for raw in font_families.iterkeys():
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') for x in parse_font_families(self.parser, raw):
for x in style.propertyValue: if x.lower() not in bad_fonts:
x = x.value
if x and x.lower() not in bad_fonts:
fs.add(x) fs.add(x)
if __name__ == '__main__': if __name__ == '__main__':
@ -327,5 +367,3 @@ if __name__ == '__main__':
default_log.filter_level = default_log.DEBUG default_log.filter_level = default_log.DEBUG
ebook = get_container(sys.argv[-1], default_log) ebook = get_container(sys.argv[-1], default_log)
print (StatsCollector(ebook, do_embed=True).font_stats) print (StatsCollector(ebook, do_embed=True).font_stats)