Edit book/Book polishing: When embedding fonts or subsetting embedded fonts, handle the :first-letter and :first-line pseudo-element style rules. Fixes #1373649 [Book Editor - Font Subsetting does not recognize pseudo-element declarations](https://bugs.launchpad.net/calibre/+bug/1373649)

This commit is contained in:
Kovid Goyal 2014-10-02 12:00:45 +05:30
parent 06c4255b4a
commit 0f660259d3
3 changed files with 75 additions and 8 deletions

Binary file not shown.

View File

@ -50,6 +50,29 @@ process_font_face_rule = (rule, font_faces) ->
fd['src'] = rule.style.getPropertyValue('src')
font_faces.push(fd)
# Matches a first-letter or first-line pseudo-element inside a selector,
# written with one or two leading colons, ignoring case. Capture group 1 is
# the pseudo-element name without the colons.
fl_pat = /:{1,2}(first-letter|first-line)/i
process_sheet_for_pseudo = (sheet, rules) ->
    # Scan a stylesheet for style rules whose selector contains a
    # first-letter/first-line pseudo-element (as matched by fl_pat) and that
    # declare a font-family. Each such rule is passed to process_style_rule(),
    # which appends its findings to the `rules` array.
    # Stylesheets pulled in through @import rules are scanned recursively.
    for rule in sheet.cssRules
        switch rule.type
            when rule.STYLE_RULE
                selector = rule.selectorText
                found = fl_pat.exec(selector)
                # Only rules that actually set a font are of interest
                if found and rule.style.getPropertyValue('font-family')
                    process_style_rule(selector, rule.style, rules, found[1].toLowerCase())
            when rule.IMPORT_RULE
                # An import rule may have no stylesheet attached
                if rule.styleSheet
                    process_sheet_for_pseudo(rule.styleSheet, rules)
process_style_rule = (selector_text, style, rules, pseudo) ->
    # For every element matched by a pseudo-element style rule, record the
    # font described by the rule together with the element's text.
    #
    # selector_text: selector of the rule, still containing the pseudo-element
    # style:         the rule's style declaration, handed to font_dict()
    # rules:         output array; a [font, text, pseudo] triple is pushed for
    #                each matched element that has non-empty innerText
    # pseudo:        pseudo-element name, stored unchanged in each triple
    #
    # Strip every pseudo-element occurrence, not just the first one. In a
    # selector group such as "p:first-letter, div:first-letter" a pseudo-element
    # left behind would prevent that selector from matching any element in
    # querySelectorAll(). A separate global pattern is used here so that the
    # shared fl_pat, which is used with exec(), never carries lastIndex state.
    selector_text = selector_text.replace(/:{1,2}(first-letter|first-line)/ig, '')
    fd = font_dict(style)
    for element in document.querySelectorAll(selector_text)
        text = element.innerText
        if text
            rules.push([fd, text, pseudo])
class FontStats
# This class is a namespace to expose functions via the
# window.font_stats object.
@ -75,6 +98,12 @@ class FontStats
ans.push(usage)
py_bridge.value = ans
get_pseudo_element_font_usage: () ->
    # Run process_sheet_for_pseudo() over every stylesheet of the document,
    # accumulating the results in one array, and publish that array through
    # py_bridge.value.
    usage = []
    process_sheet_for_pseudo(sheet, usage) for sheet in document.styleSheets
    py_bridge.value = usage
get_font_families: () ->
ans = {}
for node in document.getElementsByTagName('*')

View File

@ -11,6 +11,7 @@ import json, sys, os, logging
from urllib import unquote
from collections import defaultdict
import regex
from cssutils import CSSParser
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
pyqtSlot)
@ -101,6 +102,31 @@ def get_matching_rules(rules, font):
return m
return []
def parse_font_families(parser, raw):
    """Yield the non-empty family values of a raw font-family declaration.

    ``raw`` is prefixed with ``font-family:`` and parsed with
    ``parser.parseStyle`` (validation switched off); the ``value`` of every
    item in the resulting property's ``propertyValue`` is yielded, skipping
    falsy values.
    """
    declaration = parser.parseStyle('font-family:' + raw, validate=False)
    prop = declaration.getProperty('font-family')
    for item in prop.propertyValue:
        family = item.value
        if family:
            yield family
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
    """Turn pseudo-element usage records into font usage dictionaries.

    :param pseudo_element_font_usage: iterable of ``(font_dict, text, pseudo)``
        triples, where ``pseudo`` is the pseudo-element name.
    :param first_letter_pat: compiled pattern whose ``match()`` finds the
        leading characters that belong to the first letter.
    :param parser: passed through to :func:`parse_font_families` to split the
        ``font-family`` value into individual family names.
    :return: list of copies of each ``font_dict`` with ``'text'`` set to the
        text covered by the pseudo-element and ``'font-family'`` replaced by a
        list of family names. Records whose text ends up empty are dropped.
    """
    ans = []
    for font_dict, text, pseudo in pseudo_element_font_usage:
        text = text.strip()
        if pseudo == 'first-letter':
            match = first_letter_pat.match(text)
            if match is None:
                text = text[:1]
            else:
                # match() returns a match object, not a string; take the
                # matched prefix itself before slicing and concatenating.
                prefix = match.group()
                # Keep the prefix plus the first character that follows it,
                # ignoring whitespace between the two.
                text = prefix + text[len(prefix):].lstrip()[:1]
        # Any other pseudo-element keeps the whole stripped text
        if text:
            font = font_dict.copy()  # never mutate the caller's dict
            font['text'] = text
            font['font-family'] = list(parse_font_families(parser, font['font-family']))
            ans.append(font)
    return ans
class Page(QWebPage): # {{{
def __init__(self, log):
@ -164,6 +190,7 @@ class StatsCollector(object):
self.do_embed = do_embed
must_use_qt()
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
self.loop = QEventLoop()
self.view = QWebView()
@ -186,6 +213,14 @@ class StatsCollector(object):
if self.loop.exec_() == 1:
raise Exception('Failed to gather statistics from book, see log for details')
def log_exception(self, *args):
    """Call ``self.log.exception(*args)`` with the filter level set to DEBUG.

    The log's previous ``filter_level`` is put back afterwards, even if the
    logging call itself raises.
    """
    log = self.log
    saved_level = log.filter_level
    try:
        log.filter_level = log.DEBUG
        log.exception(*args)
    finally:
        log.filter_level = saved_level
def render_book(self):
try:
if not self.render_queue:
@ -193,7 +228,7 @@ class StatsCollector(object):
else:
self.render_next()
except:
self.logger.exception('Rendering failed')
self.log_exception('Rendering failed')
self.loop.exit(1)
def render_next(self):
@ -210,7 +245,7 @@ class StatsCollector(object):
self.page.load_js()
self.collect_font_stats()
except:
self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
self.loop.exit(1)
return
@ -283,6 +318,11 @@ class StatsCollector(object):
font_usage = self.page.bridge_value
if not isinstance(font_usage, list):
raise Exception('Unknown error occurred while reading font usage')
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
pseudo_element_font_usage = self.page.bridge_value
if not isinstance(pseudo_element_font_usage, list):
raise Exception('Unknown error occurred while reading pseudo element font usage')
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
exclude = {'\n', '\r', '\t'}
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
@ -314,11 +354,11 @@ class StatsCollector(object):
if not isinstance(font_families, dict):
raise Exception('Unknown error occurred while reading font families')
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
for font_dict, text, pseudo in pseudo_element_font_usage:
font_families[font_dict['font-family']] = True
for raw in font_families.iterkeys():
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
for x in style.propertyValue:
x = x.value
if x and x.lower() not in bad_fonts:
for x in parse_font_families(self.parser, raw):
if x.lower() not in bad_fonts:
fs.add(x)
if __name__ == '__main__':
@ -327,5 +367,3 @@ if __name__ == '__main__':
default_log.filter_level = default_log.DEBUG
ebook = get_container(sys.argv[-1], default_log)
print (StatsCollector(ebook, do_embed=True).font_stats)