mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book/Book polishing: When embedding fonts or subsetting embedded fonts, handle the :first-letter and :first-line pseudo-element style rules. Fixes #1373649 [Book Editor - Font Subsetting does not recognize pseudo-element declarations](https://bugs.launchpad.net/calibre/+bug/1373649)
This commit is contained in:
parent
06c4255b4a
commit
0f660259d3
Binary file not shown.
@ -50,6 +50,29 @@ process_font_face_rule = (rule, font_faces) ->
|
||||
fd['src'] = rule.style.getPropertyValue('src')
|
||||
font_faces.push(fd)
|
||||
|
||||
# Matches a first-letter / first-line pseudo-element inside a selector,
# written with either one or two leading colons, case-insensitively.
# Capture group 1 holds the pseudo-element name without the colons.
fl_pat = /:{1,2}(first-letter|first-line)/i
|
||||
|
||||
process_sheet_for_pseudo = (sheet, rules) ->
    # Walk every rule of `sheet`. Style rules whose selector contains a
    # first-letter/first-line pseudo-element (as matched by fl_pat) and that
    # declare a font-family are handed to process_style_rule, which appends
    # to `rules`. @import rules that carry a loaded stylesheet are walked
    # recursively.
    for css_rule in sheet.cssRules
        if css_rule.type == css_rule.STYLE_RULE
            sel_text = css_rule.selectorText
            pseudo_match = fl_pat.exec(sel_text)
            if pseudo_match
                pseudo_name = pseudo_match[1].toLowerCase()
                # Only rules that actually set a font family are of interest
                if css_rule.style.getPropertyValue('font-family')
                    process_style_rule(sel_text, css_rule.style, rules, pseudo_name)
        else if css_rule.type == css_rule.IMPORT_RULE and css_rule.styleSheet
            process_sheet_for_pseudo(css_rule.styleSheet, rules)
|
||||
|
||||
process_style_rule = (selector_text, style, rules, pseudo) ->
    # Record the text of every element targeted by a pseudo-element rule.
    # The first pseudo-element found by fl_pat is removed from the selector
    # so that it can be used with querySelectorAll. For each matched element
    # with non-empty innerText a [font_dict, text, pseudo] triple is pushed
    # onto `rules`.
    plain_selector = selector_text.replace(fl_pat, '')
    fd = font_dict(style)
    for element in document.querySelectorAll(plain_selector)
        text = element.innerText
        rules.push([fd, text, pseudo]) if text
|
||||
|
||||
class FontStats
|
||||
# This class is a namespace to expose functions via the
|
||||
# window.font_stats object.
|
||||
@ -75,6 +98,12 @@ class FontStats
|
||||
ans.push(usage)
|
||||
py_bridge.value = ans
|
||||
|
||||
get_pseudo_element_font_usage: () ->
    # Collect the pseudo-element font usage of every stylesheet attached to
    # the document and publish the resulting list through py_bridge.value.
    usage = []
    process_sheet_for_pseudo(sheet, usage) for sheet in document.styleSheets
    py_bridge.value = usage
|
||||
|
||||
get_font_families: () ->
|
||||
ans = {}
|
||||
for node in document.getElementsByTagName('*')
|
||||
|
@ -11,6 +11,7 @@ import json, sys, os, logging
|
||||
from urllib import unquote
|
||||
from collections import defaultdict
|
||||
|
||||
import regex
|
||||
from cssutils import CSSParser
|
||||
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
|
||||
pyqtSlot)
|
||||
@ -101,6 +102,31 @@ def get_matching_rules(rules, font):
|
||||
return m
|
||||
return []
|
||||
|
||||
def parse_font_families(parser, raw):
    """Yield the non-empty font family names found in *raw*.

    *raw* is the text of a CSS ``font-family`` value. It is wrapped in a
    ``font-family:`` declaration and parsed with ``parser.parseStyle``
    (validation disabled); every component of the resulting property value
    whose ``value`` is truthy is yielded, in order.
    """
    declaration = parser.parseStyle('font-family:' + raw, validate=False)
    prop = declaration.getProperty('font-family')
    for component in prop.propertyValue:
        family = component.value
        if family:
            yield family
|
||||
|
||||
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
    """Turn pseudo-element usage records into font usage entries.

    :param pseudo_element_font_usage: iterable of ``(font_dict, text, pseudo)``
        triples, where ``font_dict`` is a dict with at least a ``font-family``
        key, ``text`` is the text of the element the rule applies to and
        ``pseudo`` is the pseudo-element name (``'first-letter'`` or
        ``'first-line'``).
    :param first_letter_pat: compiled pattern whose ``match()`` finds the run
        of leading punctuation at the start of the text.
    :param parser: CSS parser handed on to ``parse_font_families``.
    :return: list of copies of the font dicts, each with ``text`` set to the
        text affected by the pseudo-element and ``font-family`` replaced by
        the list of parsed family names. Records whose text ends up empty are
        dropped.
    """
    ans = []
    for font_dict, text, pseudo in pseudo_element_font_usage:
        text = text.strip()
        if pseudo == 'first-letter':
            match = first_letter_pat.match(text)
            if match is None:
                text = text[:1]
            else:
                # match() returns a match object, not a string: extract the
                # matched punctuation before measuring or concatenating it.
                prefix = match.group()
                # Leading punctuation is kept together with the first
                # character that follows it.
                text = prefix + text[len(prefix):].lstrip()[:1]
        if text:
            font = font_dict.copy()
            font['text'] = text
            font['font-family'] = list(parse_font_families(parser, font['font-family']))
            ans.append(font)

    return ans
|
||||
|
||||
class Page(QWebPage): # {{{
|
||||
|
||||
def __init__(self, log):
|
||||
@ -164,6 +190,7 @@ class StatsCollector(object):
|
||||
self.do_embed = do_embed
|
||||
must_use_qt()
|
||||
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
|
||||
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
|
||||
|
||||
self.loop = QEventLoop()
|
||||
self.view = QWebView()
|
||||
@ -186,6 +213,14 @@ class StatsCollector(object):
|
||||
if self.loop.exec_() == 1:
|
||||
raise Exception('Failed to gather statistics from book, see log for details')
|
||||
|
||||
def log_exception(self, *args):
    """Log the current exception with the log's filter level set to DEBUG.

    ``self.log.filter_level`` is switched to ``self.log.DEBUG`` for the
    duration of the ``self.log.exception(*args)`` call and is always put
    back to its previous value afterwards, even if logging itself raises.
    """
    log = self.log
    saved_level = log.filter_level
    try:
        log.filter_level = log.DEBUG
        log.exception(*args)
    finally:
        log.filter_level = saved_level
|
||||
|
||||
def render_book(self):
|
||||
try:
|
||||
if not self.render_queue:
|
||||
@ -193,7 +228,7 @@ class StatsCollector(object):
|
||||
else:
|
||||
self.render_next()
|
||||
except:
|
||||
self.logger.exception('Rendering failed')
|
||||
self.log_exception('Rendering failed')
|
||||
self.loop.exit(1)
|
||||
|
||||
def render_next(self):
|
||||
@ -210,7 +245,7 @@ class StatsCollector(object):
|
||||
self.page.load_js()
|
||||
self.collect_font_stats()
|
||||
except:
|
||||
self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
|
||||
self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
|
||||
self.loop.exit(1)
|
||||
return
|
||||
|
||||
@ -283,6 +318,11 @@ class StatsCollector(object):
|
||||
font_usage = self.page.bridge_value
|
||||
if not isinstance(font_usage, list):
|
||||
raise Exception('Unknown error occurred while reading font usage')
|
||||
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
|
||||
pseudo_element_font_usage = self.page.bridge_value
|
||||
if not isinstance(pseudo_element_font_usage, list):
|
||||
raise Exception('Unknown error occurred while reading pseudo element font usage')
|
||||
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
|
||||
exclude = {'\n', '\r', '\t'}
|
||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
|
||||
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
||||
@ -314,11 +354,11 @@ class StatsCollector(object):
|
||||
if not isinstance(font_families, dict):
|
||||
raise Exception('Unknown error occurred while reading font families')
|
||||
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
|
||||
for font_dict, text, pseudo in pseudo_element_font_usage:
|
||||
font_families[font_dict['font-family']] = True
|
||||
for raw in font_families.iterkeys():
|
||||
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
|
||||
for x in style.propertyValue:
|
||||
x = x.value
|
||||
if x and x.lower() not in bad_fonts:
|
||||
for x in parse_font_families(self.parser, raw):
|
||||
if x.lower() not in bad_fonts:
|
||||
fs.add(x)
|
||||
|
||||
if __name__ == '__main__':
|
||||
@ -327,5 +367,3 @@ if __name__ == '__main__':
|
||||
default_log.filter_level = default_log.DEBUG
|
||||
ebook = get_container(sys.argv[-1], default_log)
|
||||
print (StatsCollector(ebook, do_embed=True).font_stats)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user