mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book/Book polishing: When embedding fonts or subsetting embedded fonts, handle the :first-letter and :first-line pseudo-element style rules. Fixes #1373649 [Book Editor - Font Subsetting does not recognize pseudo-element declarations](https://bugs.launchpad.net/calibre/+bug/1373649)
This commit is contained in:
parent
06c4255b4a
commit
0f660259d3
Binary file not shown.
@ -50,6 +50,29 @@ process_font_face_rule = (rule, font_faces) ->
|
|||||||
fd['src'] = rule.style.getPropertyValue('src')
|
fd['src'] = rule.style.getPropertyValue('src')
|
||||||
font_faces.push(fd)
|
font_faces.push(fd)
|
||||||
|
|
||||||
|
# Matches a :first-letter or :first-line pseudo-element written with one or two
# leading colons, case-insensitively. Group 1 captures the pseudo-element name.
fl_pat = /:{1,2}(first-letter|first-line)/i
|
||||||
|
|
||||||
|
process_sheet_for_pseudo = (sheet, rules) ->
    # Scan every rule of `sheet` for style rules whose selector uses
    # :first-letter or :first-line and which declare a font-family. Each such
    # rule is handed to process_style_rule(), which appends to `rules`.
    # Stylesheets attached to @import rules are scanned recursively.
    for css_rule in sheet.cssRules
        kind = css_rule.type
        if kind == css_rule.STYLE_RULE
            selector = css_rule.selectorText
            found = fl_pat.exec(selector)
            # Only rules that set a font-family are of interest
            if found and css_rule.style.getPropertyValue('font-family')
                process_style_rule(selector, css_rule.style, rules, found[1].toLowerCase())
        else if kind == css_rule.IMPORT_RULE and css_rule.styleSheet
            process_sheet_for_pseudo(css_rule.styleSheet, rules)
|
||||||
|
|
||||||
|
process_style_rule = (selector_text, style, rules, pseudo) ->
    # Record the text of every element targeted by a pseudo-element rule.
    #
    # selector_text: the rule's selector, still containing the pseudo-element
    # style:         the rule's style declaration, passed to font_dict()
    # rules:         output array; receives one [font_dict, text, pseudo]
    #                triple per matching element with non-empty innerText
    # pseudo:        the pseudo-element name the caller extracted via fl_pat
    selector_text = selector_text.replace(fl_pat, '')
    # A bare "::first-letter" selector leaves nothing behind once the
    # pseudo-element is removed, and querySelectorAll('') throws a syntax
    # error. Such a rule applies to every element, so query for '*'.
    if /^\s*$/.test(selector_text)
        selector_text = '*'
    fd = font_dict(style)
    try
        elements = document.querySelectorAll(selector_text)
    catch err
        # What remains is not a selector this engine can query; skip this
        # rule instead of aborting the scan of all remaining rules.
        return
    for element in elements
        text = element.innerText
        if text
            rules.push([fd, text, pseudo])
|
||||||
|
|
||||||
class FontStats
|
class FontStats
|
||||||
# This class is a namespace to expose functions via the
|
# This class is a namespace to expose functions via the
|
||||||
# window.font_stats object.
|
# window.font_stats object.
|
||||||
@ -75,6 +98,12 @@ class FontStats
|
|||||||
ans.push(usage)
|
ans.push(usage)
|
||||||
py_bridge.value = ans
|
py_bridge.value = ans
|
||||||
|
|
||||||
|
get_pseudo_element_font_usage: () ->
    # Run process_sheet_for_pseudo() over every stylesheet of the document
    # and publish the accumulated array through py_bridge.value.
    usage = []
    for stylesheet in document.styleSheets
        process_sheet_for_pseudo(stylesheet, usage)
    py_bridge.value = usage
|
||||||
|
|
||||||
get_font_families: () ->
|
get_font_families: () ->
|
||||||
ans = {}
|
ans = {}
|
||||||
for node in document.getElementsByTagName('*')
|
for node in document.getElementsByTagName('*')
|
||||||
|
@ -11,6 +11,7 @@ import json, sys, os, logging
|
|||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import regex
|
||||||
from cssutils import CSSParser
|
from cssutils import CSSParser
|
||||||
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
|
from PyQt5.Qt import (pyqtProperty, QEventLoop, Qt, QSize, QTimer,
|
||||||
pyqtSlot)
|
pyqtSlot)
|
||||||
@ -101,6 +102,31 @@ def get_matching_rules(rules, font):
|
|||||||
return m
|
return m
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def parse_font_families(parser, raw):
    """Yield each non-empty family name found in a CSS font-family value.

    ``raw`` is the text of a font-family value. It is parsed by ``parser``
    (an object providing ``parseStyle``) as the declaration
    ``font-family:<raw>``. Nothing is yielded when the parser produces no
    font-family property from that text.
    """
    declaration = parser.parseStyle('font-family:' + raw, validate=False)
    prop = declaration.getProperty('font-family')
    if prop is None:
        # An empty or unparseable value leaves no property behind; without
        # this guard the attribute access below raises AttributeError.
        return
    for component in prop.propertyValue:
        name = component.value
        if name:
            yield name
|
||||||
|
|
||||||
|
def get_pseudo_element_font_usage(pseudo_element_font_usage, first_letter_pat, parser):
    """Convert pseudo-element usage triples into font usage dictionaries.

    :param pseudo_element_font_usage: iterable of ``(font_dict, text, pseudo)``
        triples; ``font_dict['font-family']`` is a raw font-family string.
    :param first_letter_pat: compiled pattern applied with ``match()`` to the
        stripped text of ``first-letter`` entries. Whatever it matches at the
        start of the text is kept in front of the first following character.
    :param parser: passed through to :func:`parse_font_families`.
    :return: list of copies of each ``font_dict`` with ``'text'`` set to the
        affected text and ``'font-family'`` replaced by a list of family
        names. Entries whose text ends up empty are dropped.
    """
    ans = []
    for font_dict, text, pseudo in pseudo_element_font_usage:
        text = text.strip()
        if pseudo == 'first-letter':
            m = first_letter_pat.match(text)
            if m is None:
                text = text[:1]
            else:
                # match() returns a match object, not a string: extract the
                # matched prefix before concatenating or measuring it.
                prefix = m.group()
                text = prefix + text[len(prefix):].lstrip()[:1]
        if text:
            font = font_dict.copy()
            font['text'] = text
            font['font-family'] = list(parse_font_families(parser, font['font-family']))
            ans.append(font)

    return ans
|
||||||
|
|
||||||
class Page(QWebPage): # {{{
|
class Page(QWebPage): # {{{
|
||||||
|
|
||||||
def __init__(self, log):
|
def __init__(self, log):
|
||||||
@ -164,6 +190,7 @@ class StatsCollector(object):
|
|||||||
self.do_embed = do_embed
|
self.do_embed = do_embed
|
||||||
must_use_qt()
|
must_use_qt()
|
||||||
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
|
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
|
||||||
|
self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
|
||||||
|
|
||||||
self.loop = QEventLoop()
|
self.loop = QEventLoop()
|
||||||
self.view = QWebView()
|
self.view = QWebView()
|
||||||
@ -186,6 +213,14 @@ class StatsCollector(object):
|
|||||||
if self.loop.exec_() == 1:
|
if self.loop.exec_() == 1:
|
||||||
raise Exception('Failed to gather statistics from book, see log for details')
|
raise Exception('Failed to gather statistics from book, see log for details')
|
||||||
|
|
||||||
|
def log_exception(self, *args):
    """Call ``self.log.exception(*args)`` with the log's filter level
    temporarily set to its DEBUG level, then restore the previous level."""
    log = self.log
    previous_level = log.filter_level
    try:
        log.filter_level = log.DEBUG
        log.exception(*args)
    finally:
        # Restore the level even if logging itself raised
        log.filter_level = previous_level
|
||||||
|
|
||||||
def render_book(self):
|
def render_book(self):
|
||||||
try:
|
try:
|
||||||
if not self.render_queue:
|
if not self.render_queue:
|
||||||
@ -193,7 +228,7 @@ class StatsCollector(object):
|
|||||||
else:
|
else:
|
||||||
self.render_next()
|
self.render_next()
|
||||||
except:
|
except:
|
||||||
self.logger.exception('Rendering failed')
|
self.log_exception('Rendering failed')
|
||||||
self.loop.exit(1)
|
self.loop.exit(1)
|
||||||
|
|
||||||
def render_next(self):
|
def render_next(self):
|
||||||
@ -210,7 +245,7 @@ class StatsCollector(object):
|
|||||||
self.page.load_js()
|
self.page.load_js()
|
||||||
self.collect_font_stats()
|
self.collect_font_stats()
|
||||||
except:
|
except:
|
||||||
self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
|
self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
|
||||||
self.loop.exit(1)
|
self.loop.exit(1)
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -283,6 +318,11 @@ class StatsCollector(object):
|
|||||||
font_usage = self.page.bridge_value
|
font_usage = self.page.bridge_value
|
||||||
if not isinstance(font_usage, list):
|
if not isinstance(font_usage, list):
|
||||||
raise Exception('Unknown error occurred while reading font usage')
|
raise Exception('Unknown error occurred while reading font usage')
|
||||||
|
self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
|
||||||
|
pseudo_element_font_usage = self.page.bridge_value
|
||||||
|
if not isinstance(pseudo_element_font_usage, list):
|
||||||
|
raise Exception('Unknown error occurred while reading pseudo element font usage')
|
||||||
|
font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
|
||||||
exclude = {'\n', '\r', '\t'}
|
exclude = {'\n', '\r', '\t'}
|
||||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
|
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
|
||||||
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
||||||
@ -314,11 +354,11 @@ class StatsCollector(object):
|
|||||||
if not isinstance(font_families, dict):
|
if not isinstance(font_families, dict):
|
||||||
raise Exception('Unknown error occurred while reading font families')
|
raise Exception('Unknown error occurred while reading font families')
|
||||||
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
|
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
|
||||||
|
for font_dict, text, pseudo in pseudo_element_font_usage:
|
||||||
|
font_families[font_dict['font-family']] = True
|
||||||
for raw in font_families.iterkeys():
|
for raw in font_families.iterkeys():
|
||||||
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
|
for x in parse_font_families(self.parser, raw):
|
||||||
for x in style.propertyValue:
|
if x.lower() not in bad_fonts:
|
||||||
x = x.value
|
|
||||||
if x and x.lower() not in bad_fonts:
|
|
||||||
fs.add(x)
|
fs.add(x)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@ -327,5 +367,3 @@ if __name__ == '__main__':
|
|||||||
default_log.filter_level = default_log.DEBUG
|
default_log.filter_level = default_log.DEBUG
|
||||||
ebook = get_container(sys.argv[-1], default_log)
|
ebook = get_container(sys.argv[-1], default_log)
|
||||||
print (StatsCollector(ebook, do_embed=True).font_stats)
|
print (StatsCollector(ebook, do_embed=True).font_stats)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user