From 9952abad4aa8a973a5ed06a3a9575ecda1a446e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Jul 2013 15:08:54 +0530 Subject: [PATCH] Polish: Add option to embed referenced fonts Book polishing: Add option to embed all referenced fonts when polishing books using the "Polish Books" tool. Fixes #1196038 [[enhancement] embed font without conversion](https://bugs.launchpad.net/calibre/+bug/1196038) --- resources/compiled_coffeescript.zip | Bin 71177 -> 71881 bytes src/calibre/ebooks/oeb/polish/embed.py | 158 ++++++++++++++++++ .../ebooks/oeb/polish/font_stats.coffee | 12 ++ src/calibre/ebooks/oeb/polish/main.py | 18 +- src/calibre/ebooks/oeb/polish/stats.py | 70 ++++++-- src/calibre/gui2/actions/polish.py | 2 + 6 files changed, 245 insertions(+), 15 deletions(-) create mode 100644 src/calibre/ebooks/oeb/polish/embed.py diff --git a/resources/compiled_coffeescript.zip b/resources/compiled_coffeescript.zip index cb48a58bd2714e22fdeb5e827bef753f51998001..e092b53157f3c30f95712db0b597358b744dc08a 100644 GIT binary patch delta 454 zcmeBN!*X&Xi$s7oGm8iV2+S(D=k%>F=<5+a28JVi+qKvkr*co#KP6R1p{O(mN*3!V6qi)yq)u<(W0dBy=28HI#JtH91>`25 zwG*3se=7H6PAi+~feRRi?{vGRjS#z{MzK1~FPeAu~-u1L7P_g=(mUFuBCM;%JCmEYO;gqSDmK z{)eR||49^{-0+=!^80f3=@SGPg{H3)Vq891w}q#XZz~@o8@n_klL#}mFkxVjpI#u$ zC^Oxfol#_ZurQ+*6C?k02T?}R=~ILmO_^@1KsXPC87-JL=z%$tAINh|Hx~gaHRyy0 zBvf%tZvqMEOoj-=v~Wy64HCG$VET6vMt3HK6(9*$W&=*f>5ig|B9jw-aftxjQrDU#ThxKFBD@mWx6Fgy+E8%boyH{Mhm9(@(_-VIHM7heiex0 z$)w!^5xf8ryftz1_XV=k6(tz8nB-?dB$6Z;4Vd(QK{zWV7 + ans = {} + for node in document.getElementsByTagName('*') + rules = document.defaultView.getMatchedCSSRules(node, '') + if rules + for rule in rules + style = rule.style + family = style.getPropertyValue('font-family') + if family + ans[family] = true + py_bridge.value = ans + if window? window.font_stats = new FontStats() diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 08b5004c91..ff46288643 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -14,6 +14,7 @@ from functools import partial from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.stats import StatsCollector from calibre.ebooks.oeb.polish.subset import subset_all_fonts +from calibre.ebooks.oeb.polish.embed import embed_all_fonts from calibre.ebooks.oeb.polish.cover import set_cover from calibre.ebooks.oeb.polish.replace import smarten_punctuation from calibre.ebooks.oeb.polish.jacket import ( @@ -21,6 +22,7 @@ from calibre.ebooks.oeb.polish.jacket import ( from calibre.utils.logging import Log ALL_OPTS = { + 'embed': False, 'subset': False, 'opf': None, 'cover': None, @@ -47,6 +49,12 @@ changes needed for the desired effect.

Note that polishing only works on files in the %s formats.

\ ''')%_(' or ').join('%s'%x for x in SUPPORTED), +'embed': _('''\ +

Embed all fonts that are referenced in the document and are not already embedded. +This will scan your computer for the fonts, and if they are found, they will be +embedded into the document.

+'''), + 'subset': _('''\

Subsetting fonts means reducing an embedded font to contain only the characters used from that font in the book. This @@ -118,8 +126,8 @@ def polish(file_map, opts, log, report): ebook = get_container(inbook, log) jacket = None - if opts.subset: - stats = StatsCollector(ebook) + if opts.subset or opts.embed: + stats = StatsCollector(ebook, do_embed=opts.embed) if opts.opf: rt(_('Updating metadata')) @@ -159,6 +167,11 @@ def polish(file_map, opts, log, report): smarten_punctuation(ebook, report) report('') + if opts.embed: + rt(_('Embedding referenced fonts')) + embed_all_fonts(ebook, stats, report) + report('') + if opts.subset: rt(_('Subsetting embedded fonts')) subset_all_fonts(ebook, stats.font_stats, report) @@ -197,6 +210,7 @@ def option_parser(): parser = OptionParser(usage=USAGE) a = parser.add_option o = partial(a, default=False, action='store_true') + o('--embed-fonts', '-e', dest='embed', help=CLI_HELP['embed']) o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset']) a('--cover', '-c', help=_( 'Path to a cover image. Changes the cover specified in the ebook. ' diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index d4a5c96111..77b99ff9b6 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -7,10 +7,11 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import json, sys, os +import json, sys, os, logging from urllib import unquote +from collections import defaultdict -from cssutils import parseStyle +from cssutils import CSSParser from PyQt4.Qt import (pyqtProperty, QString, QEventLoop, Qt, QSize, QTimer, pyqtSlot) from PyQt4.QtWebKit import QWebPage, QWebView @@ -41,14 +42,14 @@ def normalize_font_properties(font): 'extra-expanded', 'ultra-expanded'}: val = 'normal' font['font-stretch'] = val + return font -widths = {x:i for i, x in enumerate(( 'ultra-condensed', +widths = {x:i for i, x in enumerate(('ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed', 'normal', 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded' ))} def get_matching_rules(rules, font): - normalize_font_properties(font) matches = [] # Filter on family @@ -100,7 +101,7 @@ def get_matching_rules(rules, font): return m return [] -class Page(QWebPage): # {{{ +class Page(QWebPage): # {{{ def __init__(self, log): self.log = log @@ -157,10 +158,12 @@ class Page(QWebPage): # {{{ class StatsCollector(object): - def __init__(self, container): + def __init__(self, container, do_embed=False): self.container = container self.log = self.logger = container.log + self.do_embed = do_embed must_use_qt() + self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) self.loop = QEventLoop() self.view = QWebView() @@ -173,6 +176,10 @@ class StatsCollector(object): self.render_queue = list(container.spine_items) self.font_stats = {} + self.font_usage_map = {} + self.font_spec_map = {} + self.font_rule_map = {} + self.all_font_rules = {} QTimer.singleShot(0, self.render_book) @@ -235,27 +242,35 @@ class StatsCollector(object): rules = [] for rule in font_face_rules: ff = rule.get('font-family', None) - if not ff: continue - style = parseStyle('font-family:%s'%ff, validate=False) + if not ff: + continue + style = self.parser.parseStyle('font-family:%s'%ff, validate=False) ff = [x.value for x in style.getProperty('font-family').propertyValue] if not ff or ff[0] == 'inherit': continue rule['font-family'] = frozenset(icu_lower(f) for f in ff) src = rule.get('src', None) - if not src: continue - style = parseStyle('background-image:%s'%src, validate=False) + if not src: + continue + style = self.parser.parseStyle('background-image:%s'%src, validate=False) src = style.getProperty('background-image').propertyValue[0].uri name = self.href_to_name(src, '@font-face rule') + if name is None: + continue rule['src'] = name normalize_font_properties(rule) rule['width'] = widths[rule['font-stretch']] rule['weight'] = int(rule['font-weight']) rules.append(rule) - if not rules: + if not rules and not self.do_embed: return + self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules + for rule in rules: + self.all_font_rules[rule['src']] = rule + for rule in rules: if rule['src'] not in self.font_stats: self.font_stats[rule['src']] = set() @@ -265,19 +280,48 @@ class StatsCollector(object): if not isinstance(font_usage, list): raise Exception('Unknown error occurred while reading font usage') exclude = {'\n', '\r', '\t'} + self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) + bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} for font in font_usage: text = set() for t in font['text']: text |= frozenset(t) text.difference_update(exclude) - if not text: continue + if not text: + continue + normalize_font_properties(font) for rule in get_matching_rules(rules, font): self.font_stats[rule['src']] |= text + if self.do_embed: + ff = [icu_lower(x) for x in font.get('font-family', [])] + if ff and ff[0] not in bad_fonts: + keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} + key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) + val = fu[key] + if not val: + val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) + val['text'] = set() + val['text'] |= text + self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) + + if self.do_embed: + self.page.evaljs('window.font_stats.get_font_families()') + font_families = self.page.bridge_value + if not isinstance(font_families, dict): + raise Exception('Unknown error occurred while reading font families') + self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() + for raw in font_families.iterkeys(): + style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') + for x in style.propertyValue: + x = x.value + if x and x.lower() not in bad_fonts: + fs.add(x) if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container from calibre.utils.logging import default_log default_log.filter_level = default_log.DEBUG ebook = get_container(sys.argv[-1], default_log) - print (StatsCollector(ebook).font_stats) + print (StatsCollector(ebook, do_embed=True).font_stats) + diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py index eb21fb2626..0f21807afb 100644 --- a/src/calibre/gui2/actions/polish.py +++ b/src/calibre/gui2/actions/polish.py @@ -45,6 +45,7 @@ class Polish(QDialog): # {{{ ORIGINAL_* format before running it.

''') ), + 'embed':_('

Embed referenced fonts

%s')%HELP['embed'], 'subset':_('

Subsetting fonts

%s')%HELP['subset'], 'smarten_punctuation': @@ -75,6 +76,7 @@ class Polish(QDialog): # {{{ count = 0 self.all_actions = OrderedDict([ + ('embed', _('&Embed all referenced fonts')), ('subset', _('&Subset all embedded fonts')), ('smarten_punctuation', _('Smarten &punctuation')), ('metadata', _('Update &metadata in the book files')),