Polish: Add option to embed referenced fonts

Book polishing: Add option to embed all referenced fonts when polishing
books using the "Polish Books" tool. Fixes #1196038 [[enhancement] embed font without conversion](https://bugs.launchpad.net/calibre/+bug/1196038)
This commit is contained in:
Kovid Goyal 2013-07-01 15:08:54 +05:30
parent 59346348c5
commit 9952abad4a
6 changed files with 245 additions and 15 deletions

Binary file not shown.

View File

@ -0,0 +1,158 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from lxml import etree
from calibre import prints
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.polish.stats import normalize_font_properties
from calibre.utils.filenames import ascii_filename
# The CSS font properties that identify a font face, mapped to their default
# values. font-family has no default, so it maps to None.
props = {
    'font-family': None,
    'font-weight': 'normal',
    'font-style': 'normal',
    'font-stretch': 'normal',
}
def matching_rule(font, rules):
    """Find the rule in ``rules`` that describes the same font face as ``font``.

    Two faces are considered the same when their font-style, font-stretch and
    font-weight values are equal and their family names are equal after
    lower-casing with icu_lower().

    :param font: Mapping with the keys font-family, font-weight, font-style
        and font-stretch.
    :param rules: Iterable of mappings with the same keys.
    :return: The first matching item of ``rules`` or None if nothing matches.
    """
    def first_family(value):
        # A family is either a plain string or a collection of names. For a
        # collection only its first item (in iteration order) is considered.
        if isinstance(value, basestring):
            return value
        return tuple(value)[0]

    wanted_family = icu_lower(first_family(font['font-family']))
    weight = font['font-weight']
    style = font['font-style']
    stretch = font['font-stretch']

    for candidate in rules:
        same_face = (
            candidate['font-style'] == style and
            candidate['font-stretch'] == stretch and
            candidate['font-weight'] == weight
        )
        if not same_face:
            continue
        if icu_lower(first_family(candidate['font-family'])) == wanted_family:
            return candidate
    return None
def embed_font(container, font, all_font_rules, report, warned):
    """Create the data for an @font-face rule for ``font``.

    If one of ``all_font_rules`` already describes the requested face, the
    font file that rule points to is re-used. Otherwise the fonts returned by
    the font scanner for the family are searched for a face with the requested
    weight, style and stretch; when one is found its data is written into the
    container as a new item.

    :param container: The book container that receives the font file.
    :param font: Mapping with the keys font-family, font-weight, font-style
        and font-stretch.
    :param all_font_rules: The @font-face rules already present in the book.
        Their ``src`` value is used as the container name of the font file.
    :param report: Callable accepting a single message string.
    :param warned: Set of families/messages that were already reported. It is
        updated in place so that every problem is reported only once.
    :return: A dict with the font properties plus ``src`` (a CSS ``url(...)``
        value) and ``name`` (container name of the font file), or None when
        no suitable font could be found.
    """
    existing = matching_rule(font, all_font_rules)
    ff = font['font-family']
    if not isinstance(ff, basestring):
        # Use the same extraction as matching_rule() so that families given
        # as a set/frozenset work here too (they cannot be indexed directly).
        ff = tuple(ff)[0]

    if existing is not None:
        # The face is already embedded somewhere in the book: point the new
        # rule at the existing font file, but keep the family name as it was
        # requested.
        name = existing['src']
        href = container.name_to_href(name)
        rule = {k:ff if k == 'font-family' else existing.get(k, v) for k, v in props.iteritems()}
        rule['src'] = 'url(%s)' % href
        rule['name'] = name
        return rule

    # Imported lazily, only needed when a font actually has to be looked up
    from calibre.utils.fonts.scanner import font_scanner, NoFonts
    if ff in warned:
        return None
    try:
        fonts = font_scanner.fonts_for_family(ff)
    except NoFonts:
        report(_('Failed to find fonts for family: %s, not embedding') % ff)
        warned.add(ff)
        return None

    wt = int(font.get('font-weight', '400'))
    style = font.get('font-style', 'normal')
    stretch = font.get('font-stretch', 'normal')
    for f in fonts:
        if f['weight'] == wt and f['font-style'] == style and f['font-stretch'] == stretch:
            report('Embedding font %s from %s' % (f['full_name'], f['path']))
            data = font_scanner.get_font_data(f)
            fname = f['full_name']
            ext = 'otf' if f['is_otf'] else 'ttf'
            fname = ascii_filename(fname).replace(' ', '-').replace('(', '').replace(')', '')
            item = container.generate_item('fonts/%s.%s'%(fname, ext), id_prefix='font')
            name = container.href_to_name(item.get('href'), container.opf_name)
            with container.open(name, 'wb') as out:
                out.write(data)
            # NOTE(review): the href is computed without a base name while the
            # item was created relative to the OPF -- confirm that the
            # resulting url() resolves correctly from the generated stylesheet
            # when the OPF is not at the root of the container.
            href = container.name_to_href(name)
            rule = {k:f.get(k, v) for k, v in props.iteritems()}
            rule['src'] = 'url(%s)' % href
            rule['name'] = name
            return rule

    msg = _('Failed to find font matching: family: %s; weight: %s; style: %s; stretch: %s') % (
        ff, font['font-weight'], font['font-style'], font['font-stretch'])
    if msg not in warned:
        warned.add(msg)
        report(msg)
    return None
def embed_all_fonts(container, stats, report):
    """Embed all fonts that are referenced but not yet embedded in the book.

    For every spine item the fonts recorded in ``stats.font_usage_map`` are
    checked against the @font-face rules recorded for that item. Fonts without
    a matching rule are embedded with embed_font(). Afterwards a ``fonts.css``
    stylesheet containing the new @font-face rules is added to the book and
    linked from every HTML file that uses one of the embedded fonts.

    :param container: The book container to modify.
    :param stats: Object providing ``all_font_rules``, ``font_usage_map``,
        ``font_spec_map``, ``font_rule_map`` and ``font_stats``. The
        ``font_stats`` mapping is updated with the text used for every font
        embedded by this function.
    :param report: Callable accepting a single message string.
    """
    all_font_rules = tuple(stats.all_font_rules.itervalues())
    warned = set()
    # rules: the rules as they are written to the stylesheet
    # nrules: normalized copies, used to detect fonts already embedded here
    rules, nrules = [], []
    modified = set()

    for path in container.spine_items:
        name = container.abspath_to_name(path)
        fu = stats.font_usage_map.get(name, None)
        fs = stats.font_spec_map.get(name, None)
        fr = stats.font_rule_map.get(name, None)
        if fu is None or fs is None or fr is None:
            continue
        fs = {icu_lower(x) for x in fs}
        for font in fu.itervalues():
            if icu_lower(font['font-family']) not in fs:
                # Family is not named by any CSS rule of this file
                continue
            if matching_rule(font, fr) is not None:
                # This font was already embedded in this HTML file, before
                # processing started
                continue
            rule = matching_rule(font, nrules)
            if rule is None:
                rule = embed_font(container, font, all_font_rules, report, warned)
                if rule is None:
                    continue
                rules.append(rule)
                nrules.append(normalize_font_properties(rule.copy()))
            modified.add(name)
            # Merge into a set owned by font_stats instead of assigning
            # font['text']: the font file may already have usage recorded
            # from other files (when an existing @font-face rule is re-used)
            # which must not be lost, and the set stored in the usage map
            # must not be aliased and modified by later updates.
            stats.font_stats.setdefault(rule['name'], set()).update(font['text'])

    if not rules:
        report(_('No embeddable fonts found'))
        return

    # Write out CSS. Only src and the properties that differ from their
    # defaults are emitted.
    blocks = []
    for rule in rules:
        declarations = [
            '%s: %s' % (k, '"%s"' % v if k == 'font-family' else v)
            for k, v in rule.iteritems()
            if (k in props and props[k] != v and v != '400') or k == 'src'
        ]
        blocks.append('@font-face {\n\t%s\n}' % ';\n\t'.join(declarations))
    css = '\n\n'.join(blocks)
    item = container.generate_item('fonts.css', id_prefix='font_embed')
    name = container.href_to_name(item.get('href'), container.opf_name)
    with container.open(name, 'wb') as out:
        out.write(css.encode('utf-8'))

    # Add link to CSS in all files that need it
    for spine_name in modified:
        root = container.parsed(spine_name)
        head = root.xpath('//*[local-name()="head"][1]')[0]
        href = container.name_to_href(name, spine_name)
        etree.SubElement(head, XHTML('link'), rel='stylesheet', type='text/css', href=href).tail = '\n'
        container.dirty(spine_name)
if __name__ == '__main__':
    # Development entry point: embed the fonts referenced by the book named
    # by the last command line argument, save the result under the same name
    # with "_subset" inserted before the extension and print the report.
    from calibre.ebooks.oeb.polish.container import get_container
    from calibre.ebooks.oeb.polish.stats import StatsCollector
    from calibre.utils.logging import default_log

    default_log.filter_level = default_log.DEBUG
    inbook = sys.argv[-1]
    ebook = get_container(inbook, default_log)

    messages = []
    collected = StatsCollector(ebook, do_embed=True)
    embed_all_fonts(ebook, collected, messages.append)

    stem, sep, ext = inbook.rpartition('.')
    outbook = stem + ('_subset.' + ext)
    ebook.commit(outbook)

    prints('\nReport:')
    for line in messages:
        prints(line)
    print()
    prints('Output written to:', outbook)

View File

@ -67,6 +67,18 @@ class FontStats
ans.push(usage)
py_bridge.value = ans
get_font_families: () ->
    # Collect every font-family value set by a CSS rule that matches some
    # element of the document and pass the result to python via py_bridge.
    # The values are stored as object keys, so each one is recorded once.
    families = {}
    for element in document.getElementsByTagName('*')
        matched = document.defaultView.getMatchedCSSRules(element, '')
        continue unless matched
        for css_rule in matched
            value = css_rule.style.getPropertyValue('font-family')
            families[value] = true if value
    py_bridge.value = families
if window?
window.font_stats = new FontStats()

View File

@ -14,6 +14,7 @@ from functools import partial
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.stats import StatsCollector
from calibre.ebooks.oeb.polish.subset import subset_all_fonts
from calibre.ebooks.oeb.polish.embed import embed_all_fonts
from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.ebooks.oeb.polish.replace import smarten_punctuation
from calibre.ebooks.oeb.polish.jacket import (
@ -21,6 +22,7 @@ from calibre.ebooks.oeb.polish.jacket import (
from calibre.utils.logging import Log
ALL_OPTS = {
'embed': False,
'subset': False,
'opf': None,
'cover': None,
@ -47,6 +49,12 @@ changes needed for the desired effect.</p>
<p>Note that polishing only works on files in the %s formats.</p>\
''')%_(' or ').join('<b>%s</b>'%x for x in SUPPORTED),
'embed': _('''\
<p>Embed all fonts that are referenced in the document and are not already embedded.
This will scan your computer for the fonts, and if they are found, they will be
embedded into the document.</p>
'''),
'subset': _('''\
<p>Subsetting fonts means reducing an embedded font to contain
only the characters used from that font in the book. This
@ -118,8 +126,8 @@ def polish(file_map, opts, log, report):
ebook = get_container(inbook, log)
jacket = None
if opts.subset:
stats = StatsCollector(ebook)
if opts.subset or opts.embed:
stats = StatsCollector(ebook, do_embed=opts.embed)
if opts.opf:
rt(_('Updating metadata'))
@ -159,6 +167,11 @@ def polish(file_map, opts, log, report):
smarten_punctuation(ebook, report)
report('')
if opts.embed:
rt(_('Embedding referenced fonts'))
embed_all_fonts(ebook, stats, report)
report('')
if opts.subset:
rt(_('Subsetting embedded fonts'))
subset_all_fonts(ebook, stats.font_stats, report)
@ -197,6 +210,7 @@ def option_parser():
parser = OptionParser(usage=USAGE)
a = parser.add_option
o = partial(a, default=False, action='store_true')
o('--embed-fonts', '-e', dest='embed', help=CLI_HELP['embed'])
o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
a('--cover', '-c', help=_(
'Path to a cover image. Changes the cover specified in the ebook. '

View File

@ -7,10 +7,11 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import json, sys, os
import json, sys, os, logging
from urllib import unquote
from collections import defaultdict
from cssutils import parseStyle
from cssutils import CSSParser
from PyQt4.Qt import (pyqtProperty, QString, QEventLoop, Qt, QSize, QTimer,
pyqtSlot)
from PyQt4.QtWebKit import QWebPage, QWebView
@ -41,14 +42,14 @@ def normalize_font_properties(font):
'extra-expanded', 'ultra-expanded'}:
val = 'normal'
font['font-stretch'] = val
return font
widths = {x:i for i, x in enumerate(( 'ultra-condensed',
widths = {x:i for i, x in enumerate(('ultra-condensed',
'extra-condensed', 'condensed', 'semi-condensed', 'normal',
'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded'
))}
def get_matching_rules(rules, font):
normalize_font_properties(font)
matches = []
# Filter on family
@ -100,7 +101,7 @@ def get_matching_rules(rules, font):
return m
return []
class Page(QWebPage): # {{{
class Page(QWebPage): # {{{
def __init__(self, log):
self.log = log
@ -157,10 +158,12 @@ class Page(QWebPage): # {{{
class StatsCollector(object):
def __init__(self, container):
def __init__(self, container, do_embed=False):
self.container = container
self.log = self.logger = container.log
self.do_embed = do_embed
must_use_qt()
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
self.loop = QEventLoop()
self.view = QWebView()
@ -173,6 +176,10 @@ class StatsCollector(object):
self.render_queue = list(container.spine_items)
self.font_stats = {}
self.font_usage_map = {}
self.font_spec_map = {}
self.font_rule_map = {}
self.all_font_rules = {}
QTimer.singleShot(0, self.render_book)
@ -235,27 +242,35 @@ class StatsCollector(object):
rules = []
for rule in font_face_rules:
ff = rule.get('font-family', None)
if not ff: continue
style = parseStyle('font-family:%s'%ff, validate=False)
if not ff:
continue
style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
ff = [x.value for x in
style.getProperty('font-family').propertyValue]
if not ff or ff[0] == 'inherit':
continue
rule['font-family'] = frozenset(icu_lower(f) for f in ff)
src = rule.get('src', None)
if not src: continue
style = parseStyle('background-image:%s'%src, validate=False)
if not src:
continue
style = self.parser.parseStyle('background-image:%s'%src, validate=False)
src = style.getProperty('background-image').propertyValue[0].uri
name = self.href_to_name(src, '@font-face rule')
if name is None:
continue
rule['src'] = name
normalize_font_properties(rule)
rule['width'] = widths[rule['font-stretch']]
rule['weight'] = int(rule['font-weight'])
rules.append(rule)
if not rules:
if not rules and not self.do_embed:
return
self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
for rule in rules:
self.all_font_rules[rule['src']] = rule
for rule in rules:
if rule['src'] not in self.font_stats:
self.font_stats[rule['src']] = set()
@ -265,19 +280,48 @@ class StatsCollector(object):
if not isinstance(font_usage, list):
raise Exception('Unknown error occurred while reading font usage')
exclude = {'\n', '\r', '\t'}
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
for font in font_usage:
text = set()
for t in font['text']:
text |= frozenset(t)
text.difference_update(exclude)
if not text: continue
if not text:
continue
normalize_font_properties(font)
for rule in get_matching_rules(rules, font):
self.font_stats[rule['src']] |= text
if self.do_embed:
ff = [icu_lower(x) for x in font.get('font-family', [])]
if ff and ff[0] not in bad_fonts:
keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
val = fu[key]
if not val:
val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
val['text'] = set()
val['text'] |= text
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)
if self.do_embed:
self.page.evaljs('window.font_stats.get_font_families()')
font_families = self.page.bridge_value
if not isinstance(font_families, dict):
raise Exception('Unknown error occurred while reading font families')
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
for raw in font_families.iterkeys():
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
for x in style.propertyValue:
x = x.value
if x and x.lower() not in bad_fonts:
fs.add(x)
if __name__ == '__main__':
from calibre.ebooks.oeb.polish.container import get_container
from calibre.utils.logging import default_log
default_log.filter_level = default_log.DEBUG
ebook = get_container(sys.argv[-1], default_log)
print (StatsCollector(ebook).font_stats)
print (StatsCollector(ebook, do_embed=True).font_stats)

View File

@ -45,6 +45,7 @@ class Polish(QDialog): # {{{
ORIGINAL_* format before running it.</p>''')
),
'embed':_('<h3>Embed referenced fonts</h3>%s')%HELP['embed'],
'subset':_('<h3>Subsetting fonts</h3>%s')%HELP['subset'],
'smarten_punctuation':
@ -75,6 +76,7 @@ class Polish(QDialog): # {{{
count = 0
self.all_actions = OrderedDict([
('embed', _('&Embed all referenced fonts')),
('subset', _('&Subset all embedded fonts')),
('smarten_punctuation', _('Smarten &punctuation')),
('metadata', _('Update &metadata in the book files')),