mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Polish: Add option to embed referenced fonts
Book polishing: Add option to embed all referenced fonts when polishing books using the "Polish Books" tool. Fixes #1196038 [[enhancement] embed font without conversion](https://bugs.launchpad.net/calibre/+bug/1196038)
This commit is contained in:
parent
59346348c5
commit
9952abad4a
Binary file not shown.
158
src/calibre/ebooks/oeb/polish/embed.py
Normal file
158
src/calibre/ebooks/oeb/polish/embed.py
Normal file
@ -0,0 +1,158 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre import prints
|
||||
from calibre.ebooks.oeb.base import XHTML
|
||||
from calibre.ebooks.oeb.polish.stats import normalize_font_properties
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
# The font properties that describe an @font-face rule in this module, mapped
# to their default values (font-family has no default). The keys select which
# properties are copied into a generated rule; when the CSS is written out,
# properties still at their default value are omitted.
props = {'font-family':None, 'font-weight':'normal', 'font-style':'normal', 'font-stretch':'normal'}
|
||||
|
||||
def matching_rule(font, rules):
    '''
    Return the first rule in `rules` that describes the same font face as
    `font`, or None if no rule matches.

    :param font: Mapping with the keys ``font-family``, ``font-weight``,
        ``font-style`` and ``font-stretch``. ``font-family`` is either a
        string or a collection of names; for a collection only its first
        entry is considered.
    :param rules: Iterable of mappings with the same four keys. A rule's
        ``font-family`` may be a string or a collection of names.
    :return: The matching rule object itself (not a copy) or None.
    '''
    ff = font['font-family']
    if not isinstance(ff, basestring):
        ff = tuple(ff)[0]
    family = icu_lower(ff)
    wanted = (font['font-style'], font['font-stretch'], font['font-weight'])

    for rule in rules:
        if (rule['font-style'], rule['font-stretch'], rule['font-weight']) != wanted:
            continue
        candidates = rule['font-family']
        if isinstance(candidates, basestring):
            candidates = (candidates,)
        # A rule can carry several family names in an unordered collection
        # (for example a frozenset), so picking "the first" name would make
        # the result depend on iteration order. Compare against all of them.
        if any(icu_lower(x) == family for x in candidates):
            return rule
    return None
|
||||
|
||||
def embed_font(container, font, all_font_rules, report, warned):
    '''
    Produce an @font-face style rule for `font`, adding the font file to the
    book if no existing rule in the book already provides it.

    :param container: The book container; used to create the font item, write
        its data and convert between names and hrefs.
    :param font: Mapping with ``font-family``, ``font-weight``, ``font-style``
        and ``font-stretch``. ``font-family`` is a string or a collection of
        names, of which the first is used.
    :param all_font_rules: The @font-face rules already present in the book.
    :param report: Callable taking one message string.
    :param warned: Set of families/messages already reported; updated in place
        so that each failure is reported only once.
    :return: A dict with the keys of ``props`` plus ``src`` (a CSS ``url()``
        value) and ``name`` (the container name of the font file), or None if
        no suitable font could be found.
    '''
    rule = matching_rule(font, all_font_rules)
    ff = font['font-family']
    if not isinstance(ff, basestring):
        # tuple() rather than indexing directly, so that sets and frozensets
        # of family names work here exactly as they do in matching_rule()
        ff = tuple(ff)[0]

    if rule is not None:
        # The font file is already in the book, just build a rule that
        # points at it under the family name used by this document
        name = rule['src']
        href = container.name_to_href(name)
        rule = {k:ff if k == 'font-family' else rule.get(k, v) for k, v in props.iteritems()}
        rule['src'] = 'url(%s)' % href
        rule['name'] = name
        return rule

    # Not in the book: look for the font among those known to the scanner
    from calibre.utils.fonts.scanner import font_scanner, NoFonts
    if ff in warned:
        return None
    try:
        fonts = font_scanner.fonts_for_family(ff)
    except NoFonts:
        report(_('Failed to find fonts for family: %s, not embedding') % ff)
        warned.add(ff)
        return None

    wt = int(font.get('font-weight', '400'))
    style = font.get('font-style', 'normal')
    stretch = font.get('font-stretch', 'normal')
    for f in fonts:
        if f['weight'] != wt or f['font-style'] != style or f['font-stretch'] != stretch:
            continue
        report('Embedding font %s from %s' % (f['full_name'], f['path']))
        data = font_scanner.get_font_data(f)
        ext = 'otf' if f['is_otf'] else 'ttf'
        fname = ascii_filename(f['full_name']).replace(' ', '-').replace('(', '').replace(')', '')
        item = container.generate_item('fonts/%s.%s'%(fname, ext), id_prefix='font')
        name = container.href_to_name(item.get('href'), container.opf_name)
        with container.open(name, 'wb') as out:
            out.write(data)
        href = container.name_to_href(name)
        rule = {k:f.get(k, v) for k, v in props.iteritems()}
        rule['src'] = 'url(%s)' % href
        rule['name'] = name
        return rule

    # The family exists but has no face with the requested properties
    msg = _('Failed to find font matching: family: %s; weight: %s; style: %s; stretch: %s') % (
        ff, font['font-weight'], font['font-style'], font['font-stretch'])
    if msg not in warned:
        warned.add(msg)
        report(msg)
    return None
|
||||
|
||||
def embed_all_fonts(container, stats, report):
    '''
    Embed every font that is used in the spine documents but has no matching
    @font-face rule in the document that uses it.

    For each such font a rule is obtained from embed_font(). All the rules are
    written to a newly generated ``fonts.css`` item and a ``<link>`` to that
    stylesheet is appended to the ``<head>`` of every modified document.

    :param container: The book container.
    :param stats: Object providing ``all_font_rules``, ``font_usage_map``,
        ``font_spec_map``, ``font_rule_map`` and ``font_stats``.
        ``stats.font_stats`` is updated with the text that uses each embedded
        font.
    :param report: Callable taking one message string.
    '''
    all_font_rules = tuple(stats.all_font_rules.itervalues())
    warned = set()
    # rules are written to the CSS, nrules are their normalized copies, used
    # to recognize fonts that this function has already handled
    rules, nrules = [], []
    modified = set()

    for path in container.spine_items:
        name = container.abspath_to_name(path)
        fu = stats.font_usage_map.get(name, None)
        fs = stats.font_spec_map.get(name, None)
        fr = stats.font_rule_map.get(name, None)
        if None in (fs, fu, fr):
            continue
        fs = {icu_lower(x) for x in fs}
        for font in fu.itervalues():
            if icu_lower(font['font-family']) not in fs:
                continue
            if matching_rule(font, fr) is not None:
                # This font was already embedded in this HTML file, before
                # processing started
                continue
            rule = matching_rule(font, nrules)
            if rule is None:
                rule = embed_font(container, font, all_font_rules, report, warned)
                if rule is None:
                    continue
                rules.append(rule)
                nrules.append(normalize_font_properties(rule.copy()))
            modified.add(name)
            # Merge rather than assign: the font file may already have usage
            # recorded from another document in the book, which must not be
            # lost. Merging into a separate set also avoids sharing the set
            # object that belongs to the usage map.
            stats.font_stats.setdefault(rule['name'], set()).update(font['text'])

    if not rules:
        report(_('No embeddable fonts found'))
        return

    # Write out CSS
    blocks = []
    for rule in rules:
        # Only non-default properties are written, src always is
        decls = ['%s: %s' % (k, '"%s"' % v if k == 'font-family' else v)
                 for k, v in rule.iteritems()
                 if (k in props and props[k] != v and v != '400') or k == 'src']
        blocks.append('@font-face {\n\t%s\n}' % ';\n\t'.join(decls))
    css = '\n\n'.join(blocks)
    item = container.generate_item('fonts.css', id_prefix='font_embed')
    name = container.href_to_name(item.get('href'), container.opf_name)
    with container.open(name, 'wb') as out:
        out.write(css.encode('utf-8'))

    # Add link to CSS in all files that need it
    for spine_name in modified:
        root = container.parsed(spine_name)
        head = root.xpath('//*[local-name()="head"][1]')[0]
        href = container.name_to_href(name, spine_name)
        etree.SubElement(head, XHTML('link'), rel='stylesheet', type='text/css', href=href).tail = '\n'
        container.dirty(spine_name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Development entry point: embed the fonts of the book given as the last
    # command line argument and write the result next to it.
    from calibre.ebooks.oeb.polish.container import get_container
    from calibre.ebooks.oeb.polish.stats import StatsCollector
    from calibre.utils.logging import default_log

    default_log.filter_level = default_log.DEBUG
    inbook = sys.argv[-1]
    ebook = get_container(inbook, default_log)
    report = []
    stats = StatsCollector(ebook, do_embed=True)
    embed_all_fonts(ebook, stats, report.append)

    # Output name is the input name with a suffix inserted before the extension
    base, sep, ext = inbook.rpartition('.')
    outbook = base + '_subset.' + ext
    ebook.commit(outbook)

    prints('\nReport:')
    for msg in report:
        prints(msg)
    print()
    prints('Output written to:', outbook)
|
||||
|
@ -67,6 +67,18 @@ class FontStats
|
||||
ans.push(usage)
|
||||
py_bridge.value = ans
|
||||
|
||||
get_font_families: () ->
    # Collect every distinct font-family value declared for any element of
    # the document and hand the result to Python as an object whose keys are
    # the raw font-family strings.
    ans = {}
    for node in document.getElementsByTagName('*')
        rules = document.defaultView.getMatchedCSSRules(node, '')
        if rules
            for rule in rules
                family = rule.style.getPropertyValue('font-family')
                if family
                    ans[family] = true
        # getMatchedCSSRules() reports rules that come from stylesheets, so
        # a font-family set directly in the element's style attribute has
        # to be read from the element itself
        if node.getAttribute('style')
            family = node.style.getPropertyValue('font-family')
            if family
                ans[family] = true
    py_bridge.value = ans
|
||||
|
||||
if window?
|
||||
window.font_stats = new FontStats()
|
||||
|
||||
|
@ -14,6 +14,7 @@ from functools import partial
|
||||
from calibre.ebooks.oeb.polish.container import get_container
|
||||
from calibre.ebooks.oeb.polish.stats import StatsCollector
|
||||
from calibre.ebooks.oeb.polish.subset import subset_all_fonts
|
||||
from calibre.ebooks.oeb.polish.embed import embed_all_fonts
|
||||
from calibre.ebooks.oeb.polish.cover import set_cover
|
||||
from calibre.ebooks.oeb.polish.replace import smarten_punctuation
|
||||
from calibre.ebooks.oeb.polish.jacket import (
|
||||
@ -21,6 +22,7 @@ from calibre.ebooks.oeb.polish.jacket import (
|
||||
from calibre.utils.logging import Log
|
||||
|
||||
ALL_OPTS = {
|
||||
'embed': False,
|
||||
'subset': False,
|
||||
'opf': None,
|
||||
'cover': None,
|
||||
@ -47,6 +49,12 @@ changes needed for the desired effect.</p>
|
||||
<p>Note that polishing only works on files in the %s formats.</p>\
|
||||
''')%_(' or ').join('<b>%s</b>'%x for x in SUPPORTED),
|
||||
|
||||
'embed': _('''\
|
||||
<p>Embed all fonts that are referenced in the document and are not already embedded.
|
||||
This will scan your computer for the fonts, and if they are found, they will be
|
||||
embedded into the document.</p>
|
||||
'''),
|
||||
|
||||
'subset': _('''\
|
||||
<p>Subsetting fonts means reducing an embedded font to contain
|
||||
only the characters used from that font in the book. This
|
||||
@ -118,8 +126,8 @@ def polish(file_map, opts, log, report):
|
||||
ebook = get_container(inbook, log)
|
||||
jacket = None
|
||||
|
||||
if opts.subset:
|
||||
stats = StatsCollector(ebook)
|
||||
if opts.subset or opts.embed:
|
||||
stats = StatsCollector(ebook, do_embed=opts.embed)
|
||||
|
||||
if opts.opf:
|
||||
rt(_('Updating metadata'))
|
||||
@ -159,6 +167,11 @@ def polish(file_map, opts, log, report):
|
||||
smarten_punctuation(ebook, report)
|
||||
report('')
|
||||
|
||||
if opts.embed:
|
||||
rt(_('Embedding referenced fonts'))
|
||||
embed_all_fonts(ebook, stats, report)
|
||||
report('')
|
||||
|
||||
if opts.subset:
|
||||
rt(_('Subsetting embedded fonts'))
|
||||
subset_all_fonts(ebook, stats.font_stats, report)
|
||||
@ -197,6 +210,7 @@ def option_parser():
|
||||
parser = OptionParser(usage=USAGE)
|
||||
a = parser.add_option
|
||||
o = partial(a, default=False, action='store_true')
|
||||
o('--embed-fonts', '-e', dest='embed', help=CLI_HELP['embed'])
|
||||
o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
|
||||
a('--cover', '-c', help=_(
|
||||
'Path to a cover image. Changes the cover specified in the ebook. '
|
||||
|
@ -7,10 +7,11 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import json, sys, os
|
||||
import json, sys, os, logging
|
||||
from urllib import unquote
|
||||
from collections import defaultdict
|
||||
|
||||
from cssutils import parseStyle
|
||||
from cssutils import CSSParser
|
||||
from PyQt4.Qt import (pyqtProperty, QString, QEventLoop, Qt, QSize, QTimer,
|
||||
pyqtSlot)
|
||||
from PyQt4.QtWebKit import QWebPage, QWebView
|
||||
@ -41,6 +42,7 @@ def normalize_font_properties(font):
|
||||
'extra-expanded', 'ultra-expanded'}:
|
||||
val = 'normal'
|
||||
font['font-stretch'] = val
|
||||
return font
|
||||
|
||||
widths = {x:i for i, x in enumerate(('ultra-condensed',
|
||||
'extra-condensed', 'condensed', 'semi-condensed', 'normal',
|
||||
@ -48,7 +50,6 @@ widths = {x:i for i, x in enumerate(( 'ultra-condensed',
|
||||
))}
|
||||
|
||||
def get_matching_rules(rules, font):
|
||||
normalize_font_properties(font)
|
||||
matches = []
|
||||
|
||||
# Filter on family
|
||||
@ -157,10 +158,12 @@ class Page(QWebPage): # {{{
|
||||
|
||||
class StatsCollector(object):
|
||||
|
||||
def __init__(self, container):
|
||||
def __init__(self, container, do_embed=False):
|
||||
self.container = container
|
||||
self.log = self.logger = container.log
|
||||
self.do_embed = do_embed
|
||||
must_use_qt()
|
||||
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
|
||||
|
||||
self.loop = QEventLoop()
|
||||
self.view = QWebView()
|
||||
@ -173,6 +176,10 @@ class StatsCollector(object):
|
||||
|
||||
self.render_queue = list(container.spine_items)
|
||||
self.font_stats = {}
|
||||
self.font_usage_map = {}
|
||||
self.font_spec_map = {}
|
||||
self.font_rule_map = {}
|
||||
self.all_font_rules = {}
|
||||
|
||||
QTimer.singleShot(0, self.render_book)
|
||||
|
||||
@ -235,27 +242,35 @@ class StatsCollector(object):
|
||||
rules = []
|
||||
for rule in font_face_rules:
|
||||
ff = rule.get('font-family', None)
|
||||
if not ff: continue
|
||||
style = parseStyle('font-family:%s'%ff, validate=False)
|
||||
if not ff:
|
||||
continue
|
||||
style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
|
||||
ff = [x.value for x in
|
||||
style.getProperty('font-family').propertyValue]
|
||||
if not ff or ff[0] == 'inherit':
|
||||
continue
|
||||
rule['font-family'] = frozenset(icu_lower(f) for f in ff)
|
||||
src = rule.get('src', None)
|
||||
if not src: continue
|
||||
style = parseStyle('background-image:%s'%src, validate=False)
|
||||
if not src:
|
||||
continue
|
||||
style = self.parser.parseStyle('background-image:%s'%src, validate=False)
|
||||
src = style.getProperty('background-image').propertyValue[0].uri
|
||||
name = self.href_to_name(src, '@font-face rule')
|
||||
if name is None:
|
||||
continue
|
||||
rule['src'] = name
|
||||
normalize_font_properties(rule)
|
||||
rule['width'] = widths[rule['font-stretch']]
|
||||
rule['weight'] = int(rule['font-weight'])
|
||||
rules.append(rule)
|
||||
|
||||
if not rules:
|
||||
if not rules and not self.do_embed:
|
||||
return
|
||||
|
||||
self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
|
||||
for rule in rules:
|
||||
self.all_font_rules[rule['src']] = rule
|
||||
|
||||
for rule in rules:
|
||||
if rule['src'] not in self.font_stats:
|
||||
self.font_stats[rule['src']] = set()
|
||||
@ -265,19 +280,48 @@ class StatsCollector(object):
|
||||
if not isinstance(font_usage, list):
|
||||
raise Exception('Unknown error occurred while reading font usage')
|
||||
exclude = {'\n', '\r', '\t'}
|
||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
|
||||
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
|
||||
for font in font_usage:
|
||||
text = set()
|
||||
for t in font['text']:
|
||||
text |= frozenset(t)
|
||||
text.difference_update(exclude)
|
||||
if not text: continue
|
||||
if not text:
|
||||
continue
|
||||
normalize_font_properties(font)
|
||||
for rule in get_matching_rules(rules, font):
|
||||
self.font_stats[rule['src']] |= text
|
||||
if self.do_embed:
|
||||
ff = [icu_lower(x) for x in font.get('font-family', [])]
|
||||
if ff and ff[0] not in bad_fonts:
|
||||
keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
|
||||
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
|
||||
val = fu[key]
|
||||
if not val:
|
||||
val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
|
||||
val['text'] = set()
|
||||
val['text'] |= text
|
||||
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)
|
||||
|
||||
if self.do_embed:
|
||||
self.page.evaljs('window.font_stats.get_font_families()')
|
||||
font_families = self.page.bridge_value
|
||||
if not isinstance(font_families, dict):
|
||||
raise Exception('Unknown error occurred while reading font families')
|
||||
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
|
||||
for raw in font_families.iterkeys():
|
||||
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
|
||||
for x in style.propertyValue:
|
||||
x = x.value
|
||||
if x and x.lower() not in bad_fonts:
|
||||
fs.add(x)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from calibre.ebooks.oeb.polish.container import get_container
|
||||
from calibre.utils.logging import default_log
|
||||
default_log.filter_level = default_log.DEBUG
|
||||
ebook = get_container(sys.argv[-1], default_log)
|
||||
print (StatsCollector(ebook).font_stats)
|
||||
print (StatsCollector(ebook, do_embed=True).font_stats)
|
||||
|
||||
|
||||
|
@ -45,6 +45,7 @@ class Polish(QDialog): # {{{
|
||||
ORIGINAL_* format before running it.</p>''')
|
||||
),
|
||||
|
||||
'embed':_('<h3>Embed referenced fonts</h3>%s')%HELP['embed'],
|
||||
'subset':_('<h3>Subsetting fonts</h3>%s')%HELP['subset'],
|
||||
|
||||
'smarten_punctuation':
|
||||
@ -75,6 +76,7 @@ class Polish(QDialog): # {{{
|
||||
|
||||
count = 0
|
||||
self.all_actions = OrderedDict([
|
||||
('embed', _('&Embed all referenced fonts')),
|
||||
('subset', _('&Subset all embedded fonts')),
|
||||
('smarten_punctuation', _('Smarten &punctuation')),
|
||||
('metadata', _('Update &metadata in the book files')),
|
||||
|
Loading…
x
Reference in New Issue
Block a user