diff --git a/resources/compiled_coffeescript.zip b/resources/compiled_coffeescript.zip
index cb48a58bd2..e092b53157 100644
Binary files a/resources/compiled_coffeescript.zip and b/resources/compiled_coffeescript.zip differ
diff --git a/src/calibre/ebooks/oeb/polish/embed.py b/src/calibre/ebooks/oeb/polish/embed.py
new file mode 100644
index 0000000000..1f5412bbc9
--- /dev/null
+++ b/src/calibre/ebooks/oeb/polish/embed.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import sys
+
+from lxml import etree
+
+from calibre import prints
+from calibre.ebooks.oeb.base import XHTML
+from calibre.ebooks.oeb.polish.stats import normalize_font_properties
+from calibre.utils.filenames import ascii_filename
+
+# The CSS font descriptors copied into every @font-face rule built here, mapped
+# to the value used when a font does not supply one.
+props = {'font-family':None, 'font-weight':'normal', 'font-style':'normal', 'font-stretch':'normal'}
+
+def _family_names(ff):
+    ''' Return the lower-cased family names in ff, which is either a single
+    name or an iterable of names. '''
+    if isinstance(ff, basestring):
+        ff = (ff,)
+    return tuple(icu_lower(x) for x in ff)
+
+def matching_rule(font, rules):
+    '''
+    Return the first rule in rules that describes the same face as font, or
+    None if there is no such rule.
+
+    A rule matches when its font-style, font-stretch and font-weight are equal
+    to those of font and the family of font is one of its families. Family
+    names are compared case insensitively.
+    '''
+    family = _family_names(font['font-family'])[0]
+    wt = font['font-weight']
+    style = font['font-style']
+    stretch = font['font-stretch']
+
+    for rule in rules:
+        if rule['font-style'] == style and rule['font-stretch'] == stretch and rule['font-weight'] == wt:
+            # A rule can carry several family names in an unordered
+            # collection, so check all of them instead of an arbitrary one
+            if family in _family_names(rule['font-family']):
+                return rule
+
+def embed_font(container, font, all_font_rules, report, warned):
+    '''
+    Return a dict describing an @font-face rule for font, or None if no
+    suitable font file is available.
+
+    If one of all_font_rules matches font, the file it points to is reused.
+    Otherwise the fonts the font scanner finds for the family are searched for
+    an exact match on weight, style and stretch, and the first match is written
+    into container. Failures are passed to report; warned is a set used to
+    report each failure only once.
+    '''
+    rule = matching_rule(font, all_font_rules)
+    ff = font['font-family']
+    if not isinstance(ff, basestring):
+        # tuple() so that unordered collections of names work as well
+        ff = tuple(ff)[0]
+    if rule is not None:
+        # A matching rule was supplied, point at the file it refers to
+        name = rule['src']
+        href = container.name_to_href(name)
+        rule = {k:ff if k == 'font-family' else rule.get(k, v) for k, v in props.iteritems()}
+        rule['src'] = 'url(%s)' % href
+        rule['name'] = name
+        return rule
+
+    from calibre.utils.fonts.scanner import font_scanner, NoFonts
+    if ff in warned:
+        return
+    try:
+        fonts = font_scanner.fonts_for_family(ff)
+    except NoFonts:
+        report(_('Failed to find fonts for family: %s, not embedding') % ff)
+        warned.add(ff)
+        return
+    wt = int(font.get('font-weight', '400'))
+    style = font.get('font-style', 'normal')
+    stretch = font.get('font-stretch', 'normal')
+    for f in fonts:
+        if f['weight'] == wt and f['font-style'] == style and f['font-stretch'] == stretch:
+            report(_('Embedding font %s from %s') % (f['full_name'], f['path']))
+            data = font_scanner.get_font_data(f)
+            ext = 'otf' if f['is_otf'] else 'ttf'
+            fname = ascii_filename(f['full_name']).replace(' ', '-').replace('(', '').replace(')', '')
+            item = container.generate_item('fonts/%s.%s'%(fname, ext), id_prefix='font')
+            name = container.href_to_name(item.get('href'), container.opf_name)
+            with container.open(name, 'wb') as out:
+                out.write(data)
+            href = container.name_to_href(name)
+            rule = {k:f.get(k, v) for k, v in props.iteritems()}
+            rule['src'] = 'url(%s)' % href
+            rule['name'] = name
+            return rule
+    msg = _('Failed to find font matching: family: %s; weight: %s; style: %s; stretch: %s') % (
+        ff, font['font-weight'], font['font-style'], font['font-stretch'])
+    if msg not in warned:
+        warned.add(msg)
+        report(msg)
+
+def _font_face_css(rule):
+    ''' Return the declarations for the body of an @font-face rule. Only src
+    and the descriptors from props that differ from their defaults are output,
+    a value of 400 is skipped as well. '''
+    return ';\n\t'.join(
+        '%s: %s' % (k, '"%s"' % v if k == 'font-family' else v)
+        for k, v in rule.iteritems()
+        if (k in props and props[k] != v and v != '400') or k == 'src')
+
+def embed_all_fonts(container, stats, report):
+    '''
+    Embed all fonts that the spine documents of container use but that are
+    not yet embedded in them.
+
+    The generated @font-face rules are written to a new stylesheet, which is
+    linked from every document that needs it.
+
+    :param stats: A StatsCollector created with do_embed=True. Its font_stats
+                  are updated with the text that uses each embedded font.
+    :param report: A callable that is passed progress and failure messages.
+    '''
+    all_font_rules = tuple(stats.all_font_rules.itervalues())
+    warned = set()
+    rules, nrules = [], []
+    modified = set()
+
+    for path in container.spine_items:
+        name = container.abspath_to_name(path)
+        fu = stats.font_usage_map.get(name, None)
+        fs = stats.font_spec_map.get(name, None)
+        fr = stats.font_rule_map.get(name, None)
+        if None in (fs, fu, fr):
+            continue
+        fs = {icu_lower(x) for x in fs}
+        for font in fu.itervalues():
+            if icu_lower(font['font-family']) not in fs:
+                continue
+            if matching_rule(font, fr) is not None:
+                # Already embedded in this HTML file before processing started
+                continue
+            rule = matching_rule(font, nrules)
+            if rule is None:
+                # Not yet embedded by this code either
+                rule = embed_font(container, font, all_font_rules, report, warned)
+                if rule is None:
+                    continue
+                rules.append(rule)
+                nrules.append(normalize_font_properties(rule.copy()))
+            modified.add(name)
+            # Merge into any existing stats instead of replacing them, as
+            # embed_font can return a file that already has an entry. This also
+            # avoids sharing the set object with font_usage_map.
+            stats.font_stats.setdefault(rule['name'], set()).update(font['text'])
+
+    if not rules:
+        report(_('No embeddable fonts found'))
+        return
+
+    # Write out CSS
+    css = '\n\n'.join('@font-face {\n\t%s\n}' % _font_face_css(r) for r in rules)
+    item = container.generate_item('fonts.css', id_prefix='font_embed')
+    name = container.href_to_name(item.get('href'), container.opf_name)
+    with container.open(name, 'wb') as out:
+        out.write(css.encode('utf-8'))
+
+    # Add link to CSS in all files that need it
+    for spine_name in modified:
+        root = container.parsed(spine_name)
+        heads = root.xpath('//*[local-name()="head"][1]')
+        if heads:
+            head = heads[0]
+        else:
+            # No <head> to put the link in, so add one
+            head = etree.Element(XHTML('head'))
+            root.insert(0, head)
+        href = container.name_to_href(name, spine_name)
+        etree.SubElement(head, XHTML('link'), rel='stylesheet', type='text/css', href=href).tail = '\n'
+        container.dirty(spine_name)
+
+
+if __name__ == '__main__':
+    from calibre.ebooks.oeb.polish.container import get_container
+    from calibre.ebooks.oeb.polish.stats import StatsCollector
+    from calibre.utils.logging import default_log
+    default_log.filter_level = default_log.DEBUG
+    inbook = sys.argv[-1]
+    ebook = get_container(inbook, default_log)
+    report = []
+    stats = StatsCollector(ebook, do_embed=True)
+    embed_all_fonts(ebook, stats, report.append)
+    outbook, ext = inbook.rpartition('.')[0::2]
+    outbook += '_subset.'+ext
+    ebook.commit(outbook)
+    prints('\nReport:')
+    for msg in report:
+        prints(msg)
+    print()
+    prints('Output written to:', outbook)
+
diff --git
a/src/calibre/ebooks/oeb/polish/font_stats.coffee b/src/calibre/ebooks/oeb/polish/font_stats.coffee index ea99b65cea..e5e29becd1 100644 --- a/src/calibre/ebooks/oeb/polish/font_stats.coffee +++ b/src/calibre/ebooks/oeb/polish/font_stats.coffee @@ -67,6 +67,18 @@ class FontStats ans.push(usage) py_bridge.value = ans + get_font_families: () -> + ans = {} + for node in document.getElementsByTagName('*') + rules = document.defaultView.getMatchedCSSRules(node, '') + if rules + for rule in rules + style = rule.style + family = style.getPropertyValue('font-family') + if family + ans[family] = true + py_bridge.value = ans + if window? window.font_stats = new FontStats() diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 08b5004c91..ff46288643 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -14,6 +14,7 @@ from functools import partial from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.stats import StatsCollector from calibre.ebooks.oeb.polish.subset import subset_all_fonts +from calibre.ebooks.oeb.polish.embed import embed_all_fonts from calibre.ebooks.oeb.polish.cover import set_cover from calibre.ebooks.oeb.polish.replace import smarten_punctuation from calibre.ebooks.oeb.polish.jacket import ( @@ -21,6 +22,7 @@ from calibre.ebooks.oeb.polish.jacket import ( from calibre.utils.logging import Log ALL_OPTS = { + 'embed': False, 'subset': False, 'opf': None, 'cover': None, @@ -47,6 +49,12 @@ changes needed for the desired effect.

Note that polishing only works on files in the %s formats.

\ ''')%_(' or ').join('%s'%x for x in SUPPORTED), +'embed': _('''\ +

Embed all fonts that are referenced in the document and are not already embedded. +This will scan your computer for the fonts, and if they are found, they will be +embedded into the document.

+'''), + 'subset': _('''\

Subsetting fonts means reducing an embedded font to contain only the characters used from that font in the book. This @@ -118,8 +126,8 @@ def polish(file_map, opts, log, report): ebook = get_container(inbook, log) jacket = None - if opts.subset: - stats = StatsCollector(ebook) + if opts.subset or opts.embed: + stats = StatsCollector(ebook, do_embed=opts.embed) if opts.opf: rt(_('Updating metadata')) @@ -159,6 +167,11 @@ def polish(file_map, opts, log, report): smarten_punctuation(ebook, report) report('') + if opts.embed: + rt(_('Embedding referenced fonts')) + embed_all_fonts(ebook, stats, report) + report('') + if opts.subset: rt(_('Subsetting embedded fonts')) subset_all_fonts(ebook, stats.font_stats, report) @@ -197,6 +210,7 @@ def option_parser(): parser = OptionParser(usage=USAGE) a = parser.add_option o = partial(a, default=False, action='store_true') + o('--embed-fonts', '-e', dest='embed', help=CLI_HELP['embed']) o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset']) a('--cover', '-c', help=_( 'Path to a cover image. Changes the cover specified in the ebook. 
' diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index d4a5c96111..77b99ff9b6 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -7,10 +7,11 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import json, sys, os +import json, sys, os, logging from urllib import unquote +from collections import defaultdict -from cssutils import parseStyle +from cssutils import CSSParser from PyQt4.Qt import (pyqtProperty, QString, QEventLoop, Qt, QSize, QTimer, pyqtSlot) from PyQt4.QtWebKit import QWebPage, QWebView @@ -41,14 +42,14 @@ def normalize_font_properties(font): 'extra-expanded', 'ultra-expanded'}: val = 'normal' font['font-stretch'] = val + return font -widths = {x:i for i, x in enumerate(( 'ultra-condensed', +widths = {x:i for i, x in enumerate(('ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed', 'normal', 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded' ))} def get_matching_rules(rules, font): - normalize_font_properties(font) matches = [] # Filter on family @@ -100,7 +101,7 @@ def get_matching_rules(rules, font): return m return [] -class Page(QWebPage): # {{{ +class Page(QWebPage): # {{{ def __init__(self, log): self.log = log @@ -157,10 +158,12 @@ class Page(QWebPage): # {{{ class StatsCollector(object): - def __init__(self, container): + def __init__(self, container, do_embed=False): self.container = container self.log = self.logger = container.log + self.do_embed = do_embed must_use_qt() + self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) self.loop = QEventLoop() self.view = QWebView() @@ -173,6 +176,10 @@ class StatsCollector(object): self.render_queue = list(container.spine_items) self.font_stats = {} + self.font_usage_map = {} + self.font_spec_map = {} + self.font_rule_map = {} + self.all_font_rules = {} QTimer.singleShot(0, self.render_book) @@ 
-235,27 +242,35 @@ class StatsCollector(object): rules = [] for rule in font_face_rules: ff = rule.get('font-family', None) - if not ff: continue - style = parseStyle('font-family:%s'%ff, validate=False) + if not ff: + continue + style = self.parser.parseStyle('font-family:%s'%ff, validate=False) ff = [x.value for x in style.getProperty('font-family').propertyValue] if not ff or ff[0] == 'inherit': continue rule['font-family'] = frozenset(icu_lower(f) for f in ff) src = rule.get('src', None) - if not src: continue - style = parseStyle('background-image:%s'%src, validate=False) + if not src: + continue + style = self.parser.parseStyle('background-image:%s'%src, validate=False) src = style.getProperty('background-image').propertyValue[0].uri name = self.href_to_name(src, '@font-face rule') + if name is None: + continue rule['src'] = name normalize_font_properties(rule) rule['width'] = widths[rule['font-stretch']] rule['weight'] = int(rule['font-weight']) rules.append(rule) - if not rules: + if not rules and not self.do_embed: return + self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules + for rule in rules: + self.all_font_rules[rule['src']] = rule + for rule in rules: if rule['src'] not in self.font_stats: self.font_stats[rule['src']] = set() @@ -265,19 +280,48 @@ class StatsCollector(object): if not isinstance(font_usage, list): raise Exception('Unknown error occurred while reading font usage') exclude = {'\n', '\r', '\t'} + self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) + bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} for font in font_usage: text = set() for t in font['text']: text |= frozenset(t) text.difference_update(exclude) - if not text: continue + if not text: + continue + normalize_font_properties(font) for rule in get_matching_rules(rules, font): self.font_stats[rule['src']] |= text + if self.do_embed: + ff = [icu_lower(x) for x in 
font.get('font-family', [])] + if ff and ff[0] not in bad_fonts: + keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} + key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) + val = fu[key] + if not val: + val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) + val['text'] = set() + val['text'] |= text + self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) + + if self.do_embed: + self.page.evaljs('window.font_stats.get_font_families()') + font_families = self.page.bridge_value + if not isinstance(font_families, dict): + raise Exception('Unknown error occurred while reading font families') + self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() + for raw in font_families.iterkeys(): + style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') + for x in style.propertyValue: + x = x.value + if x and x.lower() not in bad_fonts: + fs.add(x) if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container from calibre.utils.logging import default_log default_log.filter_level = default_log.DEBUG ebook = get_container(sys.argv[-1], default_log) - print (StatsCollector(ebook).font_stats) + print (StatsCollector(ebook, do_embed=True).font_stats) + diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py index eb21fb2626..0f21807afb 100644 --- a/src/calibre/gui2/actions/polish.py +++ b/src/calibre/gui2/actions/polish.py @@ -45,6 +45,7 @@ class Polish(QDialog): # {{{ ORIGINAL_* format before running it.

''') ), + 'embed':_('

Embed referenced fonts

%s')%HELP['embed'], 'subset':_('

Subsetting fonts

%s')%HELP['subset'], 'smarten_punctuation': @@ -75,6 +76,7 @@ class Polish(QDialog): # {{{ count = 0 self.all_actions = OrderedDict([ + ('embed', _('&Embed all referenced fonts')), ('subset', _('&Subset all embedded fonts')), ('smarten_punctuation', _('Smarten &punctuation')), ('metadata', _('Update &metadata in the book files')),