DOCX Output: Implement embedding of fonts

This commit is contained in:
Kovid Goyal 2015-03-27 14:07:25 +05:30
parent 8fbf98fe1b
commit 54242de084
5 changed files with 64 additions and 12 deletions

View File

@ -63,6 +63,7 @@ class CompositeProgressReporter(object):
ARCHIVE_FMTS = ('zip', 'rar', 'oebzip')
class Plumber(object):
'''
The `Plumber` manages the conversion pipeline. A UI should call the methods
:method:`merge_ui_recommendations` and then :method:`run`. The plumber will
@ -202,7 +203,7 @@ OptionRecommendation(name='embed_font_family',
'specifies its own fonts, they may override this base font. '
'You can use the filter style information option to remove fonts from the '
'input document. Note that font embedding only works '
'with some output formats, principally EPUB and AZW3.')
'with some output formats, principally EPUB, AZW3 and DOCX.')
),
OptionRecommendation(name='embed_all_fonts',
@ -212,7 +213,7 @@ OptionRecommendation(name='embed_all_fonts',
'but not already embedded. This will search your system for the '
'fonts, and if found, they will be embedded. Embedding will only work '
'if the format you are converting to supports embedded fonts, such as '
'EPUB, AZW3 or PDF. Please ensure that you have the proper license for embedding '
'EPUB, AZW3, DOCX or PDF. Please ensure that you have the proper license for embedding '
'the fonts used in this document.'
)),
@ -1142,8 +1143,7 @@ OptionRecommendation(name='search_replace',
mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old')
needs_old_markup = (self.output_plugin.file_type == 'lit' or
(self.output_plugin.file_type == 'mobi' and mobi_file_type
== 'old'))
(self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=line_height,
untable=needs_old_markup,
@ -1233,4 +1233,3 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
reader()(oeb, path_or_stream)
return oeb

View File

@ -18,6 +18,7 @@ APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
EMBEDDED_FONT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font'
IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink'
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'

View File

@ -91,6 +91,7 @@ class DOCX(object):
self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
self.embedded_fonts = E.Relationships()
self.fonts = {}
# Boilerplate {{{
@property
@ -192,6 +193,8 @@ class DOCX(object):
zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
for fname, data_getter in self.images.iteritems():
zf.writestr(fname, data_getter())
for fname, data in self.fonts.iteritems():
zf.writestr(fname, data)
if __name__ == '__main__':
d = DOCX(None, None)

View File

@ -6,14 +6,25 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
from calibre.ebooks.docx.names import makeelement
from collections import defaultdict
from uuid import uuid4
from calibre.ebooks.docx.names import makeelement, EMBEDDED_FONT
from calibre.ebooks.oeb.base import OEB_STYLES
from calibre.ebooks.oeb.transforms.subset import find_font_face_rules
def obfuscate_font_data(data, key):
    '''
    Return a copy of ``data`` whose first 32 bytes are XORed with ``key``.

    The mask is the byte sequence ``key.bytes`` in reverse order, repeated
    as often as needed to cover the prefix. Everything after the first 32
    bytes is returned unchanged. Because XOR is its own inverse, calling
    this function again with the same key restores the original data.

    :param data: The raw font data, as a bytestring. Inputs shorter than 32
                 bytes are handled: only the bytes present are masked.
    :param key: An object with a ``bytes`` attribute holding the key bytes
                (the caller in this file passes a :class:`uuid.UUID`).
    :return: A bytestring of the same length as ``data``.
    '''
    # The key bytes are applied in reverse order
    mask = bytearray(reversed(key.bytes))
    mask_len = len(mask)
    # bytearray iteration yields ints on both Python 2 and 3, so the XOR
    # below needs no ord()/chr() juggling and no Python-2-only xrange().
    head = bytearray(data[:32])
    obfuscated = bytearray(b ^ mask[i % mask_len] for i, b in enumerate(head))
    return bytes(obfuscated) + data[32:]
class FontsManager(object):
def __init__(self, oeb):
self.oeb, self.log = oeb, oeb.log
def __init__(self, oeb, opts):
self.oeb, self.log, self.opts = oeb, oeb.log, opts
def serialize(self, text_styles, fonts, embed_relationships):
def serialize(self, text_styles, fonts, embed_relationships, font_data_map):
font_families, seen = set(), set()
for ts in text_styles:
if ts.font_family:
@ -21,6 +32,44 @@ class FontsManager(object):
if lf not in seen:
seen.add(lf)
font_families.add(ts.font_family)
family_map = {}
for family in sorted(font_families):
makeelement(fonts, 'w:font', w_name=family)
family_map[family] = makeelement(fonts, 'w:font', w_name=family)
embedded_fonts = []
for item in self.oeb.manifest:
if item.media_type in OEB_STYLES and hasattr(item.data, 'cssRules'):
embedded_fonts.extend(find_font_face_rules(item, self.oeb))
num = 0
face_map = defaultdict(set)
rel_map = {}
for ef in embedded_fonts:
ff = ef['font-family'][0]
if ff not in font_families:
continue
num += 1
bold = ef['weight'] > 400
italic = ef['font-style'] != 'normal'
tag = 'Regular'
if bold or italic:
tag = 'Italic'
if bold and italic:
tag = 'BoldItalic'
elif bold:
tag = 'Bold'
if tag in face_map[ff]:
continue
face_map[ff].add(tag)
font = family_map[ff]
key = uuid4()
item = ef['item']
rid = rel_map.get(item)
if rid is None:
rel_map[item] = rid = 'rId%d' % num
fname = 'fonts/font%d.odttf' % num
makeelement(embed_relationships, 'Relationship', Id=rid, Type=EMBEDDED_FONT, Target=fname)
font_data_map['word/' + fname] = obfuscate_font_data(item.data, key)
makeelement(font, 'w:embed' + tag, r_id=rid,
w_fontKey='{%s}' % key.urn.rpartition(':')[-1].upper(),
w_subsetted="true" if self.opts.subset_embedded_fonts else "false")

View File

@ -171,7 +171,7 @@ class Convert(object):
self.styles_manager = StylesManager()
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
self.fonts_manager = FontsManager(self.oeb)
self.fonts_manager = FontsManager(self.oeb, self.opts)
for item in self.oeb.spine:
self.process_item(item)
@ -298,4 +298,4 @@ class Convert(object):
self.docx.images = {}
self.styles_manager.serialize(self.docx.styles)
self.images_manager.serialize(self.docx.images)
self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts)
self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)