# Patch summary: add font-family support (including embedded fonts) to the
# DOCX input conversion.
#
# * char_styles.py: new read_font_family() stores the w:ascii attribute of
#   w:rFonts as RunStyle.font_family; RunStyle emits a CSS font-family
#   declaration when the value is not `inherit`.
# * container.py: the body of DOCX.document_relationships becomes
#   get_relationships(name), which to_html.py uses to look up the
#   relationships of the font table part.
# * fonts.py (new): Family parses one w:font element (alternate names,
#   embedded variants, generic family, panose1). Fonts maps a font name plus
#   bold/italic to a CSS font-family value, writes the embedded variants that
#   were used into dest_dir/fonts and returns the matching @font-face rules.
#   When an embed has a w:fontKey, the first 32 bytes of the font data are
#   XORed with the reversed key bytes before being written.
# * names.py: adds the FONTS (fontTable) relationship type.
# * styles.py: Styles.__call__ takes the Fonts object, resolved run styles get
#   their font_family mapped through Fonts.family_for, and generate_css() now
#   takes (dest_dir, docx) and appends the @font-face rules to its prefix.
# * to_html.py: reads fontTable.xml, builds Fonts, and passes it to Styles.
#
# NOTE(review): Fonts.write() evaluates `i % len(key)`; a w:fontKey that
#   contains no hex digits leaves `key` empty, which would raise
#   ZeroDivisionError -- confirm whether such keys can occur.
# NOTE(review): Styles.fonts is only assigned in Styles.__call__, which
#   to_html.py invokes only when the styles part is read successfully, while
#   generate_css() always reads self.fonts -- confirm Styles.__init__ (not
#   part of this patch) provides a default.
# NOTE(review): Fonts.__call__ accepts docx and dest_dir but does not use
#   them.
# NOTE(review): line breaks, indentation and blank context lines below were
#   restored from the hunk line counts -- verify against the target tree
#   before applying.
diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py
index a9d2a43cdb..b65766e494 100644
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@@ -113,6 +113,14 @@ def read_vert_align(parent, dest):
         if val and val in {'baseline', 'subscript', 'superscript'}:
             ans = val
     setattr(dest, 'vert_align', ans)
+
+def read_font_family(parent, dest):
+    ans = inherit
+    for col in XPath('./w:rFonts[@w:ascii]')(parent):
+        val = get(col, 'w:ascii')
+        if val:
+            ans = val
+    setattr(dest, 'font_family', ans)
 # }}}
 
 class RunStyle(object):
@@ -122,7 +130,7 @@ class RunStyle(object):
         'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
 
         'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
-        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
+        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
     }
 
     toggle_properties = {
@@ -141,7 +149,7 @@ class RunStyle(object):
         ):
             setattr(self, p, binary_property(rPr, p))
 
-        for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
+        for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'):
             f = globals()['read_%s' % x]
             f(rPr, self)
 
@@ -212,6 +220,9 @@ class RunStyle(object):
 
             if self.b:
                 c['font-weight'] = 'bold'
+
+            if self.font_family is not inherit:
+                c['font-family'] = self.font_family
         return self._css
 
     def same_border(self, other):
diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py
index ec0decacef..bcca336474 100644
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@@ -167,7 +167,9 @@ class DOCX(object):
 
     @property
     def document_relationships(self):
-        name = self.document_name
+        return self.get_relationships(self.document_name)
+
+    def get_relationships(self, name):
         base = '/'.join(name.split('/')[:-1])
         by_id, by_type = {}, {}
         parts = name.split('/')
diff --git a/src/calibre/ebooks/docx/fonts.py b/src/calibre/ebooks/docx/fonts.py
new file mode 100644
index 0000000000..4ed602c71d
--- /dev/null
+++ b/src/calibre/ebooks/docx/fonts.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal '
+
+import os, re
+from collections import namedtuple
+
+from calibre.ebooks.docx.block_styles import binary_property, inherit
+from calibre.ebooks.docx.names import XPath, get
+from calibre.utils.filenames import ascii_filename
+from calibre.utils.fonts.scanner import font_scanner, NoFonts
+from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
+
+Embed = namedtuple('Embed', 'name key subsetted')
+
+def has_system_fonts(name):
+    try:
+        return bool(font_scanner.fonts_for_family(name))
+    except NoFonts:
+        return False
+
+def get_variant(bold=False, italic=False):
+    return {(False, False):'Regular', (False, True):'Italic',
+            (True, False):'Bold', (True, True):'BoldItalic'}[(bold, italic)]
+
+class Family(object):
+
+    def __init__(self, elem, embed_relationships):
+        self.name = self.family_name = get(elem, 'w:name')
+        self.alt_names = tuple(get(x, 'w:val') for x in XPath('./w:altName')(elem))
+        if self.alt_names and not has_system_fonts(self.name):
+            for x in self.alt_names:
+                if has_system_fonts(x):
+                    self.family_name = x
+                    break
+
+        self.embedded = {}
+        for x in ('Regular', 'Bold', 'Italic', 'BoldItalic'):
+            for y in XPath('./w:embed%s[@r:id]' % x)(elem):
+                rid = get(y, 'r:id')
+                key = get(y, 'w:fontKey')
+                subsetted = get(y, 'w:subsetted') in {'1', 'true', 'on'}
+                if rid in embed_relationships:
+                    self.embedded[x] = Embed(embed_relationships[rid], key, subsetted)
+
+        self.generic_family = 'auto'
+        for x in XPath('./w:family[@w:val]')(elem):
+            self.generic_family = get(x, 'w:val', 'auto')
+
+        ntt = binary_property(elem, 'notTrueType')
+        self.is_ttf = ntt is inherit or not ntt
+
+        self.panose1 = None
+        self.panose_name = None
+        for x in XPath('./w:panose1[@w:val]')(elem):
+            try:
+                v = get(x, 'w:val')
+                v = tuple(int(v[i:i+2], 16) for i in xrange(0, len(v), 2))
+            except (TypeError, ValueError, IndexError):
+                pass
+            else:
+                self.panose1 = v
+                self.panose_name = panose_to_css_generic_family(v)
+
+        self.css_generic_family = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace',
+                                   'decorative':'fantasy', 'script':'cursive'}.get(self.generic_family, None)
+        self.css_generic_family = self.css_generic_family or self.panose_name or 'serif'
+
+
+class Fonts(object):
+
+    def __init__(self):
+        self.fonts = {}
+        self.used = set()
+
+    def __call__(self, root, embed_relationships, docx, dest_dir):
+        for elem in XPath('//w:font[@w:name]')(root):
+            self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships)
+
+    def family_for(self, name, bold=False, italic=False):
+        f = self.fonts.get(name, None)
+        if f is None:
+            return 'serif'
+        variant = get_variant(bold, italic)
+        self.used.add((name, variant))
+        name = f.name if variant in f.embedded else f.family_name
+        return '"%s", %s' % (name.replace('"', ''), f.css_generic_family)
+
+    def embed_fonts(self, dest_dir, docx):
+        defs = []
+        dest_dir = os.path.join(dest_dir, 'fonts')
+        for name, variant in self.used:
+            f = self.fonts[name]
+            if variant in f.embedded:
+                if not os.path.exists(dest_dir):
+                    os.mkdir(dest_dir)
+                fname = self.write(name, dest_dir, docx, variant)
+                if fname is not None:
+                    d = {'font-family':'"%s"' % name.replace('"', ''), 'src': 'url("fonts/%s")' % fname}
+                    if 'Bold' in variant:
+                        d['font-weight'] = 'bold'
+                    if 'Italic' in variant:
+                        d['font-style'] = 'italic'
+                    d = ['%s: %s' % (k, v) for k, v in d.iteritems()]
+                    d = ';\n\t'.join(d)
+                    defs.append('@font-face {\n\t%s\n}\n' % d)
+        return '\n'.join(defs)
+
+    def write(self, name, dest_dir, docx, variant):
+        f = self.fonts[name]
+        ef = f.embedded[variant]
+        raw = docx.read(ef.name)
+        prefix = raw[:32]
+        if ef.key:
+            key = re.sub(r'[^A-Fa-f0-9]', '', ef.key)
+            key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in xrange(0, len(key), 2))))
+            prefix = bytearray(prefix)
+            prefix = bytes(bytearray(prefix[i]^key[i % len(key)] for i in xrange(len(prefix))))
+        if not is_truetype_font(prefix):
+            return None
+        ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf'
+        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
+        with open(os.path.join(dest_dir, fname), 'wb') as dest:
+            dest.write(prefix)
+            dest.write(raw[32:])
+
+        return fname
+
diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py
index 91b051d691..da643dcc2c 100644
--- a/src/calibre/ebooks/docx/names.py
+++ b/src/calibre/ebooks/docx/names.py
@@ -13,6 +13,7 @@ DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metada
 APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
 STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
 NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
+FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
 
 namespaces = {
     'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index 44ae2cea89..13b9ebe58f 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -97,7 +97,8 @@ class Styles(object):
     def get(self, key, default=None):
         return self.id_map.get(key, default)
 
-    def __call__(self, root):
+    def __call__(self, root, fonts):
+        self.fonts = fonts
         for s in XPath('//w:style')(root):
             s = Style(s)
             if s.style_id:
@@ -246,6 +247,9 @@ class Styles(object):
             for attr in ans.all_properties:
                 setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
 
+            if ans.font_family is not inherit:
+                ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
+
         return ans
 
     def resolve(self, obj):
@@ -290,13 +294,16 @@ class Styles(object):
         h = hash(frozenset(css.iteritems()))
         return self.classes.get(h, (None, None))[0]
 
-    def generate_css(self):
+    def generate_css(self, dest_dir, docx):
+        ef = self.fonts.embed_fonts(dest_dir, docx)
         prefix = textwrap.dedent(
             '''\
             p { text-indent: 1.5em }
 
             ul, ol, p { margin: 0; padding: 0 }
             ''')
+        if ef:
+            prefix += '\n' + ef
 
         ans = []
         for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 8cd79074e3..dbd6dce043 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -14,9 +14,10 @@ from lxml.html.builder import (
     HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
 
 from calibre.ebooks.docx.container import DOCX, fromstring
-from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
+from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS
 from calibre.ebooks.docx.styles import Styles, inherit
 from calibre.ebooks.docx.numbering import Numbering
+from calibre.ebooks.docx.fonts import Fonts
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 
 class Text:
@@ -116,7 +117,18 @@ class Convert(object):
 
         nname = get_name(NUMBERING, 'numbering.xml')
         sname = get_name(STYLES, 'styles.xml')
+        fname = get_name(FONTS, 'fontTable.xml')
         numbering = self.numbering = Numbering()
+        fonts = self.fonts = Fonts()
+
+        if fname is not None:
+            embed_relationships = self.docx.get_relationships(fname)[0]
+            try:
+                raw = self.docx.read(fname)
+            except KeyError:
+                self.log.warn('Fonts table %s does not exist' % fname)
+            else:
+                fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
 
         if sname is not None:
             try:
@@ -124,7 +136,7 @@ class Convert(object):
             except KeyError:
                 self.log.warn('Styles %s do not exist' % sname)
             else:
-                self.styles(fromstring(raw))
+                self.styles(fromstring(raw), fonts)
 
         if nname is not None:
             try:
@@ -140,7 +152,7 @@ class Convert(object):
         raw = html.tostring(self.html, encoding='utf-8', doctype='')
         with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
             f.write(raw)
-        css = self.styles.generate_css()
+        css = self.styles.generate_css(self.dest_dir, self.docx)
         if css:
             with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                 f.write(css.encode('utf-8'))