mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Fonts
This commit is contained in:
parent
ffdc9d377c
commit
d8a896616a
@ -113,6 +113,14 @@ def read_vert_align(parent, dest):
|
||||
if val and val in {'baseline', 'subscript', 'superscript'}:
|
||||
ans = val
|
||||
setattr(dest, 'vert_align', ans)
|
||||
|
||||
def read_font_family(parent, dest):
    '''
    Read the ASCII font name from the ``w:rFonts`` children of ``parent``
    and store it on ``dest`` as ``font_family``.

    When no ``w:rFonts`` child carries a non-empty ``w:ascii`` attribute the
    ``inherit`` sentinel is stored instead. If several children match, the
    last non-empty value wins.
    '''
    family = inherit
    for rfonts in XPath('./w:rFonts[@w:ascii]')(parent):
        ascii_name = get(rfonts, 'w:ascii')
        if not ascii_name:
            # Attribute present but empty: keep whatever was found so far
            continue
        family = ascii_name
    dest.font_family = family
|
||||
# }}}
|
||||
|
||||
class RunStyle(object):
|
||||
@ -122,7 +130,7 @@ class RunStyle(object):
|
||||
'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
|
||||
|
||||
'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
|
||||
'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
|
||||
'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
|
||||
}
|
||||
|
||||
toggle_properties = {
|
||||
@ -141,7 +149,7 @@ class RunStyle(object):
|
||||
):
|
||||
setattr(self, p, binary_property(rPr, p))
|
||||
|
||||
for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
|
||||
for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'):
|
||||
f = globals()['read_%s' % x]
|
||||
f(rPr, self)
|
||||
|
||||
@ -212,6 +220,9 @@ class RunStyle(object):
|
||||
|
||||
if self.b:
|
||||
c['font-weight'] = 'bold'
|
||||
|
||||
if self.font_family is not inherit:
|
||||
c['font-family'] = self.font_family
|
||||
return self._css
|
||||
|
||||
def same_border(self, other):
|
||||
|
@ -167,7 +167,9 @@ class DOCX(object):
|
||||
|
||||
@property
|
||||
def document_relationships(self):
|
||||
name = self.document_name
|
||||
return self.get_relationships(self.document_name)
|
||||
|
||||
def get_relationships(self, name):
|
||||
base = '/'.join(name.split('/')[:-1])
|
||||
by_id, by_type = {}, {}
|
||||
parts = name.split('/')
|
||||
|
132
src/calibre/ebooks/docx/fonts.py
Normal file
132
src/calibre/ebooks/docx/fonts.py
Normal file
@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, re
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre.ebooks.docx.block_styles import binary_property, inherit
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.fonts.scanner import font_scanner, NoFonts
|
||||
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
|
||||
|
||||
Embed = namedtuple('Embed', 'name key subsetted')
|
||||
|
||||
def has_system_fonts(name):
    '''
    Return True if the font scanner reports at least one font for the family
    ``name``, False otherwise (including when the scanner raises NoFonts).
    '''
    try:
        found = bool(font_scanner.fonts_for_family(name))
    except NoFonts:
        found = False
    return found
|
||||
|
||||
def get_variant(bold=False, italic=False):
    '''
    Map a (bold, italic) flag pair to the name of the corresponding font
    variant: 'Regular', 'Italic', 'Bold' or 'BoldItalic'.

    Raises KeyError if the pair does not compare equal to a pair of booleans.
    '''
    variants = {
        (False, False): 'Regular',
        (False, True): 'Italic',
        (True, False): 'Bold',
        (True, True): 'BoldItalic',
    }
    flags = (bold, italic)
    return variants[flags]
|
||||
|
||||
class Family(object):
    '''
    A single ``w:font`` entry from the font table.

    Attributes set by the constructor:

      name               - value of the element's ``w:name`` attribute
      family_name        - ``name``, or the first alternate name for which
                           has_system_fonts() is true when it is false for
                           ``name`` itself
      alt_names          - tuple of the ``w:val`` values of ``w:altName``
      embedded           - variant name -> Embed(name, key, subsetted)
      generic_family     - ``w:val`` of ``w:family``, 'auto' by default
      is_ttf             - False only when ``notTrueType`` is set
      panose1            - tuple of ints parsed from ``w:panose1`` or None
      panose_name        - result of panose_to_css_generic_family() or None
      css_generic_family - CSS generic family used as the fallback
    '''

    def __init__(self, elem, embed_relationships):
        self._read_names(elem)
        self._read_embedded(elem, embed_relationships)
        self._read_generic_family(elem)

        not_truetype = binary_property(elem, 'notTrueType')
        self.is_ttf = True if not_truetype is inherit else not not_truetype

        self._read_panose(elem)

        from_family = {
            'roman': 'serif',
            'swiss': 'sans-serif',
            'modern': 'monospace',
            'decorative': 'fantasy',
            'script': 'cursive',
        }.get(self.generic_family, None)
        # Preference order: declared family, then PANOSE guess, then serif
        self.css_generic_family = from_family or self.panose_name or 'serif'

    def _read_names(self, elem):
        # Primary name plus alternates; prefer an alternate only when the
        # primary name has no system fonts
        self.name = self.family_name = get(elem, 'w:name')
        self.alt_names = tuple(
            get(alt, 'w:val') for alt in XPath('./w:altName')(elem))
        if not self.alt_names or has_system_fonts(self.name):
            return
        for candidate in self.alt_names:
            if has_system_fonts(candidate):
                self.family_name = candidate
                break

    def _read_embedded(self, elem, embed_relationships):
        # Collect the embedded font parts, keyed by variant name. Entries
        # whose relationship id is unknown are ignored
        self.embedded = {}
        for variant in ('Regular', 'Bold', 'Italic', 'BoldItalic'):
            for node in XPath('./w:embed%s[@r:id]' % variant)(elem):
                rid = get(node, 'r:id')
                key = get(node, 'w:fontKey')
                subsetted = get(node, 'w:subsetted') in {'1', 'true', 'on'}
                if rid not in embed_relationships:
                    continue
                self.embedded[variant] = Embed(
                    embed_relationships[rid], key, subsetted)

    def _read_generic_family(self, elem):
        # The last w:family element with a w:val attribute wins
        self.generic_family = 'auto'
        for node in XPath('./w:family[@w:val]')(elem):
            self.generic_family = get(node, 'w:val', 'auto')

    def _read_panose(self, elem):
        # w:panose1 is a hex string; each pair of digits is one number
        self.panose1 = None
        self.panose_name = None
        for node in XPath('./w:panose1[@w:val]')(elem):
            try:
                raw = get(node, 'w:val')
                numbers = tuple(
                    int(raw[pos:pos+2], 16) for pos in xrange(0, len(raw), 2))
            except (TypeError, ValueError, IndexError):
                # Unparseable value: leave the previous result in place
                continue
            self.panose1 = numbers
            self.panose_name = panose_to_css_generic_family(numbers)
|
||||
|
||||
|
||||
class Fonts(object):
    '''
    Registry of the font families declared in the document's font table.

    Calling the instance with the parsed font table fills ``fonts``.
    family_for() translates a font name into a CSS font-family value and
    records which (name, variant) pairs are in use. embed_fonts() then
    writes the embedded fonts that were actually used and returns the
    matching @font-face rules.
    '''

    def __init__(self):
        # Maps the w:name of each w:font element to its Family
        self.fonts = {}
        # (font name, variant name) pairs requested through family_for()
        self.used = set()

    def __call__(self, root, embed_relationships, docx, dest_dir):
        '''
        Create a Family for every ``w:font`` element under ``root`` that has
        a ``w:name`` attribute.

        ``embed_relationships`` is passed through to Family. ``docx`` and
        ``dest_dir`` are accepted for the caller's convenience but are not
        used here.
        '''
        for elem in XPath('//w:font[@w:name]')(root):
            self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships)

    def family_for(self, name, bold=False, italic=False):
        '''
        Return the CSS font-family value for the font ``name``.

        Unknown names yield 'serif'. For known names the requested variant
        is recorded in ``used``; the declared name is used when that variant
        is embedded, otherwise the family's resolved ``family_name``.
        '''
        family = self.fonts.get(name)
        if family is None:
            return 'serif'
        variant = get_variant(bold, italic)
        self.used.add((name, variant))
        css_name = family.name if variant in family.embedded else family.family_name
        return '"%s", %s' % (css_name.replace('"', ''), family.css_generic_family)

    def embed_fonts(self, dest_dir, docx):
        '''
        Write every used, embedded font into the ``fonts`` sub-directory of
        ``dest_dir`` and return the corresponding @font-face rules as a
        single string (empty when nothing was embedded).

        Fonts that write() refuses (it returns None) are skipped.
        '''
        defs = []
        dest_dir = os.path.join(dest_dir, 'fonts')
        # Sorted so that the generated CSS does not depend on set ordering
        for name, variant in sorted(self.used):
            if variant not in self.fonts[name].embedded:
                continue
            if not os.path.exists(dest_dir):
                os.mkdir(dest_dir)
            fname = self.write(name, dest_dir, docx, variant)
            if fname is None:
                continue
            # A list of pairs keeps the declarations in a fixed order
            decls = [
                ('font-family', '"%s"' % name.replace('"', '')),
                ('src', 'url("fonts/%s")' % fname),
            ]
            if 'Bold' in variant:
                decls.append(('font-weight', 'bold'))
            if 'Italic' in variant:
                decls.append(('font-style', 'italic'))
            body = ';\n\t'.join('%s: %s' % decl for decl in decls)
            defs.append('@font-face {\n\t%s\n}\n' % body)
        return '\n'.join(defs)

    @staticmethod
    def _deobfuscate(prefix, font_key):
        # XOR prefix with the reversed bytes of the hex digits in font_key.
        # A key without any hex digits leaves the prefix untouched instead
        # of dividing by zero in the modulo below.
        digits = re.sub(r'[^A-Fa-f0-9]', '', font_key)
        key = bytearray(reversed(
            [int(digits[i:i+2], 16) for i in range(0, len(digits), 2)]))
        if not key:
            return prefix
        return bytes(bytearray(
            byte ^ key[i % len(key)] for i, byte in enumerate(bytearray(prefix))))

    def write(self, name, dest_dir, docx, variant):
        '''
        Write the embedded font for (``name``, ``variant``) into ``dest_dir``
        and return the file name used.

        Only the first 32 bytes of the font are affected by the font key;
        they are decoded before the data is checked and written. Returns
        None, without writing anything, when the font part is missing from
        the archive (``docx.read`` raises KeyError) or when the decoded
        data is not recognised by is_truetype_font().
        '''
        ef = self.fonts[name].embedded[variant]
        try:
            raw = docx.read(ef.name)
        except KeyError:
            # The font table references a part that is not in the archive;
            # treat the font as not embeddable rather than aborting
            return None
        prefix = raw[:32]
        if ef.key:
            prefix = self._deobfuscate(prefix, ef.key)
        if not is_truetype_font(prefix):
            return None
        ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf'
        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
        with open(os.path.join(dest_dir, fname), 'wb') as dest:
            dest.write(prefix)
            dest.write(raw[32:])

        return fname
|
||||
|
@ -13,6 +13,7 @@ DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metada
|
||||
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
|
||||
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
|
||||
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
|
||||
FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
|
||||
|
||||
namespaces = {
|
||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||
|
@ -97,7 +97,8 @@ class Styles(object):
|
||||
def get(self, key, default=None):
|
||||
return self.id_map.get(key, default)
|
||||
|
||||
def __call__(self, root):
|
||||
def __call__(self, root, fonts):
|
||||
self.fonts = fonts
|
||||
for s in XPath('//w:style')(root):
|
||||
s = Style(s)
|
||||
if s.style_id:
|
||||
@ -246,6 +247,9 @@ class Styles(object):
|
||||
for attr in ans.all_properties:
|
||||
setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
|
||||
|
||||
if ans.font_family is not inherit:
|
||||
ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
|
||||
|
||||
return ans
|
||||
|
||||
def resolve(self, obj):
|
||||
@ -290,13 +294,16 @@ class Styles(object):
|
||||
h = hash(frozenset(css.iteritems()))
|
||||
return self.classes.get(h, (None, None))[0]
|
||||
|
||||
def generate_css(self):
|
||||
def generate_css(self, dest_dir, docx):
|
||||
ef = self.fonts.embed_fonts(dest_dir, docx)
|
||||
prefix = textwrap.dedent(
|
||||
'''\
|
||||
p { text-indent: 1.5em }
|
||||
|
||||
ul, ol, p { margin: 0; padding: 0 }
|
||||
''')
|
||||
if ef:
|
||||
prefix += '\n' + ef
|
||||
|
||||
ans = []
|
||||
for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
|
||||
|
@ -14,9 +14,10 @@ from lxml.html.builder import (
|
||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
|
||||
|
||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
|
||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS
|
||||
from calibre.ebooks.docx.styles import Styles, inherit
|
||||
from calibre.ebooks.docx.numbering import Numbering
|
||||
from calibre.ebooks.docx.fonts import Fonts
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
|
||||
class Text:
|
||||
@ -116,7 +117,18 @@ class Convert(object):
|
||||
|
||||
nname = get_name(NUMBERING, 'numbering.xml')
|
||||
sname = get_name(STYLES, 'styles.xml')
|
||||
fname = get_name(FONTS, 'fontTable.xml')
|
||||
numbering = self.numbering = Numbering()
|
||||
fonts = self.fonts = Fonts()
|
||||
|
||||
if fname is not None:
|
||||
embed_relationships = self.docx.get_relationships(fname)[0]
|
||||
try:
|
||||
raw = self.docx.read(fname)
|
||||
except KeyError:
|
||||
self.log.warn('Fonts table %s does not exist' % fname)
|
||||
else:
|
||||
fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
|
||||
|
||||
if sname is not None:
|
||||
try:
|
||||
@ -124,7 +136,7 @@ class Convert(object):
|
||||
except KeyError:
|
||||
self.log.warn('Styles %s do not exist' % sname)
|
||||
else:
|
||||
self.styles(fromstring(raw))
|
||||
self.styles(fromstring(raw), fonts)
|
||||
|
||||
if nname is not None:
|
||||
try:
|
||||
@ -140,7 +152,7 @@ class Convert(object):
|
||||
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
|
||||
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
|
||||
f.write(raw)
|
||||
css = self.styles.generate_css()
|
||||
css = self.styles.generate_css(self.dest_dir, self.docx)
|
||||
if css:
|
||||
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
|
||||
f.write(css.encode('utf-8'))
|
||||
|
Loading…
x
Reference in New Issue
Block a user