mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Fonts
This commit is contained in:
parent
ffdc9d377c
commit
d8a896616a
@ -113,6 +113,14 @@ def read_vert_align(parent, dest):
|
|||||||
if val and val in {'baseline', 'subscript', 'superscript'}:
|
if val and val in {'baseline', 'subscript', 'superscript'}:
|
||||||
ans = val
|
ans = val
|
||||||
setattr(dest, 'vert_align', ans)
|
setattr(dest, 'vert_align', ans)
|
||||||
|
|
||||||
|
def read_font_family(parent, dest):
    """Record the ASCII typeface named by *parent*'s ``w:rFonts`` child.

    Every ``w:rFonts`` child of *parent* that carries a ``w:ascii`` attribute
    is examined in document order; the last non-empty value wins. When there
    is no such value, ``dest.font_family`` is set to the ``inherit`` sentinel.

    :param parent: element whose direct ``w:rFonts`` children are inspected
    :param dest: object that receives the ``font_family`` attribute
    """
    family = inherit
    for rfonts in XPath('./w:rFonts[@w:ascii]')(parent):
        ascii_face = get(rfonts, 'w:ascii')
        if ascii_face:
            family = ascii_face
    dest.font_family = family
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class RunStyle(object):
|
class RunStyle(object):
|
||||||
@ -122,7 +130,7 @@ class RunStyle(object):
|
|||||||
'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
|
'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
|
||||||
|
|
||||||
'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
|
'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
|
||||||
'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
|
'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
|
||||||
}
|
}
|
||||||
|
|
||||||
toggle_properties = {
|
toggle_properties = {
|
||||||
@ -141,7 +149,7 @@ class RunStyle(object):
|
|||||||
):
|
):
|
||||||
setattr(self, p, binary_property(rPr, p))
|
setattr(self, p, binary_property(rPr, p))
|
||||||
|
|
||||||
for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
|
for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'):
|
||||||
f = globals()['read_%s' % x]
|
f = globals()['read_%s' % x]
|
||||||
f(rPr, self)
|
f(rPr, self)
|
||||||
|
|
||||||
@ -212,6 +220,9 @@ class RunStyle(object):
|
|||||||
|
|
||||||
if self.b:
|
if self.b:
|
||||||
c['font-weight'] = 'bold'
|
c['font-weight'] = 'bold'
|
||||||
|
|
||||||
|
if self.font_family is not inherit:
|
||||||
|
c['font-family'] = self.font_family
|
||||||
return self._css
|
return self._css
|
||||||
|
|
||||||
def same_border(self, other):
|
def same_border(self, other):
|
||||||
|
@ -167,7 +167,9 @@ class DOCX(object):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def document_relationships(self):
|
def document_relationships(self):
|
||||||
name = self.document_name
|
return self.get_relationships(self.document_name)
|
||||||
|
|
||||||
|
def get_relationships(self, name):
|
||||||
base = '/'.join(name.split('/')[:-1])
|
base = '/'.join(name.split('/')[:-1])
|
||||||
by_id, by_type = {}, {}
|
by_id, by_type = {}, {}
|
||||||
parts = name.split('/')
|
parts = name.split('/')
|
||||||
|
132
src/calibre/ebooks/docx/fonts.py
Normal file
132
src/calibre/ebooks/docx/fonts.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import os, re
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
from calibre.ebooks.docx.block_styles import binary_property, inherit
|
||||||
|
from calibre.ebooks.docx.names import XPath, get
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
from calibre.utils.fonts.scanner import font_scanner, NoFonts
|
||||||
|
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
|
||||||
|
|
||||||
|
Embed = namedtuple('Embed', 'name key subsetted')
|
||||||
|
|
||||||
|
def has_system_fonts(name):
    """Tell whether the font scanner knows any font for the family *name*.

    :param name: font family name handed to ``font_scanner.fonts_for_family``
    :return: ``True`` when the scanner returns a non-empty result, ``False``
        when the result is empty or the scanner raises ``NoFonts``
    """
    try:
        matches = font_scanner.fonts_for_family(name)
    except NoFonts:
        # The scanner signals "nothing available" with an exception; treat it
        # the same as an empty result.
        return False
    return bool(matches)
|
||||||
|
|
||||||
|
def get_variant(bold=False, italic=False):
    """Return the variant name for a bold/italic combination.

    The result is one of ``'Regular'``, ``'Italic'``, ``'Bold'`` or
    ``'BoldItalic'``.

    Both flags are coerced with ``bool()`` before the lookup, so any
    truthy/falsy value (``None``, ``0``, ``2`` ...) is accepted instead of
    raising ``KeyError`` for values that are not exactly ``True``/``False``.

    :param bold: truthy when the bold variant is wanted
    :param italic: truthy when the italic variant is wanted
    """
    return {(False, False):'Regular', (False, True):'Italic',
            (True, False):'Bold', (True, True):'BoldItalic'}[(bool(bold), bool(italic))]
|
||||||
|
|
||||||
|
class Family(object):
    """Description of one font element (one with a ``w:name`` attribute).

    Attributes set by the constructor:

    * ``name`` -- value of the element's ``w:name`` attribute
    * ``family_name`` -- ``name``, or the first ``w:altName`` value for which
      ``has_system_fonts`` is true when ``name`` itself has no system fonts
    * ``alt_names`` -- tuple of the ``w:val`` values of all ``w:altName``
      children
    * ``embedded`` -- dict mapping a variant name (``'Regular'``, ``'Bold'``,
      ``'Italic'``, ``'BoldItalic'``) to an ``Embed`` tuple
    * ``generic_family`` -- ``w:val`` of the ``w:family`` child, default
      ``'auto'``
    * ``is_ttf`` -- false only when ``notTrueType`` is explicitly switched on
    * ``panose1`` / ``panose_name`` -- parsed ``w:panose1`` bytes and the CSS
      generic family derived from them, or ``None``
    * ``css_generic_family`` -- CSS generic family used as the fallback
    """

    def __init__(self, elem, embed_relationships):
        """Read all font information from *elem*.

        :param elem: the font element to read
        :param embed_relationships: mapping keyed by relationship id
            (``r:id``); the mapped value is stored as ``Embed.name``
        """
        self.name = self.family_name = get(elem, 'w:name')
        self.alt_names = tuple(get(x, 'w:val') for x in XPath('./w:altName')(elem))
        # Alternate names are only consulted when the primary name is not
        # available as a system font.
        if self.alt_names and not has_system_fonts(self.name):
            for x in self.alt_names:
                if has_system_fonts(x):
                    self.family_name = x
                    break

        self.embedded = {}
        for x in ('Regular', 'Bold', 'Italic', 'BoldItalic'):
            for y in XPath('./w:embed%s[@r:id]' % x)(elem):
                rid = get(y, 'r:id')
                key = get(y, 'w:fontKey')
                subsetted = get(y, 'w:subsetted') in {'1', 'true', 'on'}
                # Embedding entries whose relationship id is unknown are
                # silently ignored.
                if rid in embed_relationships:
                    self.embedded[x] = Embed(embed_relationships[rid], key, subsetted)

        self.generic_family = 'auto'
        for x in XPath('./w:family[@w:val]')(elem):
            self.generic_family = get(x, 'w:val', 'auto')

        ntt = binary_property(elem, 'notTrueType')
        # An unspecified (inherit) notTrueType counts as "is TrueType".
        self.is_ttf = ntt is inherit or not ntt

        self.panose1 = None
        self.panose_name = None
        for x in XPath('./w:panose1[@w:val]')(elem):
            try:
                v = get(x, 'w:val')
                # Two hex digits per byte. range() gives the same result as
                # the Python-2-only xrange() and also works on Python 3.
                v = tuple(int(v[i:i+2], 16) for i in range(0, len(v), 2))
            except (TypeError, ValueError, IndexError):
                # Unparseable value: keep whatever was recorded so far.
                continue
            self.panose1 = v
            self.panose_name = panose_to_css_generic_family(v)

        self.css_generic_family = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace',
                                   'decorative':'fantasy', 'script':'cursive'}.get(self.generic_family, None)
        # Fallback chain: declared generic family, then the family derived
        # from the panose data, then plain serif.
        self.css_generic_family = self.css_generic_family or self.panose_name or 'serif'
|
||||||
|
|
||||||
|
|
||||||
|
class Fonts(object):
    """Registry of the font families declared in a DOCX font table.

    ``fonts`` maps a ``w:name`` value to its ``Family``; ``used`` collects the
    ``(name, variant)`` pairs requested through :meth:`family_for`, so that
    :meth:`embed_fonts` can later write out only the embedded fonts that are
    actually referenced.
    """

    def __init__(self):
        self.fonts = {}
        self.used = set()

    def __call__(self, root, embed_relationships, docx, dest_dir):
        """Register a ``Family`` for every ``w:font`` element under *root*.

        :param root: parsed font table
        :param embed_relationships: passed through to ``Family``
        :param docx: not used by this method
        :param dest_dir: not used by this method
        """
        for elem in XPath('//w:font[@w:name]')(root):
            self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships)

    def family_for(self, name, bold=False, italic=False):
        """Return the CSS ``font-family`` value for the font *name*.

        Unknown names yield plain ``'serif'`` and are not recorded. Known
        names are recorded in ``used`` together with the requested variant.
        The returned value is the quoted family name followed by the family's
        generic CSS family. The declared name is used when that variant is
        embedded, otherwise the family's ``family_name``.
        """
        f = self.fonts.get(name, None)
        if f is None:
            return 'serif'
        variant = get_variant(bold, italic)
        self.used.add((name, variant))
        name = f.name if variant in f.embedded else f.family_name
        return '"%s", %s' % (name.replace('"', ''), f.css_generic_family)

    def embed_fonts(self, dest_dir, docx):
        """Write every used, embedded font and return the matching CSS.

        Fonts are written into the ``fonts`` sub-directory of *dest_dir*,
        which is created on first need. Fonts for which :meth:`write` returns
        ``None`` are skipped.

        :return: the ``@font-face`` rules joined by newlines; an empty string
            when nothing was embedded
        """
        defs = []
        dest_dir = os.path.join(dest_dir, 'fonts')
        # Iterate in sorted order so the generated CSS does not depend on set
        # iteration order.
        for name, variant in sorted(self.used):
            f = self.fonts[name]
            if variant not in f.embedded:
                continue
            if not os.path.exists(dest_dir):
                os.mkdir(dest_dir)
            fname = self.write(name, dest_dir, docx, variant)
            if fname is None:
                continue
            # A list of pairs (rather than a dict) keeps the declaration order
            # fixed on every Python version.
            d = [('font-family', '"%s"' % name.replace('"', '')),
                 ('src', 'url("fonts/%s")' % fname)]
            if 'Bold' in variant:
                d.append(('font-weight', 'bold'))
            if 'Italic' in variant:
                d.append(('font-style', 'italic'))
            d = ';\n\t'.join('%s: %s' % (k, v) for k, v in d)
            defs.append('@font-face {\n\t%s\n}\n' % d)
        return '\n'.join(defs)

    @staticmethod
    def _deobfuscate(prefix, font_key):
        """XOR *prefix* with the bytes encoded in *font_key*.

        All non-hex characters are stripped from *font_key*, the remaining
        digits are read two at a time as bytes, the byte order is reversed
        and the result is applied cyclically to *prefix*.

        When *font_key* contains no hex digits at all, *prefix* is returned
        unchanged instead of failing with ``ZeroDivisionError``.
        """
        key = re.sub(r'[^A-Fa-f0-9]', '', font_key)
        key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in range(0, len(key), 2))))
        if not key:
            return prefix
        return bytes(bytearray(b ^ key[i % len(key)] for i, b in enumerate(bytearray(prefix))))

    def write(self, name, dest_dir, docx, variant):
        """Write the embedded font for ``(name, variant)`` into *dest_dir*.

        The font data is read with ``docx.read``. When the embedding carries
        a key, the first 32 bytes are de-obfuscated with it before use.

        :return: the file name that was written, or ``None`` when the data
            is not recognised by ``is_truetype_font`` (nothing is written)
        """
        f = self.fonts[name]
        ef = f.embedded[variant]
        raw = docx.read(ef.name)
        prefix = raw[:32]
        if ef.key:
            prefix = self._deobfuscate(prefix, ef.key)
        if not is_truetype_font(prefix):
            return None
        ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf'
        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
        with open(os.path.join(dest_dir, fname), 'wb') as dest:
            dest.write(prefix)
            dest.write(raw[32:])

        return fname
|
||||||
|
|
@ -13,6 +13,7 @@ DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metada
|
|||||||
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
|
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
|
||||||
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
|
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
|
||||||
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
|
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
|
||||||
|
FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
|
||||||
|
|
||||||
namespaces = {
|
namespaces = {
|
||||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||||
|
@ -97,7 +97,8 @@ class Styles(object):
|
|||||||
def get(self, key, default=None):
|
def get(self, key, default=None):
|
||||||
return self.id_map.get(key, default)
|
return self.id_map.get(key, default)
|
||||||
|
|
||||||
def __call__(self, root):
|
def __call__(self, root, fonts):
|
||||||
|
self.fonts = fonts
|
||||||
for s in XPath('//w:style')(root):
|
for s in XPath('//w:style')(root):
|
||||||
s = Style(s)
|
s = Style(s)
|
||||||
if s.style_id:
|
if s.style_id:
|
||||||
@ -246,6 +247,9 @@ class Styles(object):
|
|||||||
for attr in ans.all_properties:
|
for attr in ans.all_properties:
|
||||||
setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
|
setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
|
||||||
|
|
||||||
|
if ans.font_family is not inherit:
|
||||||
|
ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
|
||||||
|
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def resolve(self, obj):
|
def resolve(self, obj):
|
||||||
@ -290,13 +294,16 @@ class Styles(object):
|
|||||||
h = hash(frozenset(css.iteritems()))
|
h = hash(frozenset(css.iteritems()))
|
||||||
return self.classes.get(h, (None, None))[0]
|
return self.classes.get(h, (None, None))[0]
|
||||||
|
|
||||||
def generate_css(self):
|
def generate_css(self, dest_dir, docx):
|
||||||
|
ef = self.fonts.embed_fonts(dest_dir, docx)
|
||||||
prefix = textwrap.dedent(
|
prefix = textwrap.dedent(
|
||||||
'''\
|
'''\
|
||||||
p { text-indent: 1.5em }
|
p { text-indent: 1.5em }
|
||||||
|
|
||||||
ul, ol, p { margin: 0; padding: 0 }
|
ul, ol, p { margin: 0; padding: 0 }
|
||||||
''')
|
''')
|
||||||
|
if ef:
|
||||||
|
prefix += '\n' + ef
|
||||||
|
|
||||||
ans = []
|
ans = []
|
||||||
for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
|
for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
|
||||||
|
@ -14,9 +14,10 @@ from lxml.html.builder import (
|
|||||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
|
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
|
||||||
|
|
||||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
|
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS
|
||||||
from calibre.ebooks.docx.styles import Styles, inherit
|
from calibre.ebooks.docx.styles import Styles, inherit
|
||||||
from calibre.ebooks.docx.numbering import Numbering
|
from calibre.ebooks.docx.numbering import Numbering
|
||||||
|
from calibre.ebooks.docx.fonts import Fonts
|
||||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||||
|
|
||||||
class Text:
|
class Text:
|
||||||
@ -116,7 +117,18 @@ class Convert(object):
|
|||||||
|
|
||||||
nname = get_name(NUMBERING, 'numbering.xml')
|
nname = get_name(NUMBERING, 'numbering.xml')
|
||||||
sname = get_name(STYLES, 'styles.xml')
|
sname = get_name(STYLES, 'styles.xml')
|
||||||
|
fname = get_name(FONTS, 'fontTable.xml')
|
||||||
numbering = self.numbering = Numbering()
|
numbering = self.numbering = Numbering()
|
||||||
|
fonts = self.fonts = Fonts()
|
||||||
|
|
||||||
|
if fname is not None:
|
||||||
|
embed_relationships = self.docx.get_relationships(fname)[0]
|
||||||
|
try:
|
||||||
|
raw = self.docx.read(fname)
|
||||||
|
except KeyError:
|
||||||
|
self.log.warn('Fonts table %s does not exist' % fname)
|
||||||
|
else:
|
||||||
|
fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
|
||||||
|
|
||||||
if sname is not None:
|
if sname is not None:
|
||||||
try:
|
try:
|
||||||
@ -124,7 +136,7 @@ class Convert(object):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
self.log.warn('Styles %s do not exist' % sname)
|
self.log.warn('Styles %s do not exist' % sname)
|
||||||
else:
|
else:
|
||||||
self.styles(fromstring(raw))
|
self.styles(fromstring(raw), fonts)
|
||||||
|
|
||||||
if nname is not None:
|
if nname is not None:
|
||||||
try:
|
try:
|
||||||
@ -140,7 +152,7 @@ class Convert(object):
|
|||||||
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
|
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
|
||||||
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
|
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
css = self.styles.generate_css()
|
css = self.styles.generate_css(self.dest_dir, self.docx)
|
||||||
if css:
|
if css:
|
||||||
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
|
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
|
||||||
f.write(css.encode('utf-8'))
|
f.write(css.encode('utf-8'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user