mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on DOCX Output
This commit is contained in:
parent
94294b62e2
commit
87c474ef7e
@ -6,6 +6,9 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
from operator import attrgetter
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks import parse_css_length
|
from calibre.ebooks import parse_css_length
|
||||||
@ -33,13 +36,25 @@ def css_font_family_to_docx(raw):
|
|||||||
for ff in parse_css_font_family(raw):
|
for ff in parse_css_font_family(raw):
|
||||||
return generic.get(ff.lower(), ff)
|
return generic.get(ff.lower(), ff)
|
||||||
|
|
||||||
|
def w(x):
    """Return *x* qualified with the ``w`` namespace in ``{uri}name`` form.

    The namespace URI is looked up in the module-level ``namespaces``
    mapping under the key ``'w'``.
    """
    uri = namespaces['w']
    return '{%s}%s' % (uri, x)
|
||||||
|
|
||||||
|
def makeelement(parent, name, **attrs):
    """Create (but do not attach) a ``w``-namespaced element via *parent*.

    Both the tag *name* and every keyword in *attrs* are passed through
    ``w()`` so that callers can use bare local names. The new element is
    returned; appending it to a parent is left to the caller.
    """
    tag = w(name)
    qualified = {}
    for key, value in attrs.iteritems():
        qualified[w(key)] = value
    return parent.makeelement(tag, **qualified)
|
||||||
|
|
||||||
|
def bmap(x):
    """Map the truthiness of *x* to the strings ``'on'`` / ``'off'``."""
    if x:
        return 'on'
    return 'off'
|
||||||
|
|
||||||
class DOCXStyle(object):
|
class DOCXStyle(object):
|
||||||
|
|
||||||
ALL_PROPS = ()
|
ALL_PROPS = ()
|
||||||
|
TYPE = 'paragraph'
|
||||||
|
|
||||||
def __init__(self):
    """Cache this style's hash and reset its identity fields.

    The hash is computed once from the current values of the attributes
    named in ``ALL_PROPS``, so those attributes must already be set when
    this runs. ``id``, ``name`` and ``next_style`` start out as ``None``.
    """
    prop_values = tuple(getattr(self, prop) for prop in self.ALL_PROPS)
    self._hash = hash(prop_values)
    self.id = None
    self.name = None
    self.next_style = None
|
||||||
|
|
||||||
def __hash__(self):
    """Return the hash value cached in ``_hash`` by ``__init__``."""
    cached = self._hash
    return cached
|
||||||
@ -57,15 +72,17 @@ class DOCXStyle(object):
|
|||||||
return etree.tostring(self.serialize(etree.Element(self.__class__.__name__, nsmap={'w':namespaces['w']})), pretty_print=True)
|
return etree.tostring(self.serialize(etree.Element(self.__class__.__name__, nsmap={'w':namespaces['w']})), pretty_print=True)
|
||||||
__str__ = __repr__
|
__str__ = __repr__
|
||||||
|
|
||||||
def serialize_borders(self, bdr):
|
def serialize_borders(self, bdr, normal_style):
|
||||||
for edge in border_edges:
|
for edge in border_edges:
|
||||||
e = bdr.makeelement(w(edge))
|
e = bdr.makeelement(w(edge))
|
||||||
padding = getattr(self, 'padding_' + edge)
|
padding = getattr(self, 'padding_' + edge)
|
||||||
if padding > 0:
|
if (self is normal_style and padding > 0) or (padding != getattr(normal_style, 'padding_' + edge)):
|
||||||
e.set(w('space'), str(padding))
|
e.set(w('space'), str(padding))
|
||||||
width = getattr(self, 'border_%s_width' % edge)
|
width = getattr(self, 'border_%s_width' % edge)
|
||||||
bstyle = getattr(self, 'border_%s_style' % edge)
|
bstyle = getattr(self, 'border_%s_style' % edge)
|
||||||
if width > 0 and bstyle != 'none':
|
if (self is normal_style and width > 0 and bstyle != 'none'
|
||||||
|
) or width != getattr(normal_style, 'border_%s_width' % edge
|
||||||
|
) or bstyle != getattr(normal_style, 'border_%s_style' % edge):
|
||||||
e.set(w('val'), bstyle)
|
e.set(w('val'), bstyle)
|
||||||
e.set(w('sz'), str(width))
|
e.set(w('sz'), str(width))
|
||||||
e.set(w('color'), getattr(self, 'border_%s_color' % edge))
|
e.set(w('color'), getattr(self, 'border_%s_color' % edge))
|
||||||
@ -73,6 +90,17 @@ class DOCXStyle(object):
|
|||||||
bdr.append(e)
|
bdr.append(e)
|
||||||
return bdr
|
return bdr
|
||||||
|
|
||||||
|
def serialize(self, styles, normal_style):
    """Build this style's ``style`` element, append it to *styles*, return it.

    The element carries ``styleId`` (from ``self.id``) and ``type`` (from
    ``self.TYPE``) attributes and a ``name`` child. When this style *is*
    *normal_style* it is flagged ``default="1"`` and given a ``qFormat``
    child; otherwise it gets a ``basedOn`` child pointing at
    ``normal_style.id``.
    """
    is_default = self is normal_style
    style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE)
    style.append(makeelement(style, 'name', val=self.name))
    if is_default:
        style.set(w('default'), '1')
        marker = makeelement(style, 'qFormat')
    else:
        marker = makeelement(style, 'basedOn', val=normal_style.id)
    style.append(marker)
    styles.append(style)
    return style
|
||||||
|
|
||||||
LINE_STYLES = {
|
LINE_STYLES = {
|
||||||
'none' : 'none',
|
'none' : 'none',
|
||||||
'hidden': 'none',
|
'hidden': 'none',
|
||||||
@ -86,15 +114,13 @@ LINE_STYLES = {
|
|||||||
'outset': 'outset',
|
'outset': 'outset',
|
||||||
}
|
}
|
||||||
|
|
||||||
def w(x):
|
|
||||||
return '{%s}%s' % (namespaces['w'], x)
|
|
||||||
|
|
||||||
class TextStyle(DOCXStyle):
|
class TextStyle(DOCXStyle):
|
||||||
|
|
||||||
ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
|
ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
|
||||||
'background_color', 'underline', 'strike', 'dstrike', 'caps',
|
'background_color', 'underline', 'strike', 'dstrike', 'caps',
|
||||||
'shadow', 'small_caps', 'spacing', 'vertical_align') + tuple(
|
'shadow', 'small_caps', 'spacing', 'vertical_align') + tuple(
|
||||||
x%edge for edge in border_edges for x in border_props)
|
x%edge for edge in border_edges for x in border_props)
|
||||||
|
TYPE = 'character'
|
||||||
|
|
||||||
def __init__(self, css):
|
def __init__(self, css):
|
||||||
self.font_family = css_font_family_to_docx(css['font-family'])
|
self.font_family = css_font_family_to_docx(css['font-family'])
|
||||||
@ -131,41 +157,51 @@ class TextStyle(DOCXStyle):
|
|||||||
|
|
||||||
DOCXStyle.__init__(self)
|
DOCXStyle.__init__(self)
|
||||||
|
|
||||||
def serialize(self, style):
|
def serialize(self, styles, normal_style):
|
||||||
style.append(style.makeelement(w('rFonts'), **{
|
style = DOCXStyle.serialize(self, styles, normal_style)
|
||||||
w(k):self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))
|
|
||||||
for suffix in ('', 'Cs'):
|
if self is normal_style or self.font_family != normal_style.font_family:
|
||||||
style.append(style.makeelement(w('sz' + suffix), **{w('val'):str(self.font_size)}))
|
style.append(makeelement(
|
||||||
style.append(style.makeelement(w('b' + suffix), **{w('val'):('on' if self.bold else 'off')}))
|
style, 'rFonts', **{k:self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))
|
||||||
style.append(style.makeelement(w('i' + suffix), **{w('val'):('on' if self.italic else 'off')}))
|
|
||||||
if self.color:
|
for name, attr, vmap in (('sz', 'font_size', str), ('b', 'bold', bmap), ('i', 'italic', bmap)):
|
||||||
style.append(style.makeelement(w('color'), **{w('val'):str(self.color)}))
|
val = getattr(self, attr)
|
||||||
if self.background_color:
|
if self is normal_style or getattr(normal_style, attr) != val:
|
||||||
style.append(style.makeelement(w('shd'), **{w('val'):str(self.background_color)}))
|
for suffix in ('', 'Cs'):
|
||||||
if self.underline:
|
style.append(makeelement(style, 'sz' + suffix, val=vmap(val)))
|
||||||
style.append(style.makeelement(w('u'), **{w('val'):'single'}))
|
|
||||||
if self.dstrike:
|
def check_attr(attr):
|
||||||
style.append(style.makeelement(w('dstrike'), **{w('val'):'on'}))
|
val = getattr(self, attr)
|
||||||
elif self.strike:
|
return (self is normal_style and val is not False and val is not None) or (val != getattr(normal_style, attr))
|
||||||
style.append(style.makeelement(w('strike'), **{w('val'):'on'}))
|
|
||||||
if self.caps:
|
if check_attr('color'):
|
||||||
style.append(style.makeelement(w('caps'), **{w('val'):'on'}))
|
style.append(makeelement(style, 'color', val=self.color or 'auto'))
|
||||||
if self.small_caps:
|
if check_attr('background_color'):
|
||||||
style.append(style.makeelement(w('smallCaps'), **{w('val'):'on'}))
|
style.append(makeelement(style, 'shd', fill=self.background_color or 'auto'))
|
||||||
if self.shadow:
|
if check_attr('underline'):
|
||||||
style.append(style.makeelement(w('shadow'), **{w('val'):'on'}))
|
style.append(makeelement(style, 'u', val='single' if self.underline else 'none'))
|
||||||
if self.spacing is not None:
|
if check_attr('dstrike'):
|
||||||
style.append(style.makeelement(w('spacing'), **{w('val'):str(self.spacing)}))
|
style.append(makeelement(style, 'dstrike', val=bmap(self.dstrike)))
|
||||||
|
if check_attr('strike'):
|
||||||
|
style.append(makeelement(style, 'strike', val=bmap(self.strike)))
|
||||||
|
if check_attr('caps'):
|
||||||
|
style.append(makeelement(style, 'caps', val=bmap(self.caps)))
|
||||||
|
if check_attr('small_caps'):
|
||||||
|
style.append(makeelement(style, 'smallCaps', val=bmap(self.small_caps)))
|
||||||
|
if check_attr('shadow'):
|
||||||
|
style.append(makeelement(style, 'shadow', val=bmap(self.shadow)))
|
||||||
|
if check_attr('spacing'):
|
||||||
|
style.append(makeelement(style, 'spacing', val=str(self.spacing or 0)))
|
||||||
if isinstance(self.vertical_align, (int, float)):
|
if isinstance(self.vertical_align, (int, float)):
|
||||||
val = int(self.vertical_align * 2)
|
val = int(self.vertical_align * 2)
|
||||||
style.append(style.makeelement(w('position'), **{w('val'):str(val)}))
|
style.append(makeelement(style, 'position', val=str(val)))
|
||||||
elif isinstance(self.vertical_align, basestring):
|
elif isinstance(self.vertical_align, basestring):
|
||||||
val = {'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'bottom':'subscript', 'text-bottom':'subscript', 'sub':'subscript'}.get(
|
val = {'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'bottom':'subscript', 'text-bottom':'subscript', 'sub':'subscript'}.get(
|
||||||
self.vertical_align.lower())
|
self.vertical_align.lower())
|
||||||
if val:
|
if val:
|
||||||
style.append(style.makeelement(w('vertAlign'), **{w('val'):val}))
|
style.append(makeelement(style, 'vertAlign', val=val))
|
||||||
|
|
||||||
bdr = self.serialize_borders(style.makeelement(w('bdr')))
|
bdr = self.serialize_borders(makeelement(style, 'bdr', normal_style))
|
||||||
if len(bdr):
|
if len(bdr):
|
||||||
style.append(bdr)
|
style.append(bdr)
|
||||||
|
|
||||||
@ -204,19 +240,24 @@ class BlockStyle(DOCXStyle):
|
|||||||
|
|
||||||
DOCXStyle.__init__(self)
|
DOCXStyle.__init__(self)
|
||||||
|
|
||||||
def serialize(self, style):
|
def serialize(self, styles, normal_style):
|
||||||
spacing = style.makeelement(w('spacing'))
|
style = DOCXStyle.serialize(self, styles, normal_style)
|
||||||
|
|
||||||
|
spacing = makeelement(style, 'spacing')
|
||||||
for edge, attr in {'top':'before', 'bottom':'after'}.iteritems():
|
for edge, attr in {'top':'before', 'bottom':'after'}.iteritems():
|
||||||
css_val, css_unit = parse_css_length(getattr(self, 'css_margin_' + edge))
|
getter = attrgetter('css_margin_' + edge)
|
||||||
|
css_val, css_unit = parse_css_length(getter(self))
|
||||||
if css_unit in ('em', 'ex'):
|
if css_unit in ('em', 'ex'):
|
||||||
lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
|
lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
|
||||||
if lines > 0:
|
if (self is normal_style and lines > 0) or getter(self) != getter(normal_style):
|
||||||
spacing.set(w(attr + 'Lines'), str(lines))
|
spacing.set(w(attr + 'Lines'), str(lines))
|
||||||
else:
|
else:
|
||||||
val = getattr(self, 'margin_' + edge)
|
getter = attrgetter('margin_' + edge)
|
||||||
if val > 0:
|
val = getter(self)
|
||||||
|
if (self is normal_style and val > 0) or val != getter(normal_style):
|
||||||
spacing.set(w(attr), str(val))
|
spacing.set(w(attr), str(val))
|
||||||
if self.css_line_height != 'normal':
|
|
||||||
|
if (self is normal_style and self.css_line_height != 'normal') or self.css_line_height != normal_style.css_line_height:
|
||||||
try:
|
try:
|
||||||
css_val, css_unit = float(self.css_line_height), 'ratio'
|
css_val, css_unit = float(self.css_line_height), 'ratio'
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -226,50 +267,54 @@ class BlockStyle(DOCXStyle):
|
|||||||
val = int(css_val * 240 * mult)
|
val = int(css_val * 240 * mult)
|
||||||
spacing.set(w('line'), str(val))
|
spacing.set(w('line'), str(val))
|
||||||
else:
|
else:
|
||||||
spacing.set(w('line'), str(self.line_height))
|
spacing.set(w('line'), (0 if self.css_line_height == 'normal' else str(self.line_height)))
|
||||||
spacing.set(w('lineRule', 'exactly'))
|
spacing.set(w('lineRule', 'exactly'))
|
||||||
|
|
||||||
if spacing.attrib:
|
if spacing.attrib:
|
||||||
style.append(spacing)
|
style.append(spacing)
|
||||||
|
|
||||||
ind = style.makeelement(w('ind'))
|
ind = makeelement(style, 'ind')
|
||||||
for edge in ('left', 'right'):
|
for edge in ('left', 'right'):
|
||||||
css_val, css_unit = parse_css_length(getattr(self, 'css_margin_' + edge))
|
getter = attrgetter('css_margin_' + edge)
|
||||||
|
css_val, css_unit = parse_css_length(getter(self))
|
||||||
if css_unit in ('em', 'ex'):
|
if css_unit in ('em', 'ex'):
|
||||||
chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
|
chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
|
||||||
if chars > 0:
|
if (self is normal_style and chars > 0) or getter(self) != getter(normal_style):
|
||||||
ind.set(w(edge + 'Chars'), str(chars))
|
ind.set(w(edge + 'Chars'), str(chars))
|
||||||
else:
|
else:
|
||||||
val = getattr(self, 'margin_' + edge)
|
getter = attrgetter('margin_' + edge)
|
||||||
if val > 0:
|
val = getter(self)
|
||||||
ind.set(w(attr), str(val))
|
if (self is normal_style and val > 0) or val != getter(normal_style):
|
||||||
|
ind.set(w(edge), str(val))
|
||||||
css_val, css_unit = parse_css_length(self.css_text_indent)
|
css_val, css_unit = parse_css_length(self.css_text_indent)
|
||||||
if css_unit in ('em', 'ex'):
|
if css_unit in ('em', 'ex'):
|
||||||
chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
|
chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
|
||||||
if chars > 0:
|
if (self is normal_style and chars > 0) or self.css_text_indent != normal_style.css_text_indent:
|
||||||
ind.set('firstLineChars', str(chars))
|
ind.set('firstLineChars', str(chars))
|
||||||
else:
|
else:
|
||||||
val = self.text_indent
|
val = self.text_indent
|
||||||
if val > 0:
|
if (self is normal_style and val > 0) or self.text_indent != normal_style.text_indent:
|
||||||
ind.set('firstLine', str(val))
|
ind.set('firstLine', str(val))
|
||||||
if ind.attrib:
|
if ind.attrib:
|
||||||
style.append(ind)
|
style.append(ind)
|
||||||
|
|
||||||
if self.background_color:
|
if (self is normal_style and self.background_color) or self.background_color != normal_style.background_color:
|
||||||
shd = style.makeelement(w('shd'))
|
makeelement(style, 'shd', val='clear', color='auto', fill=self.background_color or 'auto')
|
||||||
style.append(shd)
|
|
||||||
shd.set(w('val'), 'clear'), shd.set(w('fill'), self.background_color), shd.set(w('color'), 'auto')
|
|
||||||
|
|
||||||
pbdr = self.serialize_borders(style.makeelement(w('pBdr')))
|
pbdr = self.serialize_borders(style.makeelement(w('pBdr')), normal_style)
|
||||||
if len(pbdr):
|
if len(pbdr):
|
||||||
style.append(pbdr)
|
style.append(pbdr)
|
||||||
jc = style.makeelement(w('jc'))
|
|
||||||
jc.set(w('val'), self.text_align)
|
if self is normal_style or self.text_align != normal_style.text_align:
|
||||||
style.append(jc)
|
style.append(makeelement(style, 'jc', val=self.text_align))
|
||||||
if self.page_break_before:
|
|
||||||
style.append(style.makeelement(w('pageBreakBefore'), **{w('val'):'on'}))
|
if (self is normal_style and self.page_break_before) or self.page_break_before != normal_style.page_break_before:
|
||||||
if self.keep_lines:
|
style.append(makeelement(style, 'pageBreakBefore', bmap(self.page_break_before)))
|
||||||
style.append(style.makeelement(w('keepLines'), **{w('val'):'on'}))
|
if (self is normal_style and self.keep_lines) or self.keep_lines != normal_style.keep_lines:
|
||||||
|
style.append(makeelement(style, 'keepLines', bmap(self.keep_lines)))
|
||||||
|
|
||||||
|
if self is not normal_style and self.next_style is not None:
|
||||||
|
style.append(style.makeelement(w('next'), **{w('val'):self.next_style}))
|
||||||
return style
|
return style
|
||||||
|
|
||||||
|
|
||||||
@ -295,3 +340,29 @@ class StylesManager(object):
|
|||||||
else:
|
else:
|
||||||
ans = existing
|
ans = existing
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def finalize(self, blocks):
    """Assign ``id`` and ``name`` to every style used by *blocks*.

    Each block contributes ``block.style`` and, for every entry in
    ``block.runs``, ``run.style``. Styles are ranked by how often they are
    used (``Counter.most_common`` order). The most used block style becomes
    ``BlockNormal`` / ``Normal`` and the most used run style becomes
    ``TextNormal`` / ``Normal``. Every other style is numbered by its rank:
    ``Block<i>`` / ``Paragraph <i>`` for block styles and ``Text<i>`` /
    ``Text <i>`` for run styles.

    Style objects must be hashable, since they are used as Counter keys.
    """
    block_counts, run_counts = Counter(), Counter()
    for block in blocks:
        block_counts[block.style] += 1
        for run in block.runs:
            run_counts[run.style] += 1

    for i, (block_style, _count) in enumerate(block_counts.most_common()):
        if i == 0:
            block_style.id = 'BlockNormal'
            block_style.name = 'Normal'
        else:
            block_style.id = 'Block%d' % i
            block_style.name = 'Paragraph %d' % i

    for i, (text_style, _count) in enumerate(run_counts.most_common()):
        if i == 0:
            text_style.id = 'TextNormal'
            text_style.name = 'Normal'
        else:
            # Name the run style being iterated, not the variable left over
            # from the block-style loop above.
            text_style.id = 'Text%d' % i
            text_style.name = 'Text %d' % i
|
||||||
|
Loading…
x
Reference in New Issue
Block a user