mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on DOCX input
This commit is contained in:
parent
876c0de8e7
commit
bcb19457e2
265
src/calibre/ebooks/docx/block_styles.py
Normal file
265
src/calibre/ebooks/docx/block_styles.py
Normal file
@ -0,0 +1,265 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import OrderedDict
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
|
||||
class Inherit:
    '''Type of the sentinel that marks a style property as "not set here".'''


# The single shared instance. Code in this module compares against it by
# identity (``is inherit`` / ``is not inherit``), never by equality.
inherit = Inherit()
|
||||
|
||||
def binary_property(parent, name):
    '''
    Read the on/off child element ``w:<name>`` of *parent*.

    Returns ``inherit`` when *parent* has no such child. Otherwise returns a
    bool that is True only when the ``w:val`` of the first match is one of
    ``'on'``, ``'1'`` or ``'true'``. ``get`` is given ``'on'`` as its third
    argument, presumably the value used when the attribute is absent.
    '''
    matches = XPath('./w:%s' % name)(parent)
    if matches:
        return get(matches[0], 'w:val', 'on') in {'on', '1', 'true'}
    return inherit
|
||||
|
||||
def simple_color(col, auto='black'):
    '''
    Turn a six character color string into a ``#``-prefixed CSS color.

    *auto* is returned instead when *col* is empty/None, equals ``'auto'``
    or is not exactly six characters long.
    '''
    usable = bool(col) and col != 'auto' and len(col) == 6
    return ('#' + col) if usable else auto
|
||||
|
||||
def simple_float(val, mult=1.0):
    '''
    Return ``float(val) * mult``, or None if that computation raises
    ValueError, TypeError, AttributeError or KeyError.
    '''
    result = None
    try:
        result = float(val) * mult
    except (ValueError, TypeError, AttributeError, KeyError):
        pass
    return result
|
||||
|
||||
|
||||
LINE_STYLES = {  # {{{
    # Maps the border style names read from w:val attributes to the CSS
    # border-style keyword used for them. Grouped by CSS keyword.
    docx_name: css_style
    for css_style, docx_names in (
        ('dashed', (
            'basicBlackDashes', 'basicBlackSquares', 'dashed',
            'dashSmallGap', 'dotDash', 'dotDotDash',
        )),
        ('dotted', ('basicBlackDots', 'dotted')),
        ('solid', ('basicThinLines', 'single', 'thick')),
        ('groove', ('dashDotStroked', 'threeDEngrave')),
        ('ridge', ('threeDEmboss',)),
        ('inset', ('inset',)),
        ('outset', ('outset',)),
        ('none', ('nil', 'none')),
        ('double', (
            'double', 'triple',
            'thickThinLargeGap', 'thickThinMediumGap', 'thickThinSmallGap',
            'thinThickLargeGap', 'thinThickMediumGap', 'thinThickSmallGap',
            'thinThickThinLargeGap', 'thinThickThinMediumGap',
            'thinThickThinSmallGap',
        )),
    )
    for docx_name in docx_names
}  # }}}
|
||||
|
||||
# Read from XML {{{
|
||||
def read_border(parent, dest):
    '''
    Read paragraph border settings from the ``w:pBdr`` children of *parent*
    and store them on *dest*.

    For each of the four edges (left, top, right, bottom) the attributes
    ``padding_<edge>``, ``border_<edge>_width``, ``border_<edge>_style`` and
    ``border_<edge>_color`` are always set on *dest*; any value that is not
    specified (or cannot be parsed) is set to ``inherit``.
    '''
    edges = ('left', 'top', 'right', 'bottom')
    templates = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
    vals = {}
    for edge in edges:
        for template in templates:
            vals[template % edge] = inherit

    for border in XPath('./w:pBdr')(parent):
        for edge in edges:
            # The expression has to be evaluated against the w:pBdr element.
            # Iterating over the un-called XPath object never yields the
            # edge elements.
            for elem in XPath('./w:%s' % edge)(border):
                color = get(elem, 'w:color')
                if color is not None:
                    vals['border_%s_color' % edge] = simple_color(color)
                style = get(elem, 'w:val')
                if style is not None:
                    # Unknown style names fall back to a solid line
                    vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
                space = get(elem, 'w:space')
                if space is not None:
                    try:
                        vals['padding_%s' % edge] = float(space)
                    except (ValueError, TypeError):
                        pass  # unparseable value: leave as inherit
                sz = get(elem, 'w:sz')
                if sz is not None:
                    # we don't care about art borders (they are only used for page borders)
                    try:
                        # Clamp to the range 2..96 and divide by 8
                        vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
                    except (ValueError, TypeError):
                        pass  # unparseable value: leave as inherit

    for key, val in vals.items():
        setattr(dest, key, val)
|
||||
|
||||
def _char_or_twip_length(chars, twips):
    '''
    Return ``(value, unit)`` for a length given either in hundredths of a
    character (*chars*, preferred, unit ``'em'``) or as an absolute value
    scaled by 0.05 (*twips*, unit ``'pt'``). value is None when neither is
    present or the chosen one cannot be parsed.
    '''
    if chars is not None:
        return simple_float(chars, 0.01), 'em'
    if twips is not None:
        return simple_float(twips, 0.05), 'pt'
    return None, 'pt'


def read_indent(parent, dest):
    '''
    Read the ``w:ind`` children of *parent* and set ``margin_left``,
    ``margin_right`` and ``text_indent`` on *dest*.

    Each is a CSS length string (``em`` when the ``*Chars`` attribute was
    used, ``pt`` otherwise), or ``inherit`` when not specified. For the first
    line, ``w:hanging``/``w:hangingChars`` take precedence over
    ``w:firstLine``/``w:firstLineChars``. A hanging indent pulls the first
    line back towards the start of the line, so it is emitted as a negative
    text indent.
    '''
    margin_left = margin_right = text_indent = inherit
    for indent in XPath('./w:ind')(parent):
        l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
        pl, unit = _char_or_twip_length(lc, l)
        if pl is not None:
            margin_left = '%.3g%s' % (pl, unit)

        r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
        pr, unit = _char_or_twip_length(rc, r)
        if pr is not None:
            margin_right = '%.3g%s' % (pr, unit)

        h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
        fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
        if hc is not None or h is not None:
            ti, unit = _char_or_twip_length(hc, h)
            if ti:
                # Negate only non-zero values so that we never emit '-0'
                ti = -ti
        else:
            ti, unit = _char_or_twip_length(flc, fl)
        if ti is not None:
            text_indent = '%.3g%s' % (ti, unit)

    setattr(dest, 'margin_left', margin_left)
    setattr(dest, 'margin_right', margin_right)
    setattr(dest, 'text_indent', text_indent)
|
||||
|
||||
def read_justification(parent, dest):
    '''
    Set ``dest.text_align`` from the ``w:jc`` children of *parent*.

    ``left``, ``center`` and ``right`` are used as is. ``both``,
    ``distribute`` and any value containing ``thai`` or ``kashida`` become
    ``justify``. Other values are ignored; ``inherit`` is stored when nothing
    usable is found.
    '''
    alignment = inherit
    for jc in XPath('./w:jc[@w:val]')(parent):
        val = get(jc, 'w:val')
        if not val:
            continue
        if val in {'left', 'center', 'right'}:
            alignment = val
        elif val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
            alignment = 'justify'
    dest.text_align = alignment
|
||||
|
||||
def read_spacing(parent, dest):
    '''
    Read the ``w:spacing`` children of *parent* and set ``margin_top``,
    ``margin_bottom`` and ``line_height`` on *dest* (``inherit`` when not
    specified or not parseable).

    Margins given via ``w:beforeLines``/``w:afterLines`` are scaled by 0.02
    and emitted in ``ex``; those given via ``w:before``/``w:after`` are
    scaled by 0.05 and emitted in ``pt``. When the corresponding
    ``*Autospacing`` attribute is on, the margin is left untouched.

    ``w:line`` is emitted in ``pt`` (scaled by 0.05) when ``w:lineRule`` is
    ``exactly`` or ``atLeast``, otherwise as a unit-less multiplier (value
    divided by 240).
    '''
    on_values = {'on', '1', 'true'}
    margin_top = margin_bottom = line_height = inherit
    for s in XPath('./w:spacing')(parent):
        a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
        pb = None if aa in on_values else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
        if pb is not None:
            margin_bottom = '%.3g%s' % (pb, 'ex' if al is not None else 'pt')

        b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
        pt = None if bb in on_values else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
        if pt is not None:
            margin_top = '%.3g%s' % (pt, 'ex' if bl is not None else 'pt')

        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
        if l is not None:
            absolute = lr in {'exactly', 'atLeast'}
            lh = simple_float(l, 0.05 if absolute else 1/240.0)
            # simple_float() returns None for unparseable input; formatting
            # None with %g would raise TypeError, so skip it instead.
            if lh is not None:
                line_height = '%.3g%s' % (lh, 'pt' if absolute else '')

    setattr(dest, 'margin_top', margin_top)
    setattr(dest, 'margin_bottom', margin_bottom)
    setattr(dest, 'line_height', line_height)
|
||||
|
||||
def read_direction(parent, dest):
    '''
    Set ``dest.direction`` to ``'rtl'`` when any ``w:textFlow`` child of
    *parent* has a ``w:val`` containing ``rl`` (case-insensitively),
    otherwise to ``inherit``.
    '''
    direction = inherit
    for flow in XPath('./w:textFlow[@w:val]')(parent):
        val = get(flow, 'w:val')
        if val and 'rl' in val.lower():
            direction = 'rtl'
    dest.direction = direction
|
||||
|
||||
def read_shd(parent, dest):
    '''
    Set ``dest.background_color`` from the ``w:fill`` attribute of the
    ``w:shd`` children of *parent*. The fill is converted with
    simple_color(), using ``'transparent'`` as its fallback; ``inherit`` is
    stored when there is no non-empty fill.
    '''
    background = inherit
    for shd in XPath('./w:shd[@w:fill]')(parent):
        fill = get(shd, 'w:fill')
        if not fill:
            continue
        background = simple_color(fill, auto='transparent')
    dest.background_color = background
|
||||
# }}}
|
||||
|
||||
class ParagraphStyle(object):
    '''
    Block level formatting read from a paragraph properties element.

    Every name in ``all_properties`` is an attribute of the instance. A value
    of ``inherit`` means the property is not specified by this style. The
    ``css`` property renders the specified properties as an OrderedDict of
    CSS declarations.
    '''

    # Simple on/off child elements, read with binary_property()
    _binary_properties = (
        'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
        'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
        'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
        'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
    )

    all_properties = _binary_properties + (
        # Border margins padding
        'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
        'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
        'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
        'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
        'margin_left', 'margin_top', 'margin_right', 'margin_bottom',

        # Misc.
        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
    )

    def __init__(self, pPr=None):
        '''
        :param pPr: The paragraph properties element to read from. When None,
                    every property is initialized to ``inherit``.
        '''
        self.linked_style = None
        if pPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for p in self._binary_properties:
                setattr(self, p, binary_property(pPr, p))

            # Each read_* function sets one or more attributes on self
            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
                f = globals()['read_%s' % x]
                f(pPr, self)

            for s in XPath('./w:pStyle[@w:val]')(pPr):
                self.linked_style = get(s, 'w:val')

        self._css = None  # cache for the css property

    def update(self, other):
        ''' Override the properties of this style with all properties that are specified in *other*. '''
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
        if other.linked_style is not None:
            self.linked_style = other.linked_style
        self._css = None  # property values may have changed

    def resolve_based_on(self, parent):
        ''' Fill in every property that is not specified in this style with the value from *parent*. '''
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))
        self._css = None  # property values may have changed

    @property
    def css(self):
        ''' The specified properties as an OrderedDict mapping CSS property names to values (computed once and cached). '''
        if self._css is None:
            self._css = c = OrderedDict()
            if self.keepLines is True:
                c['page-break-inside'] = 'avoid'
            if self.pageBreakBefore is True:
                c['page-break-before'] = 'always'
            for edge in ('left', 'top', 'right', 'bottom'):
                val = getattr(self, 'border_%s_width' % edge)
                if val is not inherit:
                    # Each edge gets its own width declaration
                    c['border-%s-width' % edge] = '%.3gpt' % val
                for x in ('style', 'color'):
                    val = getattr(self, 'border_%s_%s' % (edge, x))
                    if val is not inherit:
                        c['border-%s-%s' % (edge, x)] = val
                val = getattr(self, 'padding_%s' % edge)
                if val is not inherit:
                    c['padding-%s' % edge] = '%.3gpt' % val
                # Margins are already stored as CSS length strings
                val = getattr(self, 'margin_%s' % edge)
                if val is not inherit:
                    c['margin-%s' % edge] = val

            for x in ('text_indent', 'text_align', 'line_height', 'background_color'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = val
        return self._css
|
||||
|
||||
# TODO: keepNext must be done at markup level
|
||||
|
||||
|
228
src/calibre/ebooks/docx/char_styles.py
Normal file
228
src/calibre/ebooks/docx/char_styles.py
Normal file
@ -0,0 +1,228 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import OrderedDict
|
||||
from calibre.ebooks.docx.block_styles import ( # noqa
|
||||
inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd)
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
|
||||
# Read from XML {{{
|
||||
def read_text_border(parent, dest):
    '''
    Read the ``w:bdr`` children of *parent* and set ``border_color``,
    ``border_style``, ``border_width`` and ``padding`` on *dest*.

    Without any ``w:bdr`` element all four are ``inherit``. With one, the
    border defaults to simple_color('auto'), ``'solid'`` and a width of 1,
    each overridden by the element's attributes where they can be parsed.
    '''
    color = style = width = pad = inherit
    borders = XPath('./w:bdr')(parent)
    if borders:
        color, style, width = simple_color('auto'), 'solid', 1

    for bdr in borders:
        raw = get(bdr, 'w:color')
        if raw is not None:
            color = simple_color(raw)

        raw = get(bdr, 'w:val')
        if raw is not None:
            style = LINE_STYLES.get(raw, 'solid')

        raw = get(bdr, 'w:space')
        if raw is not None:
            try:
                pad = float(raw)
            except (ValueError, TypeError):
                pass

        raw = get(bdr, 'w:sz')
        if raw is not None:
            # we don't care about art borders (they are only used for page borders)
            try:
                width = min(96, max(2, float(raw))) / 8
            except (ValueError, TypeError):
                pass

    dest.border_color = color
    dest.border_style = style
    dest.border_width = width
    dest.padding = pad
|
||||
|
||||
def read_color(parent, dest):
    '''
    Set ``dest.color`` from the ``w:val`` of the ``w:color`` children of
    *parent*, converted with simple_color(); ``inherit`` if there is none.
    '''
    color = inherit
    for elem in XPath('./w:color[@w:val]')(parent):
        val = get(elem, 'w:val')
        if val:
            color = simple_color(val)
    dest.color = color
|
||||
|
||||
def read_highlight(parent, dest):
    '''
    Set ``dest.highlight`` from the ``w:val`` of the ``w:highlight``
    children of *parent*. ``'none'`` is stored as ``'transparent'``, any
    other non-empty value verbatim; ``inherit`` if there is none.
    '''
    highlight = inherit
    for elem in XPath('./w:highlight[@w:val]')(parent):
        val = get(elem, 'w:val')
        if val:
            highlight = 'transparent' if val == 'none' else val
    dest.highlight = highlight
|
||||
|
||||
def read_lang(parent, dest):
    '''
    Set ``dest.lang`` from the ``w:lang`` children of *parent* that carry a
    ``w:val`` attribute; ``inherit`` when none yields a usable value.

    Two or three letter purely alphabetic values, and values that do not
    parse as a hexadecimal number, are stored verbatim. All other values are
    parsed as a hexadecimal number and looked up in the ``lcid`` table; they
    are ignored when the table has no entry for them.
    '''
    ans = inherit
    for elem in XPath('./w:lang[@w:val]')(parent):
        val = get(elem, 'w:val')
        if not val:
            continue
        # Tags such as 'de', 'fa' or 'be' consist solely of hex digits, so
        # int(val, 16) accepts them and the language would be dropped
        # whenever the numeric lookup below finds nothing. Treat short
        # alphabetic values as language tags instead.
        if len(val) in (2, 3) and val.isalpha():
            ans = val
            continue
        try:
            code = int(val, 16)
        except (ValueError, TypeError):
            ans = val
        else:
            from calibre.ebooks.docx.lcid import lcid
            name = lcid.get(code, None)
            if name:
                ans = name
    setattr(dest, 'lang', ans)
|
||||
|
||||
def read_letter_spacing(parent, dest):
    '''
    Set ``dest.letter_spacing`` to the ``w:val`` of the ``w:spacing``
    children of *parent* multiplied by 0.05, or to ``inherit`` when there is
    no parseable value.
    '''
    spacing = inherit
    for elem in XPath('./w:spacing[@w:val]')(parent):
        parsed = simple_float(get(elem, 'w:val'), 0.05)
        if parsed is None:
            continue
        spacing = parsed
    dest.letter_spacing = spacing
|
||||
|
||||
def read_sz(parent, dest):
    '''
    Set ``dest.font_size`` to the ``w:val`` of the ``w:sz`` children of
    *parent* multiplied by 0.5, or to ``inherit`` when there is no parseable
    value.
    '''
    size = inherit
    for elem in XPath('./w:sz[@w:val]')(parent):
        parsed = simple_float(get(elem, 'w:val'), 0.5)
        if parsed is None:
            continue
        size = parsed
    dest.font_size = size
|
||||
|
||||
def read_underline(parent, dest):
    '''
    Set ``dest.text_decoration`` from the ``w:u`` children of *parent*.

    A ``w:val`` of ``'none'`` explicitly turns underlining off and is stored
    as ``'none'``. Any other non-empty value is stored as ``'underline'``,
    whatever the underline style. ``inherit`` is stored when there is no
    value.
    '''
    ans = inherit
    for elem in XPath('./w:u[@w:val]')(parent):
        val = get(elem, 'w:val')
        if val:
            ans = 'none' if val == 'none' else 'underline'
    setattr(dest, 'text_decoration', ans)
|
||||
|
||||
def read_vert_align(parent, dest):
    '''
    Set ``dest.vert_align`` to the ``w:val`` of the ``w:vertAlign`` children
    of *parent* when it is ``baseline``, ``subscript`` or ``superscript``;
    otherwise to ``inherit``.
    '''
    alignment = inherit
    for elem in XPath('./w:vertAlign[@w:val]')(parent):
        val = get(elem, 'w:val')
        if not val:
            continue
        if val in {'baseline', 'subscript', 'superscript'}:
            alignment = val
    dest.vert_align = alignment
|
||||
# }}}
|
||||
|
||||
class RunStyle(object):
    '''
    Character level formatting read from a run properties element.

    Every name in ``all_properties`` is an attribute of the instance. A value
    of ``inherit`` means the property is not specified by this style. The
    ``css`` property renders the specified properties as an OrderedDict of
    CSS declarations.
    '''

    all_properties = {
        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',

        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
    }

    toggle_properties = {
        'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'vanish',
    }

    def __init__(self, rPr=None):
        '''
        :param rPr: The run properties element to read from. When None, every
                    property is initialized to ``inherit``.
        '''
        self.linked_style = None
        if rPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            # Simple on/off child elements
            for p in (
                'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
                'smallCaps', 'strike', 'vanish',
            ):
                setattr(self, p, binary_property(rPr, p))

            # Each read_* function sets one or more attributes on self
            for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
                f = globals()['read_%s' % x]
                f(rPr, self)

            for s in XPath('./w:rStyle[@w:val]')(rPr):
                self.linked_style = get(s, 'w:val')

        self._css = None  # cache for the css property

    def update(self, other):
        ''' Override the properties of this style with all properties that are specified in *other*. '''
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
        if other.linked_style is not None:
            self.linked_style = other.linked_style
        self._css = None  # property values may have changed

    def resolve_based_on(self, parent):
        ''' Fill in every property that is not specified in this style with the value from *parent*. '''
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))
        self._css = None  # property values may have changed

    @property
    def css(self):
        ''' The specified properties as an OrderedDict mapping CSS property names to values (computed once and cached). '''
        if self._css is None:
            c = self._css = OrderedDict()
            td = set()
            if self.text_decoration is not inherit:
                td.add(self.text_decoration)
            # The on/off properties hold True, False or the inherit sentinel.
            # The sentinel is truthy, so they must be compared with
            # ``is True``; a plain truth test would apply the effect to every
            # run that does not specify it.
            if self.strike is True:
                td.add('line-through')
            if self.dstrike is True:
                td.add('overline')
                td.add('line-through')
            if len(td) > 1:
                # 'none' cannot be combined with other keywords
                td.discard('none')
            if td:
                # Sorted so that equal styles always produce equal CSS
                c['text-decoration'] = ' '.join(sorted(td))
            if self.caps is True:
                c['text-transform'] = 'uppercase'
            if self.i is True:
                c['font-style'] = 'italic'
            if self.shadow is True:
                c['text-shadow'] = '2px 2px'
            if self.smallCaps is True:
                c['font-variant'] = 'small-caps'
            if self.vanish is True:
                c['display'] = 'none'

            for x in ('color', 'style', 'width'):
                val = getattr(self, 'border_'+x)
                if x == 'width' and val is not inherit:
                    val = '%.3gpt' % val
                if val is not inherit:
                    c['border-%s' % x] = val
            if self.padding is not inherit:
                c['padding'] = '%.3gpt' % self.padding

            for x in ('color', 'background_color'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = val

            for x in ('letter_spacing', 'font_size'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = '%.3gpt' % val

            # A highlight overrides any background color set above
            if self.highlight is not inherit and self.highlight != 'transparent':
                c['background-color'] = self.highlight
        return self._css

    def same_border(self, other):
        '''
        Return True only when both this style and *other* declare at least
        one border property in their CSS and their border color, style and
        width are all equal.
        '''
        keys = tuple('border-%s' % y for y in ('color', 'style', 'width'))
        for x in (self, other):
            if not any(k in x.css for k in keys):
                return False

        s = tuple(self.css.get(k, None) for k in keys)
        o = tuple(other.css.get(k, None) for k in keys)
        return s == o
|
||||
|
@ -6,356 +6,23 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import OrderedDict
|
||||
from collections import OrderedDict, Counter
|
||||
|
||||
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
|
||||
from calibre.ebooks.docx.char_styles import RunStyle
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
|
||||
class Inherit:
|
||||
pass
|
||||
inherit = Inherit()
|
||||
|
||||
def binary_property(parent, name):
|
||||
vals = XPath('./w:%s' % name)(parent)
|
||||
if not vals:
|
||||
return inherit
|
||||
val = get(vals[0], 'w:val', 'on')
|
||||
return True if val in {'on', '1', 'true'} else False
|
||||
|
||||
def simple_color(col, auto='black'):
|
||||
if not col or col == 'auto' or len(col) != 6:
|
||||
return auto
|
||||
return '#'+col
|
||||
|
||||
def simple_float(val, mult=1.0):
|
||||
try:
|
||||
return float(val) * mult
|
||||
except (ValueError, TypeError, AttributeError, KeyError):
|
||||
return None
|
||||
|
||||
# Block styles {{{
|
||||
|
||||
LINE_STYLES = { # {{{
|
||||
'basicBlackDashes': 'dashed',
|
||||
'basicBlackDots': 'dotted',
|
||||
'basicBlackSquares': 'dashed',
|
||||
'basicThinLines': 'solid',
|
||||
'dashDotStroked': 'groove',
|
||||
'dashed': 'dashed',
|
||||
'dashSmallGap': 'dashed',
|
||||
'dotDash': 'dashed',
|
||||
'dotDotDash': 'dashed',
|
||||
'dotted': 'dotted',
|
||||
'double': 'double',
|
||||
'inset': 'inset',
|
||||
'nil': 'none',
|
||||
'none': 'none',
|
||||
'outset': 'outset',
|
||||
'single': 'solid',
|
||||
'thick': 'solid',
|
||||
'thickThinLargeGap': 'double',
|
||||
'thickThinMediumGap': 'double',
|
||||
'thickThinSmallGap' : 'double',
|
||||
'thinThickLargeGap': 'double',
|
||||
'thinThickMediumGap': 'double',
|
||||
'thinThickSmallGap': 'double',
|
||||
'thinThickThinLargeGap': 'double',
|
||||
'thinThickThinMediumGap': 'double',
|
||||
'thinThickThinSmallGap': 'double',
|
||||
'threeDEmboss': 'ridge',
|
||||
'threeDEngrave': 'groove',
|
||||
'triple': 'double',
|
||||
} # }}}
|
||||
|
||||
def read_border(parent, dest):
|
||||
tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
|
||||
'border_%s_style':inherit, 'border_%s_color':inherit}
|
||||
vals = {}
|
||||
for edge in ('left', 'top', 'right', 'bottom'):
|
||||
vals.update({k % edge:v for k, v in tvals.iteritems()})
|
||||
|
||||
for border in XPath('./w:pBdr')(parent):
|
||||
for edge in ('left', 'top', 'right', 'bottom'):
|
||||
for elem in XPath('./w:%s' % edge):
|
||||
color = get(elem, 'w:color')
|
||||
if color is not None:
|
||||
vals['border_%s_color' % edge] = simple_color(color)
|
||||
style = get(elem, 'w:val')
|
||||
if style is not None:
|
||||
vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
|
||||
space = get(elem, 'w:space')
|
||||
if space is not None:
|
||||
try:
|
||||
vals['padding_%s' % edge] = float(space)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
sz = get(elem, 'w:sz')
|
||||
if sz is not None:
|
||||
# we dont care about art borders (they are only used for page borders)
|
||||
try:
|
||||
vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
for key, val in vals.iteritems():
|
||||
setattr(dest, key, val)
|
||||
|
||||
def read_indent(parent, dest):
|
||||
padding_left = padding_right = text_indent = inherit
|
||||
for indent in XPath('./w:ind')(parent):
|
||||
l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
|
||||
pl = simple_float(lc, 0.01) if lc is not None else simple_float(l, 0.05) if l is not None else None
|
||||
if pl is not None:
|
||||
padding_left = '%.3f%s' % (pl, 'em' if lc is not None else 'pt')
|
||||
|
||||
r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
|
||||
pr = simple_float(rc, 0.01) if rc is not None else simple_float(r, 0.05) if r is not None else None
|
||||
if pr is not None:
|
||||
padding_right = '%.3f%s' % (pr, 'em' if rc is not None else 'pt')
|
||||
|
||||
h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
|
||||
fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
|
||||
ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
|
||||
simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
|
||||
if ti is not None:
|
||||
text_indent = '%.3f%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
|
||||
|
||||
setattr(dest, 'margin_left', padding_left)
|
||||
setattr(dest, 'margin_right', padding_right)
|
||||
setattr(dest, 'text_indent', text_indent)
|
||||
|
||||
def read_justification(parent, dest):
|
||||
ans = inherit
|
||||
for jc in XPath('./w:jc[@w:val]')(parent):
|
||||
val = get(jc, 'w:val')
|
||||
if not val:
|
||||
continue
|
||||
if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
|
||||
ans = 'justify'
|
||||
if val in {'left', 'center', 'right',}:
|
||||
ans = val
|
||||
setattr(dest, 'text_align', ans)
|
||||
|
||||
def read_spacing(parent, dest):
|
||||
padding_top = padding_bottom = line_height = inherit
|
||||
for s in XPath('./w:spacing')(parent):
|
||||
a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
|
||||
pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
|
||||
if pb is not None:
|
||||
padding_bottom = '%.3f%s' % (pb, 'ex' if al is not None else 'pt')
|
||||
|
||||
b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
|
||||
pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
|
||||
if pt is not None:
|
||||
padding_top = '%.3f%s' % (pt, 'ex' if bl is not None else 'pt')
|
||||
|
||||
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
|
||||
if l is not None:
|
||||
lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
|
||||
line_height = '%.3f%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
|
||||
|
||||
setattr(dest, 'margin_top', padding_top)
|
||||
setattr(dest, 'margin_bottom', padding_bottom)
|
||||
setattr(dest, 'line_height', line_height)
|
||||
|
||||
def read_direction(parent, dest):
|
||||
ans = inherit
|
||||
for jc in XPath('./w:textFlow[@w:val]')(parent):
|
||||
val = get(jc, 'w:val')
|
||||
if not val:
|
||||
continue
|
||||
if 'rl' in val.lower():
|
||||
ans = 'rtl'
|
||||
setattr(dest, 'direction', ans)
|
||||
|
||||
def read_shd(parent, dest):
|
||||
ans = inherit
|
||||
for shd in XPath('./w:shd[@w:fill]')(parent):
|
||||
val = get(shd, 'w:fill')
|
||||
if val:
|
||||
ans = simple_color(val, auto='transparent')
|
||||
setattr(dest, 'background_color', ans)
|
||||
|
||||
class ParagraphStyle(object):
|
||||
|
||||
all_properties = (
|
||||
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
|
||||
'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
|
||||
'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
|
||||
'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
|
||||
|
||||
# Border margins padding
|
||||
'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
|
||||
'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
|
||||
'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
|
||||
'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
|
||||
'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
|
||||
|
||||
# Misc.
|
||||
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
|
||||
)
|
||||
|
||||
def __init__(self, pPr):
|
||||
for p in (
|
||||
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
|
||||
'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
|
||||
'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
|
||||
'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
|
||||
):
|
||||
setattr(self, p, binary_property(pPr, p))
|
||||
|
||||
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
|
||||
f = globals()['read_%s' % x]
|
||||
f(pPr, self)
|
||||
|
||||
# TODO: numPr and outlineLvl
|
||||
|
||||
def update(self, other):
|
||||
for prop in self.all_properties:
|
||||
nval = getattr(other, prop)
|
||||
if nval is not inherit:
|
||||
setattr(self, prop, nval)
|
||||
|
||||
# }}}
|
||||
|
||||
# Character styles {{{
|
||||
def read_text_border(parent, dest):
|
||||
border_color = border_style = border_width = padding = inherit
|
||||
elems = XPath('./w:bdr')(parent)
|
||||
if elems:
|
||||
border_color = simple_color('auto')
|
||||
border_style = 'solid'
|
||||
border_width = 1
|
||||
for elem in elems:
|
||||
color = get(elem, 'w:color')
|
||||
if color is not None:
|
||||
border_color = simple_color(color)
|
||||
style = get(elem, 'w:val')
|
||||
if style is not None:
|
||||
border_style = LINE_STYLES.get(style, 'solid')
|
||||
space = get(elem, 'w:space')
|
||||
if space is not None:
|
||||
try:
|
||||
padding = float(space)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
sz = get(elem, 'w:sz')
|
||||
if sz is not None:
|
||||
# we dont care about art borders (they are only used for page borders)
|
||||
try:
|
||||
border_width = min(96, max(2, float(sz))) / 8
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
setattr(dest, 'border_color', border_color)
|
||||
setattr(dest, 'border_style', border_style)
|
||||
setattr(dest, 'border_width', border_width)
|
||||
setattr(dest, 'padding', padding)
|
||||
|
||||
def read_color(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:color[@w:val]')(parent):
|
||||
val = get(col, 'w:val')
|
||||
if not val:
|
||||
continue
|
||||
ans = simple_color(val)
|
||||
setattr(dest, 'color', ans)
|
||||
|
||||
def read_highlight(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:highlight[@w:val]')(parent):
|
||||
val = get(col, 'w:val')
|
||||
if not val:
|
||||
continue
|
||||
if not val or val == 'none':
|
||||
val = 'transparent'
|
||||
ans = val
|
||||
setattr(dest, 'highlight', ans)
|
||||
|
||||
def read_lang(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:lang[@w:val]')(parent):
|
||||
val = get(col, 'w:val')
|
||||
if not val:
|
||||
continue
|
||||
try:
|
||||
code = int(val, 16)
|
||||
except (ValueError, TypeError):
|
||||
ans = val
|
||||
else:
|
||||
from calibre.ebooks.docx.lcid import lcid
|
||||
val = lcid.get(code, None)
|
||||
if val:
|
||||
ans = val
|
||||
setattr(dest, 'lang', ans)
|
||||
|
||||
def read_letter_spacing(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:spacing[@w:val]')(parent):
|
||||
val = simple_float(get(col, 'w:val'), 0.05)
|
||||
if val:
|
||||
ans = val
|
||||
setattr(dest, 'letter_spacing', ans)
|
||||
|
||||
def read_sz(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:sz[@w:val]')(parent):
|
||||
val = simple_float(get(col, 'w:val'), 0.5)
|
||||
if val:
|
||||
ans = val
|
||||
setattr(dest, 'font_size', ans)
|
||||
|
||||
def read_underline(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:u[@w:val]')(parent):
|
||||
val = get(col, 'w:val')
|
||||
if val:
|
||||
ans = 'underline'
|
||||
setattr(dest, 'text_decoration', ans)
|
||||
|
||||
def read_vert_align(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:vertAlign[@w:val]')(parent):
|
||||
val = get(col, 'w:val')
|
||||
if val and val in {'baseline', 'subscript', 'superscript'}:
|
||||
ans = val
|
||||
setattr(dest, 'vert_align', ans)
|
||||
|
||||
|
||||
class RunStyle(object):
|
||||
|
||||
all_properties = (
|
||||
'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
|
||||
'smallCaps', 'strike', 'vanish',
|
||||
|
||||
'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background-color',
|
||||
'letter_spacing', 'font_size', 'text_decoration', 'vert_align',
|
||||
)
|
||||
|
||||
def __init__(self, rPr):
|
||||
for p in (
|
||||
'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
|
||||
'smallCaps', 'strike', 'vanish',
|
||||
):
|
||||
setattr(self, p, binary_property(rPr, p))
|
||||
|
||||
for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align'):
|
||||
f = globals()['read_%s' % x]
|
||||
f(rPr, self)
|
||||
|
||||
def update(self, other):
|
||||
for prop in self.all_properties:
|
||||
nval = getattr(other, prop)
|
||||
if nval is not inherit:
|
||||
setattr(self, prop, nval)
|
||||
# }}}
|
||||
|
||||
class Style(object):
|
||||
'''
|
||||
Class representing a <w:style> element. Can contain block, character, etc. styles.
|
||||
'''
|
||||
|
||||
name_path = XPath('./w:name[@w:val]')
|
||||
based_on_path = XPath('./w:basedOn[@w:val]')
|
||||
link_path = XPath('./w:link[@w:val]')
|
||||
|
||||
def __init__(self, elem):
|
||||
self.resolved = False
|
||||
self.style_id = get(elem, 'w:styleId')
|
||||
self.style_type = get(elem, 'w:type')
|
||||
names = self.name_path(elem)
|
||||
@ -364,10 +31,6 @@ class Style(object):
|
||||
self.based_on = get(based_on[0], 'w:val') if based_on else None
|
||||
if self.style_type == 'numbering':
|
||||
self.based_on = None
|
||||
link = self.link_path(elem)
|
||||
self.link = get(link[0], 'w:val') if link else None
|
||||
if self.style_type not in {'paragraph', 'character'}:
|
||||
self.link = None
|
||||
|
||||
self.paragraph_style = self.character_style = None
|
||||
|
||||
@ -387,11 +50,30 @@ class Style(object):
|
||||
else:
|
||||
self.character_style.update(rs)
|
||||
|
||||
def resolve_based_on(self, parent):
    '''
    Resolve the paragraph and character parts of this style against the
    corresponding parts of ``parent``.

    A part that ``parent`` lacks is left untouched. A part that only
    ``parent`` has is first created empty on this style and then resolved.

    :param parent: Object exposing ``paragraph_style`` and ``character_style``.
    '''
    base = parent.paragraph_style
    if base is not None:
        mine = self.paragraph_style
        if mine is None:
            mine = self.paragraph_style = ParagraphStyle()
        mine.resolve_based_on(base)

    base = parent.character_style
    if base is not None:
        mine = self.character_style
        if mine is None:
            mine = self.character_style = RunStyle()
        mine.resolve_based_on(base)
|
||||
|
||||
|
||||
class Styles(object):
|
||||
|
||||
'''
|
||||
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
|
||||
'''
|
||||
|
||||
def __init__(self):
|
||||
self.id_map = OrderedDict()
|
||||
self.para_cache = {}
|
||||
self.para_char_cache = {}
|
||||
self.run_cache = {}
|
||||
self.classes = {}
|
||||
self.counter = Counter()
|
||||
|
||||
def __iter__(self):
|
||||
for s in self.id_map.itervalues():
|
||||
@ -412,19 +94,160 @@ class Styles(object):
|
||||
if s.style_id:
|
||||
self.id_map[s.style_id] = s
|
||||
|
||||
# Nuke based_on, link attributes that refer to missing/incompatible
|
||||
# styles
|
||||
self.default_paragraph_style = self.default_character_style = None
|
||||
|
||||
for dd in XPath('./w:docDefaults')(root):
|
||||
for pd in XPath('./w:pPrDefault')(dd):
|
||||
for pPr in XPath('./w:pPr')(pd):
|
||||
ps = ParagraphStyle(pPr)
|
||||
if self.default_paragraph_style is None:
|
||||
self.default_paragraph_style = ps
|
||||
else:
|
||||
self.default_paragraph_style.update(ps)
|
||||
for pd in XPath('./w:rPrDefault')(dd):
|
||||
for pPr in XPath('./w:rPr')(pd):
|
||||
ps = RunStyle(pPr)
|
||||
if self.default_character_style is None:
|
||||
self.default_character_style = ps
|
||||
else:
|
||||
self.default_character_style.update(ps)
|
||||
|
||||
def resolve(s, p):
|
||||
if p is not None:
|
||||
if not p.resolved:
|
||||
resolve(p, self.get(p.based_on))
|
||||
s.resolve_based_on(p)
|
||||
s.resolved = True
|
||||
|
||||
for s in self:
|
||||
bo = s.based_on
|
||||
if bo is not None:
|
||||
p = self.get(bo)
|
||||
if p is None or p.style_type != s.style_type:
|
||||
s.based_on = None
|
||||
link = s.link
|
||||
if link is not None:
|
||||
p = self.get(link)
|
||||
if p is None or (s.style_type, p.style_type) not in {('paragraph', 'character'), ('character', 'paragraph')}:
|
||||
s.link = None
|
||||
if not s.resolved:
|
||||
resolve(s, self.get(s.based_on))
|
||||
|
||||
# TODO: Document defaults (docDefaults)
|
||||
def para_val(self, parent_styles, direct_formatting, attr):
    '''
    Return the effective value of ``attr`` for a paragraph.

    The value from ``direct_formatting`` wins. Otherwise ``parent_styles`` is
    searched from last to first for a value that is not ``inherit``. If none
    is found, ``inherit`` itself is returned.
    '''
    own = getattr(direct_formatting, attr)
    if own is not inherit:
        return own
    for style in reversed(parent_styles):
        candidate = getattr(style, attr)
        if candidate is not inherit:
            return candidate
    return own
|
||||
|
||||
def run_val(self, parent_styles, direct_formatting, attr):
    '''
    Return the effective value of ``attr`` for a run.

    The value from ``direct_formatting`` wins. For names listed in
    ``direct_formatting.toggle_properties`` the result is True when an odd
    number of ``parent_styles`` have the value True, and False otherwise.
    For all other names ``parent_styles`` is searched from last to first for
    a value that is not ``inherit``; ``inherit`` is returned if none is found.
    '''
    own = getattr(direct_formatting, attr)
    if own is not inherit:
        return own

    if attr in direct_formatting.toggle_properties:
        # Each style that switches the toggle on flips the current state
        switched_on = [style for style in parent_styles if getattr(style, attr) is True]
        return len(switched_on) % 2 == 1

    for style in reversed(parent_styles):
        candidate = getattr(style, attr)
        if candidate is not inherit:
            return candidate
    return own
|
||||
|
||||
def resolve_paragraph(self, p):
    '''
    Return the resolved paragraph style for the element ``p``, computing and
    caching it in ``para_cache`` on first use.

    As a side effect, the character style of the style linked from the
    paragraph (if any) is stored in ``para_char_cache`` under ``p``.
    '''
    cached = self.para_cache.get(p, None)
    if cached is not None:
        return cached

    # Cache the result object before it is filled in
    resolved = self.para_cache[p] = ParagraphStyle()
    resolved.style_name = None

    # Collapse all <w:pPr> children into a single object via update()
    direct = None
    for pPr in XPath('./w:pPr')(p):
        current = ParagraphStyle(pPr)
        if direct is None:
            direct = current
        else:
            direct.update(current)
    if direct is None:
        direct = ParagraphStyle()

    # Lowest priority first: document default, then the linked style
    chain = []
    if self.default_paragraph_style is not None:
        chain.append(self.default_paragraph_style)

    linked = None
    if direct.linked_style is not None:
        linked = self.get(direct.linked_style)
    if linked is not None:
        resolved.style_name = linked.name
        linked_para = linked.paragraph_style
        if linked_para is not None:
            chain.append(linked_para)
        if linked.character_style is not None:
            self.para_char_cache[p] = linked.character_style

    for attr in resolved.all_properties:
        setattr(resolved, attr, self.para_val(chain, direct, attr))
    return resolved
|
||||
|
||||
def resolve_run(self, r):
    '''
    Return the resolved run style for the element ``r``, computing and
    caching it in ``run_cache`` on first use.

    Parent styles are consulted in this order (lowest priority first): the
    default character style, the character style linked from the parent
    paragraph, and the character style referenced by the run itself.
    '''
    ans = self.run_cache.get(r, None)
    if ans is not None:
        return ans

    p = r.getparent()
    # para_char_cache is only filled in by resolve_paragraph(). Resolve the
    # parent paragraph first so the result does not depend on the order in
    # which callers resolve paragraphs and runs. The tag test mirrors the
    # one used by resolve().
    if p is not None and p.tag.endswith('}p'):
        self.resolve_paragraph(p)

    ans = self.run_cache[r] = RunStyle()

    # Collapse all <w:rPr> children into a single object via update()
    direct_formatting = None
    for rPr in XPath('./w:rPr')(r):
        rs = RunStyle(rPr)
        if direct_formatting is None:
            direct_formatting = rs
        else:
            direct_formatting.update(rs)
    if direct_formatting is None:
        direct_formatting = RunStyle()

    parent_styles = []
    if self.default_character_style is not None:
        parent_styles.append(self.default_character_style)
    pstyle = self.para_char_cache.get(p, None)
    if pstyle is not None:
        parent_styles.append(pstyle)
    if direct_formatting.linked_style is not None:
        # The referenced style id may not exist in the document, in which
        # case self.get() returns None; treat that as "no linked style"
        # instead of failing with an AttributeError.
        ls = getattr(self.get(direct_formatting.linked_style), 'character_style', None)
        if ls is not None:
            parent_styles.append(ls)

    for attr in ans.all_properties:
        setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))

    return ans
|
||||
|
||||
def resolve(self, obj):
    '''
    Resolve the style of ``obj`` based on its tag: tags ending in ``}p`` go
    to resolve_paragraph(), tags ending in ``}r`` go to resolve_run().
    Returns None for any other tag.
    '''
    tag = obj.tag
    if tag.endswith('}p'):
        return self.resolve_paragraph(obj)
    if tag.endswith('}r'):
        return self.resolve_run(obj)
    return None
|
||||
|
||||
def register(self, css, prefix):
    '''
    Return the class name for the ``css`` mapping, allocating a new name of
    the form ``<prefix>_<n>`` the first time a given mapping is seen.

    Mappings are identified by the hash of their (key, value) items, so an
    already registered mapping keeps its original name whatever ``prefix``
    is passed.
    '''
    key = hash(tuple(css.iteritems()))
    name = self.classes.get(key, (None, None))[0]
    if name is None:
        count = self.counter[prefix] + 1
        self.counter[prefix] = count
        name = '%s_%d' % (prefix, count)
        self.classes[key] = (name, css)
    return name
|
||||
|
||||
def generate_classes(self):
    '''
    Register a class for the css of every cached style: paragraph styles
    under the ``block`` prefix, then run styles under the ``text`` prefix.
    Styles whose css is empty are skipped.
    '''
    for prefix, cache in (('block', self.para_cache), ('text', self.run_cache)):
        for style in cache.itervalues():
            css = style.css
            if css:
                self.register(css, prefix)
|
||||
|
||||
def class_name(self, css):
    '''
    Return the class name previously registered for the ``css`` mapping, or
    None if it was never registered. Uses the same hash key as register().
    '''
    key = hash(tuple(css.iteritems()))
    fallback = (None, None)
    entry = self.classes.get(key, fallback)
    return entry[0]
|
||||
|
||||
def generate_css(self):
    '''
    Serialize all registered classes as CSS rules, sorted by class name.

    Each declaration is written on its own tab indented line; trailing
    semicolons are stripped from the end of every rule body.
    '''
    rules = []
    ordered = sorted(self.classes.itervalues(), key=lambda entry: entry[0])
    for cls, css in ordered:
        body = '\n'.join('\t%s: %s;' % (k, v) for k, v in css.iteritems())
        rules.append('.%s {\n%s\n}\n' % (cls, body.rstrip(';')))
    return '\n'.join(rules)
|
||||
|
||||
|
@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import sys, os
|
||||
import sys, os, re
|
||||
|
||||
from lxml import html
|
||||
from lxml.html.builder import (
|
||||
@ -14,7 +14,7 @@ from lxml.html.builder import (
|
||||
|
||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES
|
||||
from calibre.ebooks.docx.styles import Styles
|
||||
from calibre.ebooks.docx.styles import Styles, inherit
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
|
||||
class Text:
|
||||
@ -35,6 +35,7 @@ class Convert(object):
|
||||
self.mi = self.docx.metadata
|
||||
self.body = BODY()
|
||||
self.styles = Styles()
|
||||
self.object_map = {}
|
||||
self.html = HTML(
|
||||
HEAD(
|
||||
META(charset='utf-8'),
|
||||
@ -75,6 +76,16 @@ class Convert(object):
|
||||
for child in self.body:
|
||||
child.tail = '\n\t'
|
||||
self.body[-1].tail = '\n'
|
||||
|
||||
self.styles.generate_classes()
|
||||
for obj, html_obj in self.object_map.iteritems():
|
||||
style = self.styles.resolve(obj)
|
||||
if style is not None:
|
||||
css = style.css
|
||||
if css:
|
||||
cls = self.styles.class_name(css)
|
||||
if cls:
|
||||
html_obj.set('class', cls)
|
||||
self.write()
|
||||
|
||||
def read_styles(self, relationships_by_type):
|
||||
@ -96,6 +107,10 @@ class Convert(object):
|
||||
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
|
||||
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
|
||||
f.write(raw)
|
||||
css = self.styles.generate_css()
|
||||
if css:
|
||||
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
|
||||
f.write(css.encode('utf-8'))
|
||||
|
||||
def convert_p(self, p):
|
||||
dest = P()
|
||||
@ -103,10 +118,58 @@ class Convert(object):
|
||||
span = self.convert_run(run)
|
||||
dest.append(span)
|
||||
|
||||
style = self.styles.resolve_paragraph(p)
|
||||
m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
|
||||
if m is not None:
|
||||
n = min(1, max(6, int(m.group(1))))
|
||||
dest.tag = 'h%d' % n
|
||||
|
||||
if style.direction == 'rtl':
|
||||
dest.set('dir', 'rtl')
|
||||
|
||||
border_runs = []
|
||||
common_borders = []
|
||||
for span in dest:
|
||||
run = self.object_map[span]
|
||||
style = self.styles.resolve_run(run)
|
||||
if not border_runs or border_runs[-1][1].same_border(style):
|
||||
border_runs.append((span, style))
|
||||
elif border_runs:
|
||||
if len(border_runs) > 1:
|
||||
common_borders.append(border_runs)
|
||||
border_runs = []
|
||||
|
||||
for border_run in common_borders:
|
||||
spans = []
|
||||
bs = {}
|
||||
for span, style in border_run:
|
||||
c = style.css
|
||||
spans.append(span)
|
||||
for x in ('width', 'color', 'style'):
|
||||
val = c.pop('border-%s' % x, None)
|
||||
if val is not None:
|
||||
bs['border-%s' % x] = val
|
||||
if bs:
|
||||
cls = self.styles.register(bs, 'text_border')
|
||||
wrapper = self.wrap_elems(spans, SPAN())
|
||||
wrapper.set('class', cls)
|
||||
|
||||
self.object_map[p] = dest
|
||||
return dest
|
||||
|
||||
def wrap_elems(self, elems, wrapper):
    '''
    Move the sibling elements ``elems`` into ``wrapper`` and put ``wrapper``
    in the position previously occupied by the first of them.

    The tail text of the last element is transferred to ``wrapper`` so that
    it stays after the wrapped content.

    :param elems: Non-empty list of elements sharing the same parent.
    :param wrapper: The new element that will contain ``elems``.
    :return: ``wrapper``, so that callers can continue to configure it.
    '''
    p = elems[0].getparent()
    idx = p.index(elems[0])
    p.insert(idx, wrapper)
    # Text following the last wrapped element must now follow the wrapper
    wrapper.tail = elems[-1].tail
    elems[-1].tail = None
    for elem in elems:
        p.remove(elem)
        wrapper.append(elem)
    # convert_p() uses the return value to set the class on the wrapper
    return wrapper
|
||||
|
||||
def convert_run(self, run):
|
||||
ans = SPAN()
|
||||
ans.run = run
|
||||
text = Text(ans, 'text', [])
|
||||
|
||||
for child in run:
|
||||
@ -121,6 +184,7 @@ class Convert(object):
|
||||
text.buf.append(child.text)
|
||||
elif is_tag(child, 'w:cr'):
|
||||
text.add_elem(BR())
|
||||
ans.append(text.elem)
|
||||
elif is_tag(child, 'w:br'):
|
||||
typ = child.get('type', None)
|
||||
if typ in {'column', 'page'}:
|
||||
@ -132,8 +196,16 @@ class Convert(object):
|
||||
else:
|
||||
br = BR()
|
||||
text.add_elem(br)
|
||||
ans.append(text.elem)
|
||||
if text.buf:
|
||||
setattr(text.elem, text.attr, ''.join(text.buf))
|
||||
|
||||
style = self.styles.resolve_run(run)
|
||||
if style.vert_align in {'superscript', 'subscript'}:
|
||||
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
|
||||
if style.lang is not inherit:
|
||||
ans.lang = style.lang
|
||||
self.object_map[ans] = run
|
||||
return ans
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user