DOCX: frames

This commit is contained in:
Kovid Goyal 2013-05-21 16:52:06 +05:30
parent 830d797f7a
commit 03df8de173
2 changed files with 123 additions and 8 deletions

View File

@ -151,8 +151,8 @@ def read_spacing(parent, dest):
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
if l is not None:
lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
lh = simple_float(l, 0.05) if lr in {'exact', 'atLeast'} else simple_float(l, 1/240.0)
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exact', 'atLeast'} else '')
setattr(dest, 'margin_top', padding_top)
setattr(dest, 'margin_bottom', padding_bottom)
@ -189,6 +189,89 @@ def read_numbering(parent, dest):
val = (num_id, lvl) if num_id is not None or lvl is not None else inherit
setattr(dest, 'numbering', val)
class Frame(object):

    '''
    Frame properties read from a ``w:framePr`` element.

    Lengths (``h``, ``w``, ``x``, ``y``, ``h_space``, ``v_space``) are stored
    as the raw integer attribute divided by 20 and are later emitted with a
    ``pt`` suffix by :meth:`css`. Two frames compare equal when every name in
    ``all_attributes`` has the same value on both.
    '''

    all_attributes = ('drop_cap', 'h', 'w', 'h_anchor', 'h_rule', 'v_anchor', 'wrap',
                      'h_space', 'v_space', 'lines', 'x_align', 'y_align', 'x', 'y')

    def __init__(self, fp):
        '''
        :param fp: The ``w:framePr`` element to read attributes from.
        '''
        self.drop_cap = get(fp, 'w:dropCap', 'none')
        # Width falls back to None (meaning "emit no width"); the other
        # lengths fall back to 0.
        self.h = self._length(fp, 'w:h', 0)
        self.w = self._length(fp, 'w:w', None)
        self.x = self._length(fp, 'w:x', 0)
        self.y = self._length(fp, 'w:y', 0)

        self.h_anchor = get(fp, 'w:hAnchor', 'page')
        self.h_rule = get(fp, 'w:hRule', 'auto')
        self.v_anchor = get(fp, 'w:vAnchor', 'page')
        self.wrap = get(fp, 'w:wrap', 'around')
        self.x_align = get(fp, 'w:xAlign')
        self.y_align = get(fp, 'w:yAlign')

        self.h_space = self._length(fp, 'w:hSpace', 0)
        self.v_space = self._length(fp, 'w:vSpace', 0)

        # w:lines is a plain count, not a length, so it is not scaled.
        try:
            self.lines = int(get(fp, 'w:lines'))
        except (ValueError, TypeError):
            self.lines = 1

    @staticmethod
    def _length(fp, name, default):
        '''
        Return the integer attribute ``name`` of ``fp`` divided by 20, or
        ``default`` when the value cannot be converted with ``int()``
        (presumably because the attribute is absent or malformed).
        '''
        try:
            return int(get(fp, name))/20
        except (ValueError, TypeError):
            return default

    def css(self, page):
        '''
        Build the CSS declarations for this frame.

        :param page: Object whose ``width`` attribute is compared against
            ``self.x`` to choose the float side when no explicit horizontal
            alignment is set.
        :return: dict mapping CSS property names to string values.
        '''
        is_dropcap = self.drop_cap in {'drop', 'margin'}
        ans = {'overflow': 'hidden'}

        if is_dropcap:
            # Drop caps are always floated left and ignore the frame geometry.
            ans['float'] = 'left'
            ans['margin'] = '0'
            ans['padding-right'] = '0.2em'
        else:
            if self.h_rule != 'auto':
                t = 'min-height' if self.h_rule == 'atLeast' else 'height'
                ans[t] = '%.3gpt' % self.h
            if self.w is not None:
                ans['width'] = '%.3gpt' % self.w
            ans['padding-top'] = ans['padding-bottom'] = '%.3gpt' % self.v_space
            # NOTE(review): nesting reconstructed from a listing without
            # indentation -- confirm the float is only set when text wraps.
            if self.wrap not in {None, 'none'}:
                ans['padding-left'] = ans['padding-right'] = '%.3gpt' % self.h_space
                if self.x_align is None:
                    # No explicit alignment: float towards whichever half of
                    # the page the frame's x offset falls in.
                    fl = 'left' if self.x/page.width < 0.5 else 'right'
                else:
                    fl = 'right' if self.x_align == 'right' else 'left'
                ans['float'] = fl
        return ans

    def __eq__(self, other):
        # A missing attribute on ``other`` is read as ``inherit``, so objects
        # that are not frames compare unequal rather than raising.
        for x in self.all_attributes:
            if getattr(other, x, inherit) != getattr(self, x):
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)
def read_frame(parent, dest):
    '''
    Set ``dest.frame`` from the ``w:framePr`` children of ``parent``.

    The attribute becomes a :class:`Frame` built from the last matching
    child, or ``inherit`` when there is no such child.
    '''
    frame = inherit
    for frame_pr in XPath('./w:framePr')(parent):
        # Later matches replace earlier ones, so the last element wins.
        frame = Frame(frame_pr)
    dest.frame = frame
# }}}
class ParagraphStyle(object):
@ -208,7 +291,7 @@ class ParagraphStyle(object):
# Misc.
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
'numbering', 'font_family', 'font_size',
'numbering', 'font_family', 'font_size', 'frame',
)
def __init__(self, pPr=None):
@ -225,7 +308,7 @@ class ParagraphStyle(object):
):
setattr(self, p, binary_property(pPr, p))
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'):
f = globals()['read_%s' % x]
f(pPr, self)
@ -286,5 +369,3 @@ class ParagraphStyle(object):
return self._css
# TODO: keepNext must be done at markup level

View File

@ -11,7 +11,7 @@ from collections import OrderedDict
from lxml import html
from lxml.html.builder import (
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV)
from calibre.ebooks.docx.container import DOCX, fromstring
from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
@ -68,6 +68,8 @@ class Convert(object):
self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict()
self.framed = [[]]
self.framed_map = {}
self.read_page_properties(doc)
for wp, page_properties in self.page_map.iteritems():
@ -75,7 +77,6 @@ class Convert(object):
p = self.convert_p(wp)
self.body.append(p)
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
# TODO: Last section properties <w:sectPr> child of <w:body>
self.styles.cascade(self.layers)
@ -90,6 +91,7 @@ class Convert(object):
lvl = 0
numbered.append((html_obj, num_id, lvl))
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
self.apply_frames()
if len(self.body) > 0:
self.body.text = '\n\t'
@ -106,6 +108,11 @@ class Convert(object):
cls = self.styles.class_name(css)
if cls:
html_obj.set('class', cls)
for html_obj, css in self.framed_map.iteritems():
cls = self.styles.class_name(css)
if cls:
html_obj.set('class', cls)
self.write()
def read_page_properties(self, doc):
@ -185,6 +192,7 @@ class Convert(object):
self.object_map[dest] = p
style = self.styles.resolve_paragraph(p)
self.layers[p] = []
self.add_frame(dest, style.frame)
for run in XPath('descendant::w:r')(p):
span = self.convert_run(run)
dest.append(span)
@ -278,6 +286,32 @@ class Convert(object):
ans.lang = style.lang
return ans
def add_frame(self, html_obj, style):
    '''
    Record ``html_obj`` in the list of framed paragraph runs.

    ``self.framed`` is a list of runs; each run is a list of
    ``(html_obj, frame_style)`` tuples for consecutive paragraphs that share
    an equal frame style.

    :param html_obj: The HTML element generated for the paragraph.
    :param style: The paragraph's frame style, or ``inherit`` when the
        paragraph has no frame.
    '''
    last_run = self.framed[-1]
    if style is inherit:
        # An unframed paragraph ends the current run; open a fresh empty run
        # only if the current one has content, so empty runs do not pile up.
        if last_run:
            self.framed.append([])
        return

    if not last_run or last_run[-1][1] == style:
        # Either the run is still empty or this paragraph continues it.
        last_run.append((html_obj, style))
    else:
        # A different frame starts a new run. The run must be a list holding
        # the tuple: appending the bare tuple would make later calls index
        # into (html_obj, style) as though it were a run.
        self.framed.append([(html_obj, style)])
def apply_frames(self):
    '''
    Wrap each non-empty run in ``self.framed`` in a DIV and register the
    frame CSS for it.

    The DIV is inserted into the first paragraph's parent at that
    paragraph's index, and its CSS dict is stored in ``self.framed_map``
    keyed by the DIV.
    '''
    for run in self.framed:
        if not run:
            continue
        frame_style = run[0][1]
        paragraphs = tuple(entry[0] for entry in run)
        first = paragraphs[0]
        container = first.getparent()
        # Look up the position before building the DIV -- presumably because
        # constructing it reparents the paragraphs (confirm against lxml).
        position = container.index(first)
        wrapper = DIV(*paragraphs)
        container.insert(position, wrapper)
        # The page properties are looked up via the source object that
        # object_map records for the first paragraph.
        page = self.page_map[self.object_map[first]]
        css = frame_style.css(page)
        self.framed_map[wrapper] = css
        self.styles.register(css, 'frame')
if __name__ == '__main__':
from calibre.utils.logging import default_log
default_log.filter_level = default_log.DEBUG