mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-07 09:01:38 -04:00
DOCX Input: Start work on tables
This commit is contained in:
parent
5887243d56
commit
6eb97d2626
@ -65,35 +65,41 @@ LINE_STYLES = { # {{{
|
||||
} # }}}
|
||||
|
||||
# Read from XML {{{
|
||||
def read_border(parent, dest):
|
||||
tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
|
||||
'border_%s_style':inherit, 'border_%s_color':inherit}
|
||||
vals = {}
|
||||
for edge in ('left', 'top', 'right', 'bottom'):
|
||||
vals.update({k % edge:v for k, v in tvals.iteritems()})
|
||||
|
||||
for border in XPath('./w:pBdr')(parent):
|
||||
for edge in ('left', 'top', 'right', 'bottom'):
|
||||
for elem in XPath('./w:%s' % edge)(border):
|
||||
color = get(elem, 'w:color')
|
||||
if color is not None:
|
||||
vals['border_%s_color' % edge] = simple_color(color)
|
||||
style = get(elem, 'w:val')
|
||||
if style is not None:
|
||||
vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
|
||||
space = get(elem, 'w:space')
|
||||
if space is not None:
|
||||
try:
|
||||
vals['padding_%s' % edge] = float(space)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
sz = get(elem, 'w:sz')
|
||||
if sz is not None:
|
||||
# we don't care about art borders (they are only used for page borders)
|
||||
try:
|
||||
vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
# Attribute-name templates for a single border edge; the '%s' is filled in
# with the edge name. The order matters: read_single_border() zips this tuple
# with (padding, width, style, color).
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
|
||||
|
||||
def read_single_border(parent, edge):
    """Read the border settings for one edge from the children of parent.

    Every ``w:<edge>`` child of ``parent`` is inspected; a later child only
    overrides the values it actually specifies.

    :param parent: element whose ``w:<edge>`` children describe the border
    :param edge: edge name used to build the XPath (e.g. ``'left'``)
    :return: dict keyed by the templates in ``border_props`` whose values are
        the padding, width, style and color that were found, or ``None`` for
        anything that was absent or could not be parsed as a number
    """
    padding = width = style = color = None
    for node in XPath('./w:%s' % edge)(parent):
        raw_color = get(node, 'w:color')
        if raw_color is not None:
            color = simple_color(raw_color)

        line_type = get(node, 'w:val')
        if line_type is not None:
            # Unknown line types fall back to a plain solid border
            style = LINE_STYLES.get(line_type, 'solid')

        raw_space = get(node, 'w:space')
        if raw_space is not None:
            try:
                padding = float(raw_space)
            except (ValueError, TypeError):
                pass

        raw_size = get(node, 'w:sz')
        if raw_size is not None:
            # we don't care about art borders (they are only used for page borders)
            try:
                # Clamp to the range [2, 96] before scaling down by 8
                width = min(96, max(2, float(raw_size))) / 8
            except (ValueError, TypeError):
                pass

    return dict(zip(border_props, (padding, width, style, color)))
|
||||
|
||||
def read_border(parent, dest, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'):
    """Read border properties from ``parent`` and store them on ``dest``.

    :param parent: element searched for ``w:<name>`` children
    :param dest: object that receives one attribute per edge/property pair
    :param border_edges: edge names to read
    :param name: local name of the border container element
    """
    # Start with every edge/property combination set to inherit
    vals = {}
    for edge in border_edges:
        for template in border_props:
            vals[template % edge] = inherit

    for border in XPath('./w:' + name)(parent):
        for edge in border_edges:
            found = read_single_border(border, edge)
            for template, value in found.iteritems():
                # None means "not specified": keep whatever we already have
                if value is not None:
                    vals[template % edge] = value

    for attr, value in vals.iteritems():
        setattr(dest, attr, value)
|
||||
|
@ -17,7 +17,7 @@ class Note(object):
|
||||
self.parent = parent
|
||||
|
||||
def __iter__(self):
    """Yield the ``w:p`` and ``w:tbl`` elements that descendants() finds
    under this note's parent element.

    Tables are yielded as well as paragraphs, since the code that consumes a
    note checks each item's tag for ``}tbl``. Only one loop is needed: keeping
    the older paragraph-only loop around it would repeat every item once per
    paragraph.
    """
    for p in descendants(self.parent, 'w:p', 'w:tbl'):
        yield p
|
||||
|
||||
class Footnotes(object):
|
||||
|
@ -11,6 +11,7 @@ from collections import OrderedDict, Counter
|
||||
|
||||
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
|
||||
from calibre.ebooks.docx.char_styles import RunStyle
|
||||
from calibre.ebooks.docx.tables import TableStyle
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
|
||||
class PageProperties(object):
|
||||
@ -66,10 +67,17 @@ class Style(object):
|
||||
self.based_on = None
|
||||
self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}
|
||||
|
||||
self.paragraph_style = self.character_style = None
|
||||
self.paragraph_style = self.character_style = self.table_style = None
|
||||
|
||||
if self.style_type in {'paragraph', 'character'}:
|
||||
if self.style_type == 'paragraph':
|
||||
if self.style_type in {'paragraph', 'character', 'table'}:
|
||||
if self.style_type == 'table':
|
||||
for tblPr in XPath('./w:tblPr')(elem):
|
||||
ts = TableStyle(tblPr)
|
||||
if self.table_style is None:
|
||||
self.table_style = ts
|
||||
else:
|
||||
self.table_style.update(ts)
|
||||
if self.style_type in {'paragraph', 'table'}:
|
||||
for pPr in XPath('./w:pPr')(elem):
|
||||
ps = ParagraphStyle(pPr)
|
||||
if self.paragraph_style is None:
|
||||
@ -90,6 +98,10 @@ class Style(object):
|
||||
self.numbering_style_link = get(x, 'w:val')
|
||||
|
||||
def resolve_based_on(self, parent):
|
||||
if parent.table_style is not None:
|
||||
if self.table_style is None:
|
||||
self.table_style = TableStyle()
|
||||
self.table_style.resolve_based_on(parent.table_style)
|
||||
if parent.paragraph_style is not None:
|
||||
if self.paragraph_style is None:
|
||||
self.paragraph_style = ParagraphStyle()
|
||||
|
152
src/calibre/ebooks/docx/tables.py
Normal file
152
src/calibre/ebooks/docx/tables.py
Normal file
@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from lxml.html.builder import TABLE, TR, TD
|
||||
|
||||
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border ,border_props # noqa
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
|
||||
def _read_width(elem):
    """Convert the ``w:w`` / ``w:type`` attributes of elem into a CSS value.

    :param elem: element carrying ``w:w`` and (optionally) ``w:type``
    :return: ``'0'`` for type ``nil``, ``'auto'`` for type ``auto`` (also the
        default when ``w:type`` is missing), a ``pt`` length for ``dxa``
        (value / 20), a percentage for ``pct`` (value / 50), and ``inherit``
        for any other type
    """
    try:
        amount = int(get(elem, 'w:w'))
    except (TypeError, ValueError):
        # Missing or non-integer width is treated as zero
        amount = 0

    kind = get(elem, 'w:type', 'auto')
    if kind == 'nil':
        return '0'
    if kind == 'auto':
        return 'auto'
    if kind == 'dxa':
        # True division: the module imports division from __future__
        return '%.3gpt' % (amount/20)
    if kind == 'pct':
        return '%.3g%%' % (amount/50)
    return inherit
|
||||
|
||||
def read_width(parent, dest):
    """Set ``dest.width`` from the ``w:tblW`` child of parent.

    If there are several ``w:tblW`` children the last one wins; if there are
    none the width is ``inherit``.
    """
    width = inherit
    for node in XPath('./w:tblW')(parent):
        width = _read_width(node)
    dest.width = width
|
||||
|
||||
def read_padding(parent, dest):
    """Read the cell margins from parent and store them on dest.

    The margins are looked up in ``w:tblCellMar`` when parent is a
    ``w:tblPr`` element and in ``w:tcMar`` otherwise. The results are stored
    as ``cell_padding_left``, ``cell_padding_top``, ``cell_padding_right`` and
    ``cell_padding_bottom``; edges that are not specified are ``inherit``.

    :param parent: the properties element to read from
    :param dest: object that receives the ``cell_padding_*`` attributes
    """
    name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar'
    edges = ('left', 'top', 'right', 'bottom')
    # Keep the per-edge values in a real dict. Storing them through
    # locals()[x] = ... does not work: the mapping returned by locals() is
    # only a snapshot and is refreshed from the real local variables on the
    # next locals() call, so every value read back would still be inherit.
    padding = {x: inherit for x in edges}
    for mar in XPath('./w:%s' % name)(parent):
        for x in edges:
            for edge in XPath('./w:%s' % x)(mar):
                padding[x] = _read_width(edge)
    for x in edges:
        setattr(dest, 'cell_padding_%s' % x, padding[x])
|
||||
|
||||
def read_justification(parent, dest):
    """Translate ``w:jc`` children of parent into left/right margins on dest.

    ``left`` sets the right margin to ``'auto'``, ``right`` sets the left
    margin to ``'auto'`` and ``center`` sets both. Any margin that is not
    touched stays ``inherit``. Other values of ``w:val`` are ignored.
    """
    # Which margins become 'auto' for each justification value
    auto_sides = {
        'left': ('right',),
        'right': ('left',),
        'center': ('left', 'right'),
    }
    margins = {'left': inherit, 'right': inherit}
    for jc in XPath('./w:jc[@w:val]')(parent):
        val = get(jc, 'w:val')
        if not val:
            continue
        for side in auto_sides.get(val, ()):
            margins[side] = 'auto'
    dest.margin_left = margins['left']
    dest.margin_right = margins['right']
|
||||
|
||||
def read_spacing(parent, dest):
    """Set ``dest.spacing`` from the ``w:tblCellSpacing`` child of parent.

    The last matching child wins; with no match the value is ``inherit``.
    """
    spacing = inherit
    for node in XPath('./w:tblCellSpacing')(parent):
        spacing = _read_width(node)
    dest.spacing = spacing
|
||||
|
||||
def read_indent(parent, dest):
    """Set ``dest.indent`` from the ``w:tblInd`` child of parent.

    The last matching child wins; with no match the value is ``inherit``.
    """
    indent = inherit
    for node in XPath('./w:tblInd')(parent):
        indent = _read_width(node)
    dest.indent = indent
|
||||
|
||||
# Edges for which border properties are read by read_borders() and tracked by
# TableStyle: the four outer edges plus 'insideH' and 'insideV'.
border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV')
|
||||
|
||||
def read_borders(parent, dest):
    """Read the borders for every edge in ``border_edges`` onto dest.

    The borders are looked up in ``w:tblBorders`` when parent is a
    ``w:tblPr`` element and in ``w:tcBorders`` otherwise.
    """
    if parent.tag.endswith('}tblPr'):
        container = 'tblBorders'
    else:
        container = 'tcBorders'
    read_border(parent, dest, border_edges, container)
|
||||
|
||||
class TableStyle(object):
    """The table-level properties read from a ``w:tblPr`` element.

    When constructed without an element, every property in
    ``all_properties`` is set to ``inherit``.
    """

    # Names of all attributes that take part in update()/resolve_based_on()
    all_properties = (
        'width', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color',
        'spacing', 'indent',
    ) + tuple(k % edge for edge in border_edges for k in border_props)

    def __init__(self, tblPr=None):
        """Populate the properties from tblPr, or set them all to inherit."""
        if tblPr is not None:
            readers = (
                read_width, read_padding, read_shd, read_justification,
                read_spacing, read_indent, read_borders,
            )
            for reader in readers:
                reader(tblPr, self)
        else:
            for name in self.all_properties:
                setattr(self, name, inherit)

        self._css = None

    def update(self, other):
        """Copy every property that ``other`` actually specifies onto self."""
        for name in self.all_properties:
            theirs = getattr(other, name)
            if theirs is inherit:
                continue
            setattr(self, name, theirs)

    def resolve_based_on(self, parent):
        """Fill in every property that is still inherit from ``parent``."""
        for name in self.all_properties:
            if getattr(self, name) is inherit:
                setattr(self, name, getattr(parent, name))

    @property
    def css(self):
        """Return the stored ``_css`` value (set to None in ``__init__``)."""
        return self._css
|
||||
|
||||
class Tables(object):
    """Collects the blocks that belong to each ``w:tbl`` element and later
    wraps their HTML counterparts in table markup."""

    def __init__(self):
        # Maps each registered w:tbl element to the list of blocks added to it
        self.tables = OrderedDict()

    def register(self, tbl):
        """Start a new, empty block list for ``tbl`` and make it current."""
        blocks = []
        self.tables[tbl] = blocks
        self.current_table = blocks

    def add(self, p):
        """Append ``p`` to the most recently registered table."""
        self.current_table.append(p)

    def apply_markup(self, object_map):
        """Build a TABLE/TR/TD structure for every non-empty registered table.

        :param object_map: mapping whose values are the objects stored via
            add() and whose keys are the corresponding HTML elements
        """
        html_for = {obj: html for html, obj in object_map.iteritems()}
        for tbl, blocks in self.tables.iteritems():
            if not blocks:
                continue
            # The table goes where the HTML for its first block currently is
            first_html = html_for[blocks[0]]
            container = first_html.getparent()
            table = TABLE('\n\t\t')
            container.insert(container.index(first_html), table)

            for row in XPath('./w:tr')(tbl):
                tr = TR('\n\t\t\t')
                tr.tail = '\n\t\t'
                table.append(tr)
                for cell in XPath('./w:tc')(row):
                    td = TD()
                    td.tail = '\n\t\t\t'
                    tr.append(td)
                    for para in XPath('./w:p')(cell):
                        td.append(html_for[para])
                # The last cell of a row gets a shorter tail (closing indent)
                if len(tr):
                    tr[-1].tail = '\n\t\t'
            # Likewise for the last row of the table
            if len(table):
                table[-1].tail = '\n\t'
|
||||
|
@ -21,6 +21,7 @@ from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
|
||||
from calibre.ebooks.docx.numbering import Numbering
|
||||
from calibre.ebooks.docx.fonts import Fonts
|
||||
from calibre.ebooks.docx.images import Images
|
||||
from calibre.ebooks.docx.tables import Tables
|
||||
from calibre.ebooks.docx.footnotes import Footnotes
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
@ -47,6 +48,7 @@ class Convert(object):
|
||||
self.body = BODY()
|
||||
self.styles = Styles()
|
||||
self.images = Images()
|
||||
self.tables = Tables()
|
||||
self.object_map = OrderedDict()
|
||||
self.html = HTML(
|
||||
HEAD(
|
||||
@ -98,15 +100,26 @@ class Convert(object):
|
||||
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
|
||||
dl[-1][0].tail = ']'
|
||||
dl.append(DD())
|
||||
in_table = False
|
||||
for wp in note:
|
||||
if wp.tag.endswith('}tbl'):
|
||||
self.tables.register(wp)
|
||||
in_table = True
|
||||
continue
|
||||
if in_table:
|
||||
if ancestor(wp, 'w:tbl') is not None:
|
||||
self.tables.add(wp)
|
||||
else:
|
||||
in_table = False
|
||||
p = self.convert_p(wp)
|
||||
dl[-1].append(p)
|
||||
|
||||
self.resolve_links(relationships_by_id)
|
||||
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
|
||||
|
||||
self.styles.cascade(self.layers)
|
||||
|
||||
self.tables.apply_markup(self.object_map)
|
||||
|
||||
numbered = []
|
||||
for html_obj, obj in self.object_map.iteritems():
|
||||
raw = obj.get('calibre_num_id', None)
|
||||
@ -154,7 +167,13 @@ class Convert(object):
|
||||
current = []
|
||||
self.page_map = OrderedDict()
|
||||
|
||||
for p in descendants(doc, 'w:p'):
|
||||
in_table = False
|
||||
|
||||
for p in descendants(doc, 'w:p', 'w:tbl'):
|
||||
if p.tag.endswith('}tbl'):
|
||||
in_table = True
|
||||
self.tables.register(p)
|
||||
continue
|
||||
sect = tuple(descendants(p, 'w:sectPr'))
|
||||
if sect:
|
||||
pr = PageProperties(sect)
|
||||
@ -163,6 +182,11 @@ class Convert(object):
|
||||
current = []
|
||||
else:
|
||||
current.append(p)
|
||||
if in_table:
|
||||
if ancestor(p, 'w:tbl') is not None:
|
||||
self.tables.add(p)
|
||||
else:
|
||||
in_table = False
|
||||
if current:
|
||||
last = XPath('./w:body/w:sectPr')(doc)
|
||||
pr = PageProperties(last)
|
||||
|
Loading…
x
Reference in New Issue
Block a user