mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
DOCX Input: Start work on tables
This commit is contained in:
parent
5887243d56
commit
6eb97d2626
@ -65,35 +65,41 @@ LINE_STYLES = { # {{{
|
|||||||
} # }}}
|
} # }}}
|
||||||
|
|
||||||
# Read from XML {{{
|
# Read from XML {{{
|
||||||
def read_border(parent, dest):
|
|
||||||
tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
|
|
||||||
'border_%s_style':inherit, 'border_%s_color':inherit}
|
|
||||||
vals = {}
|
|
||||||
for edge in ('left', 'top', 'right', 'bottom'):
|
|
||||||
vals.update({k % edge:v for k, v in tvals.iteritems()})
|
|
||||||
|
|
||||||
for border in XPath('./w:pBdr')(parent):
|
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
|
||||||
for edge in ('left', 'top', 'right', 'bottom'):
|
|
||||||
for elem in XPath('./w:%s' % edge)(border):
|
def read_single_border(parent, edge):
|
||||||
color = get(elem, 'w:color')
|
color = style = width = padding = None
|
||||||
if color is not None:
|
for elem in XPath('./w:%s' % edge)(parent):
|
||||||
vals['border_%s_color' % edge] = simple_color(color)
|
c = get(elem, 'w:color')
|
||||||
style = get(elem, 'w:val')
|
if c is not None:
|
||||||
if style is not None:
|
color = simple_color(c)
|
||||||
vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
|
s = get(elem, 'w:val')
|
||||||
space = get(elem, 'w:space')
|
if s is not None:
|
||||||
if space is not None:
|
style = LINE_STYLES.get(s, 'solid')
|
||||||
try:
|
space = get(elem, 'w:space')
|
||||||
vals['padding_%s' % edge] = float(space)
|
if space is not None:
|
||||||
except (ValueError, TypeError):
|
try:
|
||||||
pass
|
padding = float(space)
|
||||||
sz = get(elem, 'w:sz')
|
except (ValueError, TypeError):
|
||||||
if sz is not None:
|
pass
|
||||||
# we dont care about art borders (they are only used for page borders)
|
sz = get(elem, 'w:sz')
|
||||||
try:
|
if sz is not None:
|
||||||
vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
|
# we dont care about art borders (they are only used for page borders)
|
||||||
except (ValueError, TypeError):
|
try:
|
||||||
pass
|
width = min(96, max(2, float(sz))) / 8
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
return {p:v for p, v in zip(border_props, (padding, width, style, color))}
|
||||||
|
|
||||||
|
def read_border(parent, dest, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'):
|
||||||
|
vals = {k % edge:inherit for edge in border_edges for k in border_props}
|
||||||
|
|
||||||
|
for border in XPath('./w:' + name)(parent):
|
||||||
|
for edge in border_edges:
|
||||||
|
for prop, val in read_single_border(border, edge).iteritems():
|
||||||
|
if val is not None:
|
||||||
|
vals[prop % edge] = val
|
||||||
|
|
||||||
for key, val in vals.iteritems():
|
for key, val in vals.iteritems():
|
||||||
setattr(dest, key, val)
|
setattr(dest, key, val)
|
||||||
|
@ -17,7 +17,7 @@ class Note(object):
|
|||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for p in descendants(self.parent, 'w:p'):
|
for p in descendants(self.parent, 'w:p', 'w:tbl'):
|
||||||
yield p
|
yield p
|
||||||
|
|
||||||
class Footnotes(object):
|
class Footnotes(object):
|
||||||
|
@ -11,6 +11,7 @@ from collections import OrderedDict, Counter
|
|||||||
|
|
||||||
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
|
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
|
||||||
from calibre.ebooks.docx.char_styles import RunStyle
|
from calibre.ebooks.docx.char_styles import RunStyle
|
||||||
|
from calibre.ebooks.docx.tables import TableStyle
|
||||||
from calibre.ebooks.docx.names import XPath, get
|
from calibre.ebooks.docx.names import XPath, get
|
||||||
|
|
||||||
class PageProperties(object):
|
class PageProperties(object):
|
||||||
@ -66,10 +67,17 @@ class Style(object):
|
|||||||
self.based_on = None
|
self.based_on = None
|
||||||
self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}
|
self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}
|
||||||
|
|
||||||
self.paragraph_style = self.character_style = None
|
self.paragraph_style = self.character_style = self.table_style = None
|
||||||
|
|
||||||
if self.style_type in {'paragraph', 'character'}:
|
if self.style_type in {'paragraph', 'character', 'table'}:
|
||||||
if self.style_type == 'paragraph':
|
if self.style_type == 'table':
|
||||||
|
for tblPr in XPath('./w:tblPr')(elem):
|
||||||
|
ts = TableStyle(tblPr)
|
||||||
|
if self.table_style is None:
|
||||||
|
self.table_style = ts
|
||||||
|
else:
|
||||||
|
self.table_style.update(ts)
|
||||||
|
if self.style_type in {'paragraph', 'table'}:
|
||||||
for pPr in XPath('./w:pPr')(elem):
|
for pPr in XPath('./w:pPr')(elem):
|
||||||
ps = ParagraphStyle(pPr)
|
ps = ParagraphStyle(pPr)
|
||||||
if self.paragraph_style is None:
|
if self.paragraph_style is None:
|
||||||
@ -90,6 +98,10 @@ class Style(object):
|
|||||||
self.numbering_style_link = get(x, 'w:val')
|
self.numbering_style_link = get(x, 'w:val')
|
||||||
|
|
||||||
def resolve_based_on(self, parent):
|
def resolve_based_on(self, parent):
|
||||||
|
if parent.table_style is not None:
|
||||||
|
if self.table_style is None:
|
||||||
|
self.table_style = TableStyle()
|
||||||
|
self.table_style.resolve_based_on(parent.table_style)
|
||||||
if parent.paragraph_style is not None:
|
if parent.paragraph_style is not None:
|
||||||
if self.paragraph_style is None:
|
if self.paragraph_style is None:
|
||||||
self.paragraph_style = ParagraphStyle()
|
self.paragraph_style = ParagraphStyle()
|
||||||
|
152
src/calibre/ebooks/docx/tables.py
Normal file
152
src/calibre/ebooks/docx/tables.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from lxml.html.builder import TABLE, TR, TD
|
||||||
|
|
||||||
|
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border ,border_props # noqa
|
||||||
|
from calibre.ebooks.docx.names import XPath, get
|
||||||
|
|
||||||
|
def _read_width(elem):
    """Translate the ``w:w``/``w:type`` attributes of *elem* into a CSS length.

    The numeric ``w:w`` attribute is parsed as an integer; a missing or
    unparseable value is treated as 0. The ``w:type`` attribute (looked up
    with a fallback of ``'auto'``) selects the result:

    * ``nil``  -> ``'0'``
    * ``auto`` -> ``'auto'``
    * ``dxa``  -> ``w/20`` formatted as points
    * ``pct``  -> ``w/50`` formatted as a percentage
    * anything else -> ``inherit``
    """
    try:
        amount = int(get(elem, 'w:w'))
    except (TypeError, ValueError):
        amount = 0
    kind = get(elem, 'w:type', 'auto')
    if kind == 'dxa':
        return '%.3gpt' % (amount/20)
    if kind == 'pct':
        return '%.3g%%' % (amount/50)
    # The remaining recognised types map to fixed strings; unknown types
    # leave the value unset.
    return {'nil': '0', 'auto': 'auto'}.get(kind, inherit)
|
||||||
|
|
||||||
|
def read_width(parent, dest):
    """Store the table width found under *parent* on ``dest.width``.

    Every ``w:tblW`` child of *parent* is converted with ``_read_width``; the
    last one wins. With no such child the attribute is set to ``inherit``.
    """
    width = inherit
    for node in XPath('./w:tblW')(parent):
        width = _read_width(node)
    dest.width = width
|
||||||
|
|
||||||
|
def read_padding(parent, dest):
    """Read per-edge cell margins from *parent* into ``dest.cell_padding_*``.

    The margins container is ``w:tblCellMar`` when *parent* is a ``tblPr``
    element and ``w:tcMar`` otherwise. For each of the four edges the last
    matching child element is converted with ``_read_width``; edges that are
    not specified are set to ``inherit``.
    """
    name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar'
    edges = ('left', 'top', 'right', 'bottom')
    # Collect results in an explicit dict. Assigning through locals() does
    # not rebind function locals, and the values written there are discarded
    # the next time locals() is called, so the parsed padding was never
    # actually stored.
    padding = dict.fromkeys(edges, inherit)
    for mar in XPath('./w:%s' % name)(parent):
        for x in edges:
            for edge in XPath('./w:%s' % x)(mar):
                padding[x] = _read_width(edge)
    for x in edges:
        setattr(dest, 'cell_padding_%s' % x, padding[x])
|
||||||
|
|
||||||
|
def read_justification(parent, dest):
    """Map table justification onto ``dest.margin_left``/``dest.margin_right``.

    Each ``w:jc`` child of *parent* that carries a ``w:val`` attribute is
    examined: ``left`` makes the right margin ``'auto'``, ``right`` makes the
    left margin ``'auto'`` and ``center`` makes both ``'auto'``. Empty or
    unrecognised values are ignored. Margins not switched to ``'auto'`` are
    set to ``inherit``. Effects of several ``w:jc`` elements accumulate.
    """
    # Which margins become 'auto' for each justification value.
    auto_sides = {
        'left': ('right',),
        'right': ('left',),
        'center': ('left', 'right'),
    }
    margins = {'left': inherit, 'right': inherit}
    for jc in XPath('./w:jc[@w:val]')(parent):
        for side in auto_sides.get(get(jc, 'w:val'), ()):
            margins[side] = 'auto'
    dest.margin_left = margins['left']
    dest.margin_right = margins['right']
|
||||||
|
|
||||||
|
def read_spacing(parent, dest):
    """Store the cell spacing found under *parent* on ``dest.spacing``.

    The last ``w:tblCellSpacing`` child of *parent* is converted with
    ``_read_width``; when there is none the attribute is set to ``inherit``.
    """
    spacing = inherit
    for node in XPath('./w:tblCellSpacing')(parent):
        spacing = _read_width(node)
    dest.spacing = spacing
|
||||||
|
|
||||||
|
def read_indent(parent, dest):
    """Store the table indent found under *parent* on ``dest.indent``.

    The last ``w:tblInd`` child of *parent* is converted with
    ``_read_width``; when there is none the attribute is set to ``inherit``.
    """
    indent = inherit
    for node in XPath('./w:tblInd')(parent):
        indent = _read_width(node)
    dest.indent = indent
|
||||||
|
|
||||||
|
# Edge names for which border properties are read from w:tblBorders /
# w:tcBorders: the four outer edges plus insideH and insideV.
border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV')
|
||||||
|
|
||||||
|
def read_borders(parent, dest):
    """Read table or cell borders from *parent* onto *dest*.

    Delegates to ``read_border`` using the module-level ``border_edges``,
    looking inside ``w:tblBorders`` when *parent* is a ``tblPr`` element and
    inside ``w:tcBorders`` otherwise.
    """
    is_table_props = parent.tag.endswith('}tblPr')
    container = 'tblBorders' if is_table_props else 'tcBorders'
    read_border(parent, dest, border_edges, container)
|
||||||
|
|
||||||
|
class TableStyle(object):
    """Table formatting properties, optionally read from a ``w:tblPr`` element.

    Every name in ``all_properties`` is an instance attribute whose value is
    either a concrete setting or the ``inherit`` sentinel.
    """

    # Scalar properties followed by one entry per (edge, border property).
    all_properties = (
        'width', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color',
        'spacing', 'indent',
    ) + tuple(k % edge for edge in border_edges for k in border_props)

    def __init__(self, tblPr=None):
        """Populate the style from *tblPr*, or with ``inherit`` everywhere
        when *tblPr* is None."""
        if tblPr is not None:
            # Same readers, in the same order, as the read_<name> functions
            # for width, padding, shd, justification, spacing, indent, borders.
            readers = (
                read_width, read_padding, read_shd, read_justification,
                read_spacing, read_indent, read_borders,
            )
            for reader in readers:
                reader(tblPr, self)
        else:
            for name in self.all_properties:
                setattr(self, name, inherit)

        self._css = None

    def update(self, other):
        """Overwrite each property with the value from *other* unless that
        value is ``inherit``."""
        for name in self.all_properties:
            incoming = getattr(other, name)
            if incoming is inherit:
                continue
            setattr(self, name, incoming)

    def resolve_based_on(self, parent):
        """Fill every property still set to ``inherit`` with the value of the
        same property on *parent*."""
        for name in self.all_properties:
            if getattr(self, name) is inherit:
                setattr(self, name, getattr(parent, name))

    @property
    def css(self):
        """The cached ``_css`` value (only ever set to None in this class)."""
        return self._css
|
||||||
|
|
||||||
|
class Tables(object):
    """Track the paragraphs belonging to each table and build HTML tables.

    ``tables`` maps each registered table element to the list of paragraphs
    added while it was the current table, in registration order.
    """

    def __init__(self):
        self.tables = OrderedDict()

    def register(self, tbl):
        """Start collecting paragraphs for *tbl* and make it the current table."""
        collected = []
        self.tables[tbl] = collected
        self.current_table = collected

    def add(self, p):
        """Record *p* as belonging to the most recently registered table."""
        self.current_table.append(p)

    def apply_markup(self, object_map):
        """Wrap already-converted paragraphs in ``<table>`` markup.

        *object_map* is inverted so the value stored for each paragraph
        element can be looked up by that element. For every registered table
        with at least one paragraph, a TABLE is inserted just before the
        converted first paragraph. One TR is appended per ``w:tr`` child and
        one TD per ``w:tc`` child, and the converted blocks of each cell's
        direct ``w:p`` children are moved into the TD. Text and tail values
        are whitespace used to indent the serialised markup.
        """
        converted = {src: out for out, src in object_map.iteritems()}
        for tbl, blocks in self.tables.iteritems():
            if blocks:
                anchor = converted[blocks[0]]
                container = anchor.getparent()
                table = TABLE('\n\t\t')
                container.insert(container.index(anchor), table)
                for row in XPath('./w:tr')(tbl):
                    tr = TR('\n\t\t\t')
                    tr.tail = '\n\t\t'
                    table.append(tr)
                    for tc in XPath('./w:tc')(row):
                        td = TD()
                        td.tail = '\n\t\t\t'
                        tr.append(td)
                        for p in XPath('./w:p')(tc):
                            td.append(converted[p])
                    # The last cell closes the row, so it gets the shallower
                    # indent.
                    if len(tr):
                        tr[-1].tail = '\n\t\t'
                # Likewise the last row closes the table.
                if len(table):
                    table[-1].tail = '\n\t'
|
||||||
|
|
@ -21,6 +21,7 @@ from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
|
|||||||
from calibre.ebooks.docx.numbering import Numbering
|
from calibre.ebooks.docx.numbering import Numbering
|
||||||
from calibre.ebooks.docx.fonts import Fonts
|
from calibre.ebooks.docx.fonts import Fonts
|
||||||
from calibre.ebooks.docx.images import Images
|
from calibre.ebooks.docx.images import Images
|
||||||
|
from calibre.ebooks.docx.tables import Tables
|
||||||
from calibre.ebooks.docx.footnotes import Footnotes
|
from calibre.ebooks.docx.footnotes import Footnotes
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
@ -47,6 +48,7 @@ class Convert(object):
|
|||||||
self.body = BODY()
|
self.body = BODY()
|
||||||
self.styles = Styles()
|
self.styles = Styles()
|
||||||
self.images = Images()
|
self.images = Images()
|
||||||
|
self.tables = Tables()
|
||||||
self.object_map = OrderedDict()
|
self.object_map = OrderedDict()
|
||||||
self.html = HTML(
|
self.html = HTML(
|
||||||
HEAD(
|
HEAD(
|
||||||
@ -98,15 +100,26 @@ class Convert(object):
|
|||||||
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
|
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
|
||||||
dl[-1][0].tail = ']'
|
dl[-1][0].tail = ']'
|
||||||
dl.append(DD())
|
dl.append(DD())
|
||||||
|
in_table = False
|
||||||
for wp in note:
|
for wp in note:
|
||||||
|
if wp.tag.endswith('}tbl'):
|
||||||
|
self.tables.register(wp)
|
||||||
|
in_table = True
|
||||||
|
continue
|
||||||
|
if in_table:
|
||||||
|
if ancestor(wp, 'w:tbl') is not None:
|
||||||
|
self.tables.add(wp)
|
||||||
|
else:
|
||||||
|
in_table = False
|
||||||
p = self.convert_p(wp)
|
p = self.convert_p(wp)
|
||||||
dl[-1].append(p)
|
dl[-1].append(p)
|
||||||
|
|
||||||
self.resolve_links(relationships_by_id)
|
self.resolve_links(relationships_by_id)
|
||||||
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
|
|
||||||
|
|
||||||
self.styles.cascade(self.layers)
|
self.styles.cascade(self.layers)
|
||||||
|
|
||||||
|
self.tables.apply_markup(self.object_map)
|
||||||
|
|
||||||
numbered = []
|
numbered = []
|
||||||
for html_obj, obj in self.object_map.iteritems():
|
for html_obj, obj in self.object_map.iteritems():
|
||||||
raw = obj.get('calibre_num_id', None)
|
raw = obj.get('calibre_num_id', None)
|
||||||
@ -154,7 +167,13 @@ class Convert(object):
|
|||||||
current = []
|
current = []
|
||||||
self.page_map = OrderedDict()
|
self.page_map = OrderedDict()
|
||||||
|
|
||||||
for p in descendants(doc, 'w:p'):
|
in_table = False
|
||||||
|
|
||||||
|
for p in descendants(doc, 'w:p', 'w:tbl'):
|
||||||
|
if p.tag.endswith('}tbl'):
|
||||||
|
in_table = True
|
||||||
|
self.tables.register(p)
|
||||||
|
continue
|
||||||
sect = tuple(descendants(p, 'w:sectPr'))
|
sect = tuple(descendants(p, 'w:sectPr'))
|
||||||
if sect:
|
if sect:
|
||||||
pr = PageProperties(sect)
|
pr = PageProperties(sect)
|
||||||
@ -163,6 +182,11 @@ class Convert(object):
|
|||||||
current = []
|
current = []
|
||||||
else:
|
else:
|
||||||
current.append(p)
|
current.append(p)
|
||||||
|
if in_table:
|
||||||
|
if ancestor(p, 'w:tbl') is not None:
|
||||||
|
self.tables.add(p)
|
||||||
|
else:
|
||||||
|
in_table = False
|
||||||
if current:
|
if current:
|
||||||
last = XPath('./w:body/w:sectPr')(doc)
|
last = XPath('./w:body/w:sectPr')(doc)
|
||||||
pr = PageProperties(last)
|
pr = PageProperties(last)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user