DOCX: Nested tables and block/run table styles

This commit is contained in:
Kovid Goyal 2013-06-02 10:39:02 +05:30
parent d873be3ca8
commit ca0899956d
3 changed files with 94 additions and 51 deletions

View File

@ -118,7 +118,7 @@ class Styles(object):
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
'''
def __init__(self):
def __init__(self, tables):
self.id_map = OrderedDict()
self.para_cache = {}
self.para_char_cache = {}
@ -126,6 +126,7 @@ class Styles(object):
self.classes = {}
self.counter = Counter()
self.default_styles = {}
self.tables = tables
self.numbering_style_links = {}
def __iter__(self):
@ -226,6 +227,9 @@ class Styles(object):
parent_styles = []
if self.default_paragraph_style is not None:
parent_styles.append(self.default_paragraph_style)
ts = self.tables.para_style(p)
if ts is not None:
parent_styles.append(ts)
default_para = self.default_styles.get('paragraph', None)
if direct_formatting.linked_style is not None:
@ -278,6 +282,9 @@ class Styles(object):
default_char = self.default_styles.get('character', None)
if self.default_character_style is not None:
parent_styles.append(self.default_character_style)
ts = self.tables.run_style(p)
if ts is not None:
parent_styles.append(ts)
pstyle = self.para_char_cache.get(p, None)
if pstyle is not None:
parent_styles.append(pstyle)

View File

@ -6,8 +6,6 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import OrderedDict
from lxml.html.builder import TABLE, TR, TD
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa
@ -212,41 +210,94 @@ class TableStyle(object):
if val is inherit:
setattr(self, p, getattr(parent, p))
class Table(object):

    '''
    Wrapper around a single ``w:tbl`` element. Resolves the table, paragraph
    and run styles that apply to it, records every paragraph that is a direct
    child of one of its cells in ``para_map`` and recursively wraps the tables
    nested directly inside its cells.
    '''

    def __init__(self, tbl, styles, para_map):
        '''
        :param tbl: The ``w:tbl`` element to wrap
        :param styles: Object whose ``get(style_id)`` returns a style (or None)
                       exposing ``table_style``, ``paragraph_style`` and
                       ``character_style``
        :param para_map: Mapping updated in place so that every direct
                         paragraph of this table maps to this Table
        '''
        self.tbl = tbl
        self.styles = styles

        # Referenced (named) styles are folded in first, the properties read
        # from the tblPr element itself are applied on top of them
        resolved = {'table':TableStyle()}
        for tblPr in XPath('./w:tblPr')(tbl):
            for ref in XPath('./w:tblStyle[@w:val]')(tblPr):
                named = styles.get(get(ref, 'w:val'))
                if named is None:
                    continue
                if named.table_style is not None:
                    resolved['table'].update(named.table_style)
                if named.paragraph_style is not None:
                    self._merge_style(resolved, 'paragraph', named.paragraph_style)
                if named.character_style is not None:
                    self._merge_style(resolved, 'run', named.character_style)
            resolved['table'].update(TableStyle(tblPr))

        self.table_style = resolved['table']
        self.paragraph_style = resolved.get('paragraph', None)
        self.run_style = resolved.get('run', None)

        self.paragraphs = XPath('./w:tr/w:tc/w:p')(tbl)
        nested = {}
        for child in XPath('./w:tr/w:tc/w:tbl')(tbl):
            nested[child] = Table(child, styles, para_map)
        self.sub_tables = nested
        # Every direct paragraph of this table is owned by this Table
        para_map.update(dict.fromkeys(self.paragraphs, self))

    @staticmethod
    def _merge_style(resolved, key, extra):
        ' Fold extra into resolved[key], storing extra itself when key is new '
        if key in resolved:
            resolved[key].update(extra)
        else:
            resolved[key] = extra

    def __iter__(self):
        ' Yield the direct paragraphs first, then those of every nested table '
        for para in self.paragraphs:
            yield para
        for nested in self.sub_tables.itervalues():
            for para in nested:
                yield para

    def apply_markup(self, rmap, parent=None):
        '''
        Build the <table> markup for this table and move the already converted
        blocks into its cells.

        :param rmap: Mapping from ``w:p`` elements to their converted blocks
        :param parent: Element to append the <table> to. When None, the
                       <table> is inserted just before the block of the first
                       paragraph yielded by iterating over this table; if
                       there is no such paragraph nothing is done.
        '''
        table = TABLE('\n\t\t')
        if parent is not None:
            parent.append(table)
        else:
            try:
                anchor = rmap[next(iter(self))]
            except StopIteration:
                return
            parent = anchor.getparent()
            parent.insert(parent.index(anchor), table)

        for row in XPath('./w:tr')(self.tbl):
            tr = TR('\n\t\t\t')
            tr.tail = '\n\t\t'
            table.append(tr)
            for tc in XPath('./w:tc')(row):
                td = TD()
                td.tail = '\n\t\t\t'
                tr.append(td)
                for child in XPath('./w:p|./w:tbl')(tc):
                    if not child.tag.endswith('}p'):
                        # A nested table renders itself inside this cell
                        self.sub_tables[child].apply_markup(rmap, parent=td)
                        continue
                    td.append(rmap[child])
            # The last cell of a row and the last row of the table get a
            # shallower trailing indent
            if len(tr):
                tr[-1].tail = '\n\t\t'
        if len(table):
            table[-1].tail = '\n\t'
class Tables(object):

    '''
    Registry of the tables found in the document. Keeps the registered
    :class:`Table` objects in registration order and a map from paragraph
    elements to the Table that directly contains them, used to look up the
    table supplied paragraph and run styles.
    '''

    def __init__(self):
        # Registered Table objects, in the order register() was called
        self.tables = []
        # paragraph element -> Table, filled in by Table.__init__
        self.para_map = {}

    def register(self, tbl, styles):
        '''
        Wrap ``tbl`` in a Table and remember it.

        :param tbl: The ``w:tbl`` element
        :param styles: The styles collection, passed through to Table
        '''
        # NOTE(review): Table already wraps the tables nested in its cells, so
        # this presumably should only be called for top-level tables -- confirm
        # against the callers
        self.tables.append(Table(tbl, styles, self.para_map))

    def apply_markup(self, object_map):
        '''
        Generate the markup for every registered table.

        :param object_map: Mapping of converted blocks to the elements they
                           were created from; it is inverted before being
                           handed to each Table
        '''
        # items() gives the same result as iteritems() on python 2 and also
        # works on python 3
        rmap = {v:k for k, v in object_map.items()}
        for table in self.tables:
            table.apply_markup(rmap)

    def para_style(self, p):
        ' The paragraph style of the table containing p, or None '
        return getattr(self.para_map.get(p, None), 'paragraph_style', None)

    def run_style(self, p):
        ' The run style of the table containing p, or None '
        return getattr(self.para_map.get(p, None), 'run_style', None)

View File

@ -46,9 +46,9 @@ class Convert(object):
self.dest_dir = dest_dir or os.getcwdu()
self.mi = self.docx.metadata
self.body = BODY()
self.styles = Styles()
self.images = Images()
self.tables = Tables()
self.styles = Styles(self.tables)
self.images = Images()
self.object_map = OrderedDict()
self.html = HTML(
HEAD(
@ -100,17 +100,9 @@ class Convert(object):
dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']'
dl.append(DD())
in_table = False
for wp in note:
if wp.tag.endswith('}tbl'):
self.tables.register(wp)
in_table = True
continue
if in_table:
if ancestor(wp, 'w:tbl') is not None:
self.tables.add(wp)
else:
in_table = False
self.tables.register(wp, self.styles)
p = self.convert_p(wp)
dl[-1].append(p)
@ -167,12 +159,9 @@ class Convert(object):
current = []
self.page_map = OrderedDict()
in_table = False
for p in descendants(doc, 'w:p', 'w:tbl'):
if p.tag.endswith('}tbl'):
in_table = True
self.tables.register(p)
self.tables.register(p, self.styles)
continue
sect = tuple(descendants(p, 'w:sectPr'))
if sect:
@ -182,11 +171,7 @@ class Convert(object):
current = []
else:
current.append(p)
if in_table:
if ancestor(p, 'w:tbl') is not None:
self.tables.add(p)
else:
in_table = False
if current:
last = XPath('./w:body/w:sectPr')(doc)
pr = PageProperties(last)