DOCX: Nested tables and block/run table styles

This commit is contained in:
Kovid Goyal 2013-06-02 10:39:02 +05:30
parent d873be3ca8
commit ca0899956d
3 changed files with 94 additions and 51 deletions

View File

@ -118,7 +118,7 @@ class Styles(object):
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup. Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
''' '''
def __init__(self): def __init__(self, tables):
self.id_map = OrderedDict() self.id_map = OrderedDict()
self.para_cache = {} self.para_cache = {}
self.para_char_cache = {} self.para_char_cache = {}
@ -126,6 +126,7 @@ class Styles(object):
self.classes = {} self.classes = {}
self.counter = Counter() self.counter = Counter()
self.default_styles = {} self.default_styles = {}
self.tables = tables
self.numbering_style_links = {} self.numbering_style_links = {}
def __iter__(self): def __iter__(self):
@ -226,6 +227,9 @@ class Styles(object):
parent_styles = [] parent_styles = []
if self.default_paragraph_style is not None: if self.default_paragraph_style is not None:
parent_styles.append(self.default_paragraph_style) parent_styles.append(self.default_paragraph_style)
ts = self.tables.para_style(p)
if ts is not None:
parent_styles.append(ts)
default_para = self.default_styles.get('paragraph', None) default_para = self.default_styles.get('paragraph', None)
if direct_formatting.linked_style is not None: if direct_formatting.linked_style is not None:
@ -278,6 +282,9 @@ class Styles(object):
default_char = self.default_styles.get('character', None) default_char = self.default_styles.get('character', None)
if self.default_character_style is not None: if self.default_character_style is not None:
parent_styles.append(self.default_character_style) parent_styles.append(self.default_character_style)
ts = self.tables.run_style(p)
if ts is not None:
parent_styles.append(ts)
pstyle = self.para_char_cache.get(p, None) pstyle = self.para_char_cache.get(p, None)
if pstyle is not None: if pstyle is not None:
parent_styles.append(pstyle) parent_styles.append(pstyle)

View File

@ -6,8 +6,6 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import OrderedDict
from lxml.html.builder import TABLE, TR, TD from lxml.html.builder import TABLE, TR, TD
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa
@ -212,28 +210,59 @@ class TableStyle(object):
if val is inherit: if val is inherit:
setattr(self, p, getattr(parent, p)) setattr(self, p, getattr(parent, p))
class Table(object):
class Tables(object): def __init__(self, tbl, styles, para_map):
self.tbl = tbl
self.styles = styles
def __init__(self): # Read Table Style
self.tables = OrderedDict() style = {'table':TableStyle()}
for tblPr in XPath('./w:tblPr')(tbl):
for ts in XPath('./w:tblStyle[@w:val]')(tblPr):
style_id = get(ts, 'w:val')
s = styles.get(style_id)
if s is not None:
if s.table_style is not None:
style['table'].update(s.table_style)
if s.paragraph_style is not None:
if 'paragraph' in style:
style['paragraph'].update(s.paragraph_style)
else:
style['paragraph'] = s.paragraph_style
if s.character_style is not None:
if 'run' in style:
style['run'].update(s.character_style)
else:
style['run'] = s.character_style
style['table'].update(TableStyle(tblPr))
self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None)
self.run_style = style.get('run', None)
self.paragraphs = XPath('./w:tr/w:tc/w:p')(tbl)
def register(self, tbl): self.sub_tables = {x:Table(x, styles, para_map) for x in XPath('./w:tr/w:tc/w:tbl')(tbl)}
self.tables[tbl] = self.current_table = [] para_map.update({p:self for p in self.paragraphs})
def add(self, p): def __iter__(self):
self.current_table.append(p) for p in self.paragraphs:
yield p
for t in self.sub_tables.itervalues():
for p in t:
yield p
def apply_markup(self, object_map): def apply_markup(self, rmap, parent=None):
rmap = {v:k for k, v in object_map.iteritems()}
for tbl, blocks in self.tables.iteritems():
if not blocks:
continue
parent = rmap[blocks[0]].getparent()
table = TABLE('\n\t\t') table = TABLE('\n\t\t')
idx = parent.index(rmap[blocks[0]]) if parent is None:
try:
first_para = rmap[next(iter(self))]
except StopIteration:
return
parent = first_para.getparent()
idx = parent.index(first_para)
parent.insert(idx, table) parent.insert(idx, table)
for row in XPath('./w:tr')(tbl): else:
parent.append(table)
for row in XPath('./w:tr')(self.tbl):
tr = TR('\n\t\t\t') tr = TR('\n\t\t\t')
tr.tail = '\n\t\t' tr.tail = '\n\t\t'
table.append(tr) table.append(tr)
@ -241,12 +270,34 @@ class Tables(object):
td = TD() td = TD()
td.tail = '\n\t\t\t' td.tail = '\n\t\t\t'
tr.append(td) tr.append(td)
for p in XPath('./w:p')(tc): for x in XPath('./w:p|./w:tbl')(tc):
block = rmap[p] if x.tag.endswith('}p'):
td.append(block) td.append(rmap[x])
else:
self.sub_tables[x].apply_markup(rmap, parent=td)
if len(tr): if len(tr):
tr[-1].tail = '\n\t\t' tr[-1].tail = '\n\t\t'
if len(table): if len(table):
table[-1].tail = '\n\t' table[-1].tail = '\n\t'
class Tables(object):

    '''
    Registry of the tables found in the document.

    Each registered ``w:tbl`` element is wrapped in a :class:`Table`. Building
    a :class:`Table` also builds :class:`Table` objects for the tables nested
    in its cells and records every paragraph it owns in ``para_map``, which is
    what :meth:`para_style` and :meth:`run_style` consult.
    '''

    def __init__(self):
        # Top-level Table objects, in registration order
        self.tables = []
        # Maps a paragraph element to the Table that directly contains it
        self.para_map = {}
        # w:tbl elements that are nested (at any depth) inside an already
        # registered table. They are owned by their parent Table.
        self.sub_tables = set()

    def register(self, tbl, styles):
        '''
        Register the ``w:tbl`` element ``tbl`` as a top-level table.

        :param tbl: The table element to register.
        :param styles: The styles collection, passed through to :class:`Table`.

        Elements already known to be nested inside a previously registered
        table are ignored: their parent :class:`Table` has already created a
        :class:`Table` for them and emits their markup itself. Registering
        them again would make :meth:`apply_markup` process them twice.

        NOTE(review): this relies on the enclosing table being registered
        before the tables nested in it, which presumably holds for callers
        that walk the document in document order -- confirm.
        '''
        if tbl in self.sub_tables:
            return
        table = Table(tbl, styles, self.para_map)
        self.tables.append(table)
        # Remember every nested table element, however deeply nested
        pending = [table]
        while pending:
            current = pending.pop()
            for elem in current.sub_tables:
                self.sub_tables.add(elem)
                pending.append(current.sub_tables[elem])

    def apply_markup(self, object_map):
        '''
        Generate the markup for every registered table.

        :param object_map: A mapping whose inverse (values to keys) is handed
            to :meth:`Table.apply_markup` so that each document paragraph can
            be looked up to find its counterpart in ``object_map``'s keys.
        '''
        rmap = {v:k for k, v in object_map.iteritems()}
        for table in self.tables:
            table.apply_markup(rmap)

    def para_style(self, p):
        '''
        Return the ``paragraph_style`` of the table that owns the paragraph
        ``p``, or None if ``p`` is not in ``para_map`` or the owning table has
        no such style.
        '''
        return getattr(self.para_map.get(p, None), 'paragraph_style', None)

    def run_style(self, p):
        '''
        Return the ``run_style`` of the table that owns the paragraph ``p``,
        or None if ``p`` is not in ``para_map`` or the owning table has no
        such style.
        '''
        return getattr(self.para_map.get(p, None), 'run_style', None)

View File

@ -46,9 +46,9 @@ class Convert(object):
self.dest_dir = dest_dir or os.getcwdu() self.dest_dir = dest_dir or os.getcwdu()
self.mi = self.docx.metadata self.mi = self.docx.metadata
self.body = BODY() self.body = BODY()
self.styles = Styles()
self.images = Images()
self.tables = Tables() self.tables = Tables()
self.styles = Styles(self.tables)
self.images = Images()
self.object_map = OrderedDict() self.object_map = OrderedDict()
self.html = HTML( self.html = HTML(
HEAD( HEAD(
@ -100,17 +100,9 @@ class Convert(object):
dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor)) dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']' dl[-1][0].tail = ']'
dl.append(DD()) dl.append(DD())
in_table = False
for wp in note: for wp in note:
if wp.tag.endswith('}tbl'): if wp.tag.endswith('}tbl'):
self.tables.register(wp) self.tables.register(wp, self.styles)
in_table = True
continue
if in_table:
if ancestor(wp, 'w:tbl') is not None:
self.tables.add(wp)
else:
in_table = False
p = self.convert_p(wp) p = self.convert_p(wp)
dl[-1].append(p) dl[-1].append(p)
@ -167,12 +159,9 @@ class Convert(object):
current = [] current = []
self.page_map = OrderedDict() self.page_map = OrderedDict()
in_table = False
for p in descendants(doc, 'w:p', 'w:tbl'): for p in descendants(doc, 'w:p', 'w:tbl'):
if p.tag.endswith('}tbl'): if p.tag.endswith('}tbl'):
in_table = True self.tables.register(p, self.styles)
self.tables.register(p)
continue continue
sect = tuple(descendants(p, 'w:sectPr')) sect = tuple(descendants(p, 'w:sectPr'))
if sect: if sect:
@ -182,11 +171,7 @@ class Convert(object):
current = [] current = []
else: else:
current.append(p) current.append(p)
if in_table:
if ancestor(p, 'w:tbl') is not None:
self.tables.add(p)
else:
in_table = False
if current: if current:
last = XPath('./w:body/w:sectPr')(doc) last = XPath('./w:body/w:sectPr')(doc)
pr = PageProperties(last) pr = PageProperties(last)