mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
DOCX: Nested tables and block/run table styles
This commit is contained in:
parent
d873be3ca8
commit
ca0899956d
@ -118,7 +118,7 @@ class Styles(object):
|
|||||||
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
|
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, tables):
|
||||||
self.id_map = OrderedDict()
|
self.id_map = OrderedDict()
|
||||||
self.para_cache = {}
|
self.para_cache = {}
|
||||||
self.para_char_cache = {}
|
self.para_char_cache = {}
|
||||||
@ -126,6 +126,7 @@ class Styles(object):
|
|||||||
self.classes = {}
|
self.classes = {}
|
||||||
self.counter = Counter()
|
self.counter = Counter()
|
||||||
self.default_styles = {}
|
self.default_styles = {}
|
||||||
|
self.tables = tables
|
||||||
self.numbering_style_links = {}
|
self.numbering_style_links = {}
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
@ -226,6 +227,9 @@ class Styles(object):
|
|||||||
parent_styles = []
|
parent_styles = []
|
||||||
if self.default_paragraph_style is not None:
|
if self.default_paragraph_style is not None:
|
||||||
parent_styles.append(self.default_paragraph_style)
|
parent_styles.append(self.default_paragraph_style)
|
||||||
|
ts = self.tables.para_style(p)
|
||||||
|
if ts is not None:
|
||||||
|
parent_styles.append(ts)
|
||||||
|
|
||||||
default_para = self.default_styles.get('paragraph', None)
|
default_para = self.default_styles.get('paragraph', None)
|
||||||
if direct_formatting.linked_style is not None:
|
if direct_formatting.linked_style is not None:
|
||||||
@ -278,6 +282,9 @@ class Styles(object):
|
|||||||
default_char = self.default_styles.get('character', None)
|
default_char = self.default_styles.get('character', None)
|
||||||
if self.default_character_style is not None:
|
if self.default_character_style is not None:
|
||||||
parent_styles.append(self.default_character_style)
|
parent_styles.append(self.default_character_style)
|
||||||
|
ts = self.tables.run_style(p)
|
||||||
|
if ts is not None:
|
||||||
|
parent_styles.append(ts)
|
||||||
pstyle = self.para_char_cache.get(p, None)
|
pstyle = self.para_char_cache.get(p, None)
|
||||||
if pstyle is not None:
|
if pstyle is not None:
|
||||||
parent_styles.append(pstyle)
|
parent_styles.append(pstyle)
|
||||||
|
@ -6,8 +6,6 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
from collections import OrderedDict
|
|
||||||
|
|
||||||
from lxml.html.builder import TABLE, TR, TD
|
from lxml.html.builder import TABLE, TR, TD
|
||||||
|
|
||||||
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa
|
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa
|
||||||
@ -212,41 +210,94 @@ class TableStyle(object):
|
|||||||
if val is inherit:
|
if val is inherit:
|
||||||
setattr(self, p, getattr(parent, p))
|
setattr(self, p, getattr(parent, p))
|
||||||
|
|
||||||
|
class Table(object):
    '''
    A single ``w:tbl`` element together with the styles that apply to it and
    the tables nested directly inside its cells.
    '''

    def __init__(self, tbl, styles, para_map):
        '''
        :param tbl: The ``w:tbl`` element this object wraps.
        :param styles: Style collection; ``styles.get(style_id)`` is expected
            to return ``None`` or an object exposing ``table_style``,
            ``paragraph_style`` and ``character_style``.
        :param para_map: Mapping that is updated in place so that every
            paragraph sitting directly in a cell of this table points at this
            object. Nested tables add their own paragraphs first.
        '''
        self.tbl = tbl
        self.styles = styles

        # Resolve the table style: start from the defaults, layer the
        # referenced named style on top, then the direct formatting in w:tblPr
        resolved_table = TableStyle()
        resolved_para = resolved_run = None
        for props in XPath('./w:tblPr')(tbl):
            for ref in XPath('./w:tblStyle[@w:val]')(props):
                named = styles.get(get(ref, 'w:val'))
                if named is None:
                    continue
                if named.table_style is not None:
                    resolved_table.update(named.table_style)
                if named.paragraph_style is not None:
                    if resolved_para is None:
                        resolved_para = named.paragraph_style
                    else:
                        resolved_para.update(named.paragraph_style)
                if named.character_style is not None:
                    if resolved_run is None:
                        resolved_run = named.character_style
                    else:
                        resolved_run.update(named.character_style)
            resolved_table.update(TableStyle(props))
        self.table_style = resolved_table
        self.paragraph_style = resolved_para
        self.run_style = resolved_run
        self.paragraphs = XPath('./w:tr/w:tc/w:p')(tbl)

        # Nested tables are built before this table claims its own paragraphs
        nested = {}
        for child in XPath('./w:tr/w:tc/w:tbl')(tbl):
            nested[child] = Table(child, styles, para_map)
        self.sub_tables = nested
        para_map.update(dict.fromkeys(self.paragraphs, self))

    def __iter__(self):
        ''' Yield this table's own paragraphs, then those of every nested table. '''
        for para in self.paragraphs:
            yield para
        for inner in self.sub_tables.itervalues():
            for para in inner:
                yield para

    def apply_markup(self, rmap, parent=None):
        '''
        Build the <table> markup for this table and move the already
        converted blocks into its cells.

        :param rmap: Mapping from ``w:p`` elements to the blocks generated
            for them.
        :param parent: Element to append the new <table> to. When ``None``
            the <table> is inserted in front of the block generated for the
            first paragraph of this table; a table without any paragraphs
            produces no markup.
        '''
        html_table = TABLE('\n\t\t')
        if parent is not None:
            parent.append(html_table)
        else:
            try:
                anchor = rmap[next(iter(self))]
            except StopIteration:
                return
            parent = anchor.getparent()
            position = parent.index(anchor)
            parent.insert(position, html_table)

        for w_row in XPath('./w:tr')(self.tbl):
            html_row = TR('\n\t\t\t')
            html_row.tail = '\n\t\t'
            html_table.append(html_row)
            for w_cell in XPath('./w:tc')(w_row):
                html_cell = TD()
                html_cell.tail = '\n\t\t\t'
                html_row.append(html_cell)
                self._fill_cell(w_cell, html_cell, rmap)
            if len(html_row):
                # The last cell of a row is followed by the closing </tr>
                html_row[-1].tail = '\n\t\t'
        if len(html_table):
            # The last row is followed by the closing </table>
            html_table[-1].tail = '\n\t'

    def _fill_cell(self, w_cell, html_cell, rmap):
        ''' Move the blocks and nested tables of one ``w:tc`` into its <td>. '''
        for item in XPath('./w:p|./w:tbl')(w_cell):
            if item.tag.endswith('}p'):
                html_cell.append(rmap[item])
            else:
                self.sub_tables[item].apply_markup(rmap, parent=html_cell)
|
||||||
|
|
||||||
|
|
||||||
class Tables(object):
    '''
    Registry of the top-level tables found in the document. Also maps every
    paragraph that lives inside a table to the :class:`Table` that owns it,
    so that table-level paragraph and run styles can be looked up.
    '''

    def __init__(self):
        self.tables = []
        # paragraph element -> Table that directly contains it
        self.para_map = {}
        # w:tbl elements that are already handled as a nested table of some
        # registered table (at any nesting depth)
        self.sub_tables = set()

    def register(self, tbl, styles):
        '''
        Register the ``w:tbl`` element ``tbl``.

        Callers walk the document and register every ``w:tbl`` they see,
        which can include tables nested inside an already registered table.
        Those were built recursively as part of the enclosing table, so
        registering them again would create a second Table object and emit
        duplicate markup for them. Such elements are therefore ignored. This
        relies on an enclosing table being registered before the tables
        nested in it (document order).

        :param tbl: The ``w:tbl`` element.
        :param styles: Style collection, passed through to :class:`Table`.
        '''
        if tbl in self.sub_tables:
            return
        table = Table(tbl, styles, self.para_map)
        self.tables.append(table)
        # Remember nested tables of every depth, not only the direct children
        pending = [table]
        while pending:
            current = pending.pop()
            for elem, nested in current.sub_tables.items():
                self.sub_tables.add(elem)
                pending.append(nested)

    def apply_markup(self, object_map):
        '''
        Generate the markup for all registered tables.

        :param object_map: Mapping whose values are the source paragraph
            elements and whose keys are the blocks generated for them; it is
            inverted before being handed to the tables.
        '''
        # items() rather than iteritems() so this works on Python 2 and 3
        rmap = {v: k for k, v in object_map.items()}
        for table in self.tables:
            table.apply_markup(rmap)

    def para_style(self, p):
        ''' The paragraph style of the table containing ``p``, or ``None``. '''
        return getattr(self.para_map.get(p, None), 'paragraph_style', None)

    def run_style(self, p):
        ''' The run style of the table containing ``p``, or ``None``. '''
        return getattr(self.para_map.get(p, None), 'run_style', None)
|
||||||
|
|
||||||
|
@ -46,9 +46,9 @@ class Convert(object):
|
|||||||
self.dest_dir = dest_dir or os.getcwdu()
|
self.dest_dir = dest_dir or os.getcwdu()
|
||||||
self.mi = self.docx.metadata
|
self.mi = self.docx.metadata
|
||||||
self.body = BODY()
|
self.body = BODY()
|
||||||
self.styles = Styles()
|
|
||||||
self.images = Images()
|
|
||||||
self.tables = Tables()
|
self.tables = Tables()
|
||||||
|
self.styles = Styles(self.tables)
|
||||||
|
self.images = Images()
|
||||||
self.object_map = OrderedDict()
|
self.object_map = OrderedDict()
|
||||||
self.html = HTML(
|
self.html = HTML(
|
||||||
HEAD(
|
HEAD(
|
||||||
@ -100,17 +100,9 @@ class Convert(object):
|
|||||||
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
|
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
|
||||||
dl[-1][0].tail = ']'
|
dl[-1][0].tail = ']'
|
||||||
dl.append(DD())
|
dl.append(DD())
|
||||||
in_table = False
|
|
||||||
for wp in note:
|
for wp in note:
|
||||||
if wp.tag.endswith('}tbl'):
|
if wp.tag.endswith('}tbl'):
|
||||||
self.tables.register(wp)
|
self.tables.register(wp, self.styles)
|
||||||
in_table = True
|
|
||||||
continue
|
|
||||||
if in_table:
|
|
||||||
if ancestor(wp, 'w:tbl') is not None:
|
|
||||||
self.tables.add(wp)
|
|
||||||
else:
|
|
||||||
in_table = False
|
|
||||||
p = self.convert_p(wp)
|
p = self.convert_p(wp)
|
||||||
dl[-1].append(p)
|
dl[-1].append(p)
|
||||||
|
|
||||||
@ -167,12 +159,9 @@ class Convert(object):
|
|||||||
current = []
|
current = []
|
||||||
self.page_map = OrderedDict()
|
self.page_map = OrderedDict()
|
||||||
|
|
||||||
in_table = False
|
|
||||||
|
|
||||||
for p in descendants(doc, 'w:p', 'w:tbl'):
|
for p in descendants(doc, 'w:p', 'w:tbl'):
|
||||||
if p.tag.endswith('}tbl'):
|
if p.tag.endswith('}tbl'):
|
||||||
in_table = True
|
self.tables.register(p, self.styles)
|
||||||
self.tables.register(p)
|
|
||||||
continue
|
continue
|
||||||
sect = tuple(descendants(p, 'w:sectPr'))
|
sect = tuple(descendants(p, 'w:sectPr'))
|
||||||
if sect:
|
if sect:
|
||||||
@ -182,11 +171,7 @@ class Convert(object):
|
|||||||
current = []
|
current = []
|
||||||
else:
|
else:
|
||||||
current.append(p)
|
current.append(p)
|
||||||
if in_table:
|
|
||||||
if ancestor(p, 'w:tbl') is not None:
|
|
||||||
self.tables.add(p)
|
|
||||||
else:
|
|
||||||
in_table = False
|
|
||||||
if current:
|
if current:
|
||||||
last = XPath('./w:body/w:sectPr')(doc)
|
last = XPath('./w:body/w:sectPr')(doc)
|
||||||
pr = PageProperties(last)
|
pr = PageProperties(last)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user