From 20cc3e2c7a9547aa091b611d428e46ce900dd9c5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 3 Jun 2013 11:03:41 +0530 Subject: [PATCH] DOCX: Floating and justified tables --- src/calibre/ebooks/docx/tables.py | 87 +++++++++++++++++++++++------- src/calibre/ebooks/docx/to_html.py | 9 ++-- 2 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/calibre/ebooks/docx/tables.py b/src/calibre/ebooks/docx/tables.py index 6732533b84..987c6bc53e 100644 --- a/src/calibre/ebooks/docx/tables.py +++ b/src/calibre/ebooks/docx/tables.py @@ -8,11 +8,13 @@ __copyright__ = '2013, Kovid Goyal ' from lxml.html.builder import TABLE, TR, TD -from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa +from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle from calibre.ebooks.docx.char_styles import RunStyle from calibre.ebooks.docx.names import XPath, get, is_tag # Read from XML {{{ +read_shd = rs + def _read_width(elem): ans = inherit try: @@ -73,6 +75,12 @@ def read_spacing(parent, dest): ans = _read_width(cs) setattr(dest, 'spacing', ans) +def read_float(parent, dest): + ans = inherit + for x in XPath('./w:tblpPr')(parent): + ans = x.attrib + setattr(dest, 'float', ans) + def read_indent(parent, dest): ans = inherit for cs in XPath('./w:tblInd')(parent): @@ -139,7 +147,10 @@ def read_look(parent, dest): # }}} def clone(style): - ans = type(style)() + try: + ans = type(style)() + except TypeError: + return None ans.update(style) return ans @@ -147,12 +158,16 @@ class RowStyle(object): all_properties = ('height', 'cantSplit', 'hidden', 'spacing',) - def __init__(self, tcPr=None): - if tcPr is None: + def __init__(self, trPr=None): + if trPr is None: for p in self.all_properties: setattr(self, p, inherit) else: - pass + for p in ('hidden', 'cantSplit'): + setattr(self, p, binary_property(trPr, p)) + for p in ('spacing', 'height'): + f = globals()['read_%s' % p] + f(trPr, self) class CellStyle(object): @@ -160,19 +175,19 @@ class CellStyle(object): 'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', ) + tuple(k % edge for edge in border_edges for k in border_props) - def __init__(self, trPr=None): - if trPr is None: + def __init__(self, tcPr=None): + if tcPr is None: for p in self.all_properties: setattr(self, p, inherit) else: for x in ('borders', 'shd', 'padding', 'cell_width', 'vertical_align', 'col_span', 'merge'): f = globals()['read_%s' % x] - f(trPr, self) + f(tcPr, self) class TableStyle(object): all_properties = ( - 'width', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', + 'width', 'float', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', 'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color', 'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look', ) + tuple(k % edge for edge in border_edges for k in border_props) @@ -183,7 +198,7 @@ class TableStyle(object): setattr(self, p, inherit) else: self.overrides = inherit - for x in ('width', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'): + for x in ('width', 'float', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'): f = globals()['read_%s' % x] f(tblPr, self) parent = tblPr.getparent() @@ -202,6 +217,7 @@ class TableStyle(object): orides['para'] = ParagraphStyle(pPr) for rPr in XPath('./w:rPr')(tblStylePr): orides['run'] = RunStyle(rPr) + self._css = None def update(self, other): for prop in self.all_properties: @@ -215,6 +231,37 @@ class TableStyle(object): if val is inherit: setattr(self, p, getattr(parent, p)) + @property + def css(self): + if self._css is None: + c = self._css = {} + for x in ('width', 'background_color', 'margin_left', 'margin_right'): + val = getattr(self, x) + if val is not inherit: + c[x.replace('_', '-')] = val + if self.indent not in (inherit, 'auto') and self.margin_left != 'auto': + c['margin-left'] = self.indent + if self.float is not inherit: + for x in ('left', 'top', 'right', 'bottom'): + val = self.float.get('%sFromText' % x, 0) + try: + val = '%.3gpt' % (int(val) / 20) + except (ValueError, TypeError): + val = '0' + c['margin-%s' % x] = val + if 'tblpXSpec' in self.float: + c['float'] = 'right' if self.float['tblpXSpec'] in {'right', 'outside'} else 'left' + else: + page = self.page + page_width = page.width - page.margin_left - page.margin_right + try: + x = int(self.float['tblpX']) / 20 + except (KeyError, ValueError, TypeError): + x = 0 + c['float'] = 'left' if (x/page_width) < 0.65 else 'right' + return self._css + + class Table(object): def __init__(self, tbl, styles, para_map): @@ -243,6 +290,9 @@ class Table(object): style['table'].update(TableStyle(tblPr)) self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None) self.run_style = style.get('run', None) + self.overrides = self.table_style.overrides + if 'wholeTable' in self.overrides and 'table' in self.overrides['wholeTable']: + self.table_style.update(self.overrides['wholeTable']['table']) self.style_map = {} self.paragraphs = [] @@ -304,12 +354,11 @@ class Table(object): return tuple(filter(self.override_allowed, overrides)) def resolve_para_style(self, p, overrides): - text_styles = [None if self.paragraph_style is None else clone(self.paragraph_style), - None if self.run_style is None else clone(self.run_style)] + text_styles = [clone(self.paragraph_style), clone(self.run_style)] for o in overrides: - if o in self.table_style.overrides: - ovr = self.table_style.overrides[o] + if o in self.overrides: + ovr = self.overrides[o] for i, name in enumerate(('para', 'run')): ops = ovr.get(name, None) if ops is not None: @@ -326,8 +375,10 @@ class Table(object): for p in t: yield p - def apply_markup(self, rmap, parent=None): + def apply_markup(self, rmap, page, parent=None): table = TABLE('\n\t\t') + self.table_style.page = page + table.set('class', self.styles.register(self.table_style.css, 'table')) if parent is None: try: first_para = rmap[next(iter(self))] @@ -350,7 +401,7 @@ class Table(object): if x.tag.endswith('}p'): td.append(rmap[x]) else: - self.sub_tables[x].apply_markup(rmap, parent=td) + self.sub_tables[x].apply_markup(rmap, page, parent=td) if len(tr): tr[-1].tail = '\n\t\t' if len(table): @@ -366,10 +417,10 @@ class Tables(object): def register(self, tbl, styles): self.tables.append(Table(tbl, styles, self.para_map)) - def apply_markup(self, object_map): + def apply_markup(self, object_map, page_map): rmap = {v:k for k, v in object_map.iteritems()} for table in self.tables: - table.apply_markup(rmap) + table.apply_markup(rmap, page_map[table.tbl]) def para_style(self, p): table = self.para_map.get(p, None) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 2f945e8980..5a2a530d4f 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -85,8 +85,9 @@ class Convert(object): self.read_page_properties(doc) for wp, page_properties in self.page_map.iteritems(): self.current_page = page_properties - p = self.convert_p(wp) - self.body.append(p) + if wp.tag.endswith('}p'): + p = self.convert_p(wp) + self.body.append(p) notes_header = None if self.footnotes.has_notes: @@ -103,6 +104,7 @@ class Convert(object): for wp in note: if wp.tag.endswith('}tbl'): self.tables.register(wp, self.styles) + self.page_map[wp] = self.current_page p = self.convert_p(wp) dl[-1].append(p) @@ -110,7 +112,7 @@ class Convert(object): self.styles.cascade(self.layers) - self.tables.apply_markup(self.object_map) + self.tables.apply_markup(self.object_map, self.page_map) numbered = [] for html_obj, obj in self.object_map.iteritems(): @@ -162,6 +164,7 @@ class Convert(object): for p in descendants(doc, 'w:p', 'w:tbl'): if p.tag.endswith('}tbl'): self.tables.register(p, self.styles) + current.append(p) continue sect = tuple(descendants(p, 'w:sectPr')) if sect: