# Attribute-name templates for one border edge; '%s' is filled in with the
# edge name (e.g. 'left') by the callers below.
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')


def read_single_border(parent, edge):
    """Read the border settings of one edge from the children of *parent*.

    Every ``w:<edge>`` child of *parent* is inspected; when several exist,
    values found in later elements replace those found in earlier ones.

    :param parent: element whose ``w:<edge>`` children describe the border
    :param edge: edge name used to build the XPath expression (e.g. 'left')
    :return: dict keyed by the (unsubstituted) templates in ``border_props``;
        a value is None when the corresponding attribute was absent or
        could not be converted to a float.
    """
    padding = width = style = color = None
    for node in XPath('./w:%s' % edge)(parent):
        raw_color = get(node, 'w:color')
        if raw_color is not None:
            color = simple_color(raw_color)

        raw_style = get(node, 'w:val')
        if raw_style is not None:
            # Unknown line styles are rendered as a solid border
            style = LINE_STYLES.get(raw_style, 'solid')

        raw_space = get(node, 'w:space')
        if raw_space is not None:
            try:
                padding = float(raw_space)
            except (ValueError, TypeError):
                pass

        raw_size = get(node, 'w:sz')
        if raw_size is not None:
            # We don't care about art borders (they are only used for page
            # borders). The size is clamped to the range 2..96 and then
            # divided by 8.
            try:
                width = min(96, max(2, float(raw_size))) / 8
            except (ValueError, TypeError):
                pass

    return dict(zip(border_props, (padding, width, style, color)))


def read_border(parent, dest, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'):
    """Read border properties from *parent* and store them on *dest*.

    For every edge in *border_edges* and every template in ``border_props``
    an attribute is set on *dest*. Attributes default to ``inherit`` and are
    overridden by whatever the ``w:<name>`` children of *parent* specify.

    :param parent: element that may contain ``w:<name>`` children
    :param dest: object that receives the attributes via setattr()
    :param border_edges: edge names to look for inside each border element
    :param name: local name of the border container element
    """
    vals = {}
    for edge in border_edges:
        for template in border_props:
            vals[template % edge] = inherit

    for border in XPath('./w:' + name)(parent):
        for edge in border_edges:
            found = read_single_border(border, edge)
            for template in border_props:
                value = found[template]
                if value is not None:
                    vals[template % edge] = value

    for key, val in vals.iteritems():
        setattr(dest, key, val)
ts + else: + self.table_style.update(ts) + if self.style_type in {'paragraph', 'table'}: for pPr in XPath('./w:pPr')(elem): ps = ParagraphStyle(pPr) if self.paragraph_style is None: @@ -90,6 +98,10 @@ class Style(object): self.numbering_style_link = get(x, 'w:val') def resolve_based_on(self, parent): + if parent.table_style is not None: + if self.table_style is None: + self.table_style = TableStyle() + self.table_style.resolve_based_on(parent.table_style) if parent.paragraph_style is not None: if self.paragraph_style is None: self.paragraph_style = ParagraphStyle() diff --git a/src/calibre/ebooks/docx/tables.py b/src/calibre/ebooks/docx/tables.py new file mode 100644 index 0000000000..feec4eb274 --- /dev/null +++ b/src/calibre/ebooks/docx/tables.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from collections import OrderedDict + +from lxml.html.builder import TABLE, TR, TD + +from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border ,border_props # noqa +from calibre.ebooks.docx.names import XPath, get + +def _read_width(elem): + ans = inherit + try: + w = int(get(elem, 'w:w')) + except (TypeError, ValueError): + w = 0 + typ = get(elem, 'w:type', 'auto') + if typ == 'nil': + ans = '0' + elif typ == 'auto': + ans = 'auto' + elif typ == 'dxa': + ans = '%.3gpt' % (w/20) + elif typ == 'pct': + ans = '%.3g%%' % (w/50) + return ans + +def read_width(parent, dest): + ans = inherit + for tblW in XPath('./w:tblW')(parent): + ans = _read_width(tblW) + setattr(dest, 'width', ans) + +def read_padding(parent, dest): + name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar' + left = top = bottom = right = inherit + for mar in XPath('./w:%s' % name)(parent): + for x in ('left', 'top', 'right', 'bottom'): + for edge in XPath('./w:%s' % x)(mar): + locals()[x] = _read_width(edge) 
+ for x in ('left', 'top', 'right', 'bottom'): + setattr(dest, 'cell_padding_%s' % x, locals()[x]) + +def read_justification(parent, dest): + left = right = inherit + for jc in XPath('./w:jc[@w:val]')(parent): + val = get(jc, 'w:val') + if not val: + continue + if val == 'left': + right = 'auto' + elif val == 'right': + left = 'auto' + elif val == 'center': + left = right = 'auto' + setattr(dest, 'margin_left', left) + setattr(dest, 'margin_right', right) + +def read_spacing(parent, dest): + ans = inherit + for cs in XPath('./w:tblCellSpacing')(parent): + ans = _read_width(cs) + setattr(dest, 'spacing', ans) + +def read_indent(parent, dest): + ans = inherit + for cs in XPath('./w:tblInd')(parent): + ans = _read_width(cs) + setattr(dest, 'indent', ans) + +border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV') + +def read_borders(parent, dest): + name = 'tblBorders' if parent.tag.endswith('}tblPr') else 'tcBorders' + read_border(parent, dest, border_edges, name) + +class TableStyle(object): + + all_properties = ( + 'width', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', + 'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color', + 'spacing', 'indent', + ) + tuple(k % edge for edge in border_edges for k in border_props) + + def __init__(self, tblPr=None): + if tblPr is None: + for p in self.all_properties: + setattr(self, p, inherit) + else: + for x in ('width', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders'): + f = globals()['read_%s' % x] + f(tblPr, self) + + self._css = None + + def update(self, other): + for prop in self.all_properties: + nval = getattr(other, prop) + if nval is not inherit: + setattr(self, prop, nval) + + def resolve_based_on(self, parent): + for p in self.all_properties: + val = getattr(self, p) + if val is inherit: + setattr(self, p, getattr(parent, p)) + + @property + def css(self): + return self._css + +class Tables(object): + + def __init__(self): + self.tables = 
OrderedDict() + + def register(self, tbl): + self.tables[tbl] = self.current_table = [] + + def add(self, p): + self.current_table.append(p) + + def apply_markup(self, object_map): + rmap = {v:k for k, v in object_map.iteritems()} + for tbl, blocks in self.tables.iteritems(): + if not blocks: + continue + parent = rmap[blocks[0]].getparent() + table = TABLE('\n\t\t') + idx = parent.index(rmap[blocks[0]]) + parent.insert(idx, table) + for row in XPath('./w:tr')(tbl): + tr = TR('\n\t\t\t') + tr.tail = '\n\t\t' + table.append(tr) + for tc in XPath('./w:tc')(row): + td = TD() + td.tail = '\n\t\t\t' + tr.append(td) + for p in XPath('./w:p')(tc): + block = rmap[p] + td.append(block) + if len(tr): + tr[-1].tail = '\n\t\t' + if len(table): + table[-1].tail = '\n\t' + diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index a46687d6c3..1250b60dd2 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -21,6 +21,7 @@ from calibre.ebooks.docx.styles import Styles, inherit, PageProperties from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.fonts import Fonts from calibre.ebooks.docx.images import Images +from calibre.ebooks.docx.tables import Tables from calibre.ebooks.docx.footnotes import Footnotes from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.toc import TOC @@ -47,6 +48,7 @@ class Convert(object): self.body = BODY() self.styles = Styles() self.images = Images() + self.tables = Tables() self.object_map = OrderedDict() self.html = HTML( HEAD( @@ -98,15 +100,26 @@ class Convert(object): dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor)) dl[-1][0].tail = ']' dl.append(DD()) + in_table = False for wp in note: + if wp.tag.endswith('}tbl'): + self.tables.register(wp) + in_table = True + continue + if in_table: + if ancestor(wp, 'w:tbl') is not None: + self.tables.add(wp) + else: + in_table = False p = self.convert_p(wp) 
dl[-1].append(p) self.resolve_links(relationships_by_id) - # TODO: tables child of (nested tables?) self.styles.cascade(self.layers) + self.tables.apply_markup(self.object_map) + numbered = [] for html_obj, obj in self.object_map.iteritems(): raw = obj.get('calibre_num_id', None) @@ -154,7 +167,13 @@ class Convert(object): current = [] self.page_map = OrderedDict() - for p in descendants(doc, 'w:p'): + in_table = False + + for p in descendants(doc, 'w:p', 'w:tbl'): + if p.tag.endswith('}tbl'): + in_table = True + self.tables.register(p) + continue sect = tuple(descendants(p, 'w:sectPr')) if sect: pr = PageProperties(sect) @@ -163,6 +182,11 @@ class Convert(object): current = [] else: current.append(p) + if in_table: + if ancestor(p, 'w:tbl') is not None: + self.tables.add(p) + else: + in_table = False if current: last = XPath('./w:body/w:sectPr')(doc) pr = PageProperties(last)