mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
DOCX Output: Start work on conversion of tables
This commit is contained in:
parent
de5ce8d46b
commit
0118d5d181
@ -4,7 +4,6 @@ Links
|
|||||||
Various TODOs sprinkled through the source
|
Various TODOs sprinkled through the source
|
||||||
Tables
|
Tables
|
||||||
Lists
|
Lists
|
||||||
Embed Fonts
|
|
||||||
Cover image
|
Cover image
|
||||||
RTL text
|
RTL text
|
||||||
Lang support in run styles <w:lang>
|
Lang support in run styles <w:lang>
|
||||||
|
@ -12,6 +12,7 @@ from calibre.ebooks.docx.writer.container import create_skeleton
|
|||||||
from calibre.ebooks.docx.writer.styles import w, StylesManager
|
from calibre.ebooks.docx.writer.styles import w, StylesManager
|
||||||
from calibre.ebooks.docx.writer.images import ImagesManager
|
from calibre.ebooks.docx.writer.images import ImagesManager
|
||||||
from calibre.ebooks.docx.writer.fonts import FontsManager
|
from calibre.ebooks.docx.writer.fonts import FontsManager
|
||||||
|
from calibre.ebooks.docx.writer.tables import Table
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||||
from calibre.ebooks.oeb.base import XPath, barename
|
from calibre.ebooks.oeb.base import XPath, barename
|
||||||
|
|
||||||
@ -157,23 +158,74 @@ class Block(object):
|
|||||||
|
|
||||||
class Blocks(object):
|
class Blocks(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, styles_manager):
|
||||||
|
self.styles_manager = styles_manager
|
||||||
self.all_blocks = []
|
self.all_blocks = []
|
||||||
self.pos = 0
|
self.pos = 0
|
||||||
self.current_block = None
|
self.current_block = None
|
||||||
|
self.items = []
|
||||||
|
self.tables = []
|
||||||
|
self.current_table = None
|
||||||
|
self.open_html_blocks = set()
|
||||||
|
|
||||||
def start_new_block(self, styles_manager, html_block, style):
|
def current_or_new_block(self, html_tag, tag_style):
|
||||||
|
return self.current_block or self.start_new_block(html_tag, tag_style)
|
||||||
|
|
||||||
|
def end_current_block(self):
|
||||||
if self.current_block is not None:
|
if self.current_block is not None:
|
||||||
self.all_blocks.append(self.current_block)
|
self.all_blocks.append(self.current_block)
|
||||||
self.current_block = Block(styles_manager, html_block, style)
|
if self.current_table is not None:
|
||||||
|
self.current_table.add_block(self.current_block)
|
||||||
|
else:
|
||||||
|
self.block_map[self.current_block] = len(self.items)
|
||||||
|
self.items.append(self.current_block)
|
||||||
|
self.current_block = None
|
||||||
|
|
||||||
|
def start_new_block(self, html_block, style):
|
||||||
|
self.end_current_block()
|
||||||
|
self.current_block = Block(self.styles_manager, html_block, style)
|
||||||
|
self.open_html_blocks.add(html_block)
|
||||||
return self.current_block
|
return self.current_block
|
||||||
|
|
||||||
|
def start_new_table(self, html_tag, tag_style=None):
|
||||||
|
self.current_table = Table(html_tag, tag_style)
|
||||||
|
self.tables.append(self.current_table)
|
||||||
|
|
||||||
|
def start_new_row(self, html_tag, tag_style):
|
||||||
|
if self.current_table is None:
|
||||||
|
self.start_new_table(html_tag)
|
||||||
|
self.current_table.start_new_row(html_tag, tag_style)
|
||||||
|
|
||||||
|
def start_new_cell(self, html_tag, tag_style):
|
||||||
|
if self.current_table is None:
|
||||||
|
self.start_new_table(html_tag)
|
||||||
|
self.current_table.start_new_cell(html_tag, tag_style)
|
||||||
|
|
||||||
|
def finish_tag(self, html_tag):
|
||||||
|
if self.current_block is not None and html_tag in self.open_html_blocks:
|
||||||
|
self.end_current_block()
|
||||||
|
self.open_html_blocks.discard(html_tag)
|
||||||
|
|
||||||
|
if self.current_table is not None:
|
||||||
|
table_finished = self.current_table.finish_tag(html_tag)
|
||||||
|
if table_finished:
|
||||||
|
table = self.tables[-1]
|
||||||
|
del self.tables[-1]
|
||||||
|
if self.tables:
|
||||||
|
self.current_table = self.tables[-1]
|
||||||
|
self.current_table.add_table(table)
|
||||||
|
else:
|
||||||
|
self.current_table = None
|
||||||
|
self.block_map[table] = len(self.items)
|
||||||
|
self.items.append(table)
|
||||||
|
|
||||||
def serialize(self, body):
|
def serialize(self, body):
|
||||||
for block in self.all_blocks:
|
for item in self.items:
|
||||||
block.serialize(body)
|
item.serialize(body)
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self.pos = len(self.all_blocks)
|
self.pos = len(self.all_blocks)
|
||||||
|
self.block_map = {}
|
||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
if self.current_block is not None:
|
if self.current_block is not None:
|
||||||
@ -182,7 +234,9 @@ class Blocks(object):
|
|||||||
if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
|
if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
|
||||||
# Delete the empty block corresponding to the <body> tag when the
|
# Delete the empty block corresponding to the <body> tag when the
|
||||||
# body tag has no inline content before its first sub-block
|
# body tag has no inline content before its first sub-block
|
||||||
|
block = self.all_blocks[self.pos]
|
||||||
del self.all_blocks[self.pos]
|
del self.all_blocks[self.pos]
|
||||||
|
del self.items[self.block_map.pop(block)]
|
||||||
if self.pos > 0 and self.pos < len(self.all_blocks):
|
if self.pos > 0 and self.pos < len(self.all_blocks):
|
||||||
# Insert a page break corresponding to the start of the html file
|
# Insert a page break corresponding to the start of the html file
|
||||||
self.all_blocks[self.pos].page_break_before = True
|
self.all_blocks[self.pos].page_break_before = True
|
||||||
@ -193,8 +247,6 @@ class Convert(object):
|
|||||||
self.oeb, self.docx = oeb, docx
|
self.oeb, self.docx = oeb, docx
|
||||||
self.log, self.opts = docx.log, docx.opts
|
self.log, self.opts = docx.log, docx.opts
|
||||||
|
|
||||||
self.blocks = Blocks()
|
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||||
self.svg_rasterizer = SVGRasterizer()
|
self.svg_rasterizer = SVGRasterizer()
|
||||||
@ -203,6 +255,7 @@ class Convert(object):
|
|||||||
self.styles_manager = StylesManager()
|
self.styles_manager = StylesManager()
|
||||||
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
|
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
|
||||||
self.fonts_manager = FontsManager(self.oeb, self.opts)
|
self.fonts_manager = FontsManager(self.oeb, self.opts)
|
||||||
|
self.blocks = Blocks(self.styles_manager)
|
||||||
|
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
self.process_item(item)
|
self.process_item(item)
|
||||||
@ -228,61 +281,63 @@ class Convert(object):
|
|||||||
if tag_style.is_hidden:
|
if tag_style.is_hidden:
|
||||||
return
|
return
|
||||||
display = tag_style._get('display')
|
display = tag_style._get('display')
|
||||||
inlined = True
|
|
||||||
if display in {'inline', 'inline-block'} or tagname == 'br': # <br> has display:block but we dont want to start a new paragraph
|
if display in {'inline', 'inline-block'} or tagname == 'br': # <br> has display:block but we dont want to start a new paragraph
|
||||||
if self.blocks.current_block is not None:
|
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
|
||||||
elif display == 'list-item':
|
elif display == 'list-item':
|
||||||
# TODO: Implement this
|
# TODO: Implement this
|
||||||
inlined = False
|
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
elif display.startswith('table') or display == 'inline-table':
|
elif display.startswith('table') or display == 'inline-table':
|
||||||
inlined = False
|
|
||||||
# TODO: implement this
|
|
||||||
if display == 'table-cell':
|
if display == 'table-cell':
|
||||||
|
self.blocks.start_new_cell(html_tag, tag_style)
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
|
elif display == 'table-row':
|
||||||
|
self.blocks.start_new_row(html_tag, tag_style)
|
||||||
|
elif display in {'table', 'inline-table'}:
|
||||||
|
self.blocks.start_new_table(html_tag, tag_style)
|
||||||
else:
|
else:
|
||||||
if tagname == 'img' and tag_style['float'] in {'left', 'right'}:
|
if tagname == 'img' and tag_style['float'] in {'left', 'right'}:
|
||||||
# Image is floating so dont start a new paragraph for it
|
# Image is floating so dont start a new paragraph for it
|
||||||
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
else:
|
else:
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
inlined = False
|
|
||||||
|
|
||||||
for child in html_tag.iterchildren('*'):
|
for child in html_tag.iterchildren('*'):
|
||||||
self.process_tag(child, stylizer)
|
self.process_tag(child, stylizer)
|
||||||
|
|
||||||
if not is_first_tag and html_tag.tail:
|
is_block = html_tag in self.blocks.open_html_blocks
|
||||||
if inlined:
|
self.blocks.finish_tag(html_tag)
|
||||||
self.add_text_to_current_block(html_tag.tail, stylizer.style(html_tag.getparent()))
|
if is_block and tag_style['page-break-after'] == 'avoid':
|
||||||
elif html_tag.tail.strip():
|
self.blocks.all_blocks[-1].keep_next = True
|
||||||
self.blocks.start_new_block(self.styles_manager, html_tag.getparent(), stylizer.style(html_tag.getparent()))
|
|
||||||
self.add_text_to_current_block(html_tag.tail, stylizer.style(html_tag.getparent()))
|
|
||||||
|
|
||||||
def add_text_to_current_block(self, text, tag_style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False):
|
if display == 'table-row':
|
||||||
block = self.blocks.current_block
|
return # We ignore the tail for these tags
|
||||||
if block is not None:
|
|
||||||
block.add_text(text, tag_style, ignore_leading_whitespace=ignore_leading_whitespace, html_parent=html_parent, is_parent_style=is_parent_style)
|
if not is_first_tag and html_tag.tail and (not is_block or not html_tag.tail.isspace()):
|
||||||
|
# Ignore trailing space after a block tag, as otherwise it will
|
||||||
|
# become a new empty paragraph
|
||||||
|
block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
|
||||||
|
block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True)
|
||||||
|
|
||||||
def add_block_tag(self, tagname, html_tag, tag_style, stylizer):
|
def add_block_tag(self, tagname, html_tag, tag_style, stylizer):
|
||||||
block = self.blocks.start_new_block(self.styles_manager, html_tag, tag_style)
|
block = self.blocks.start_new_block(html_tag, tag_style)
|
||||||
if tagname == 'img':
|
if tagname == 'img':
|
||||||
self.images_manager.add_image(html_tag, block, stylizer)
|
self.images_manager.add_image(html_tag, block, stylizer)
|
||||||
else:
|
else:
|
||||||
if html_tag.text:
|
if html_tag.text:
|
||||||
block.add_text(html_tag.text, tag_style, ignore_leading_whitespace=True, is_parent_style=True)
|
block.add_text(html_tag.text, tag_style, ignore_leading_whitespace=True, is_parent_style=True)
|
||||||
if tag_style['page-break-after'] == 'avoid':
|
|
||||||
block.keep_next = True
|
|
||||||
|
|
||||||
def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
|
def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
|
||||||
if tagname == 'br':
|
if tagname == 'br':
|
||||||
if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
|
if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
|
||||||
self.blocks.current_block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'))
|
block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
|
||||||
|
block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'))
|
||||||
elif tagname == 'img':
|
elif tagname == 'img':
|
||||||
self.images_manager.add_image(html_tag, self.blocks.current_block, stylizer)
|
block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
|
||||||
|
self.images_manager.add_image(html_tag, block, stylizer)
|
||||||
else:
|
else:
|
||||||
if html_tag.text:
|
if html_tag.text:
|
||||||
self.add_text_to_current_block(html_tag.text, tag_style, html_parent=html_tag)
|
block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
|
||||||
|
block.add_text(html_tag.text, tag_style, is_parent_style=False)
|
||||||
|
|
||||||
def write(self):
|
def write(self):
|
||||||
self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
|
self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
|
||||||
|
96
src/calibre/ebooks/docx/writer/tables.py
Normal file
96
src/calibre/ebooks/docx/writer/tables.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from calibre.ebooks.docx.names import makeelement
|
||||||
|
|
||||||
|
class Cell(object):
|
||||||
|
|
||||||
|
def __init__(self, html_tag, tag_style):
|
||||||
|
self.html_tag = html_tag
|
||||||
|
self.items = []
|
||||||
|
|
||||||
|
def add_block(self, block):
|
||||||
|
self.items.append(block)
|
||||||
|
|
||||||
|
def add_table(self, table):
|
||||||
|
self.items.append(table)
|
||||||
|
return table
|
||||||
|
|
||||||
|
def serialize(self, parent):
|
||||||
|
tc = makeelement(parent, 'w:tc')
|
||||||
|
for item in self.items:
|
||||||
|
item.serialize(tc)
|
||||||
|
|
||||||
|
class Row(object):
|
||||||
|
|
||||||
|
def __init__(self, html_tag, tag_style=None):
|
||||||
|
self.html_tag = html_tag
|
||||||
|
self.cells = []
|
||||||
|
self.current_cell = None
|
||||||
|
|
||||||
|
def start_new_cell(self, html_tag, tag_style):
|
||||||
|
self.current_cell = Cell(html_tag, tag_style)
|
||||||
|
|
||||||
|
def finish_tag(self, html_tag):
|
||||||
|
if self.current_cell is not None:
|
||||||
|
if html_tag is self.current_cell.html_tag:
|
||||||
|
self.cells.append(self.current_cell)
|
||||||
|
self.current_cell = None
|
||||||
|
|
||||||
|
def add_block(self, block):
|
||||||
|
self.current_cell.add_block(block)
|
||||||
|
|
||||||
|
def add_table(self, table):
|
||||||
|
return self.current_cell.add_table(table)
|
||||||
|
|
||||||
|
def serialize(self, parent):
|
||||||
|
tr = makeelement(parent, 'w:tr')
|
||||||
|
for cell in self.cells:
|
||||||
|
cell.serialize(tr)
|
||||||
|
|
||||||
|
class Table(object):
|
||||||
|
|
||||||
|
def __init__(self, html_tag, tag_style=None):
|
||||||
|
self.html_tag = html_tag
|
||||||
|
self.rows = []
|
||||||
|
self.current_row = None
|
||||||
|
|
||||||
|
def finish_tag(self, html_tag):
|
||||||
|
if self.current_row is not None:
|
||||||
|
self.current_row.finish_tag(html_tag)
|
||||||
|
if self.current_row.html_tag is html_tag:
|
||||||
|
self.rows.append(self.current_row)
|
||||||
|
self.current_row = None
|
||||||
|
table_ended = self.html_tag is html_tag
|
||||||
|
return table_ended
|
||||||
|
|
||||||
|
def start_new_row(self, html_tag, html_style):
|
||||||
|
if self.current_row is not None:
|
||||||
|
self.rows.append(self.current_row)
|
||||||
|
self.current_row = Row(html_tag, html_style)
|
||||||
|
|
||||||
|
def start_new_cell(self, html_tag, html_style):
|
||||||
|
if self.current_row is None:
|
||||||
|
self.start_new_row(html_tag, None)
|
||||||
|
self.current_row.start_new_cell(html_tag, html_style)
|
||||||
|
|
||||||
|
def add_block(self, block):
|
||||||
|
self.current_row.add_block(block)
|
||||||
|
|
||||||
|
def add_table(self, table):
|
||||||
|
return self.current_row.add_table(table)
|
||||||
|
|
||||||
|
def serialize(self, parent):
|
||||||
|
rows = [r for r in self.rows if r.cells]
|
||||||
|
if not rows:
|
||||||
|
return
|
||||||
|
tbl = makeelement(parent, 'w:tbl')
|
||||||
|
tblPr = makeelement(tbl, 'w:tblPr')
|
||||||
|
tblPr
|
||||||
|
for row in rows:
|
||||||
|
row.serialize(tbl)
|
Loading…
x
Reference in New Issue
Block a user