DOCX Output: Implement floating text boxes and dropcaps

This commit is contained in:
Kovid Goyal 2015-04-20 12:54:28 +05:30
parent d2d8108e7c
commit 60af0674f4
4 changed files with 131 additions and 18 deletions

View File

@ -2,9 +2,7 @@ Table of Contents
Links Links
<hr> tag (probably as an empty block with a border) <hr> tag (probably as an empty block with a border)
Various TODOs sprinkled through the source Various TODOs sprinkled through the source
Tables
Lists Lists
Cover image Cover image
RTL text RTL text
Lang support in run styles <w:lang> Lang support in run styles <w:lang>
Dropcaps (in general floating display=inline elements)

View File

@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re import re
from calibre.ebooks.docx.writer.container import create_skeleton from calibre.ebooks.docx.writer.container import create_skeleton
from calibre.ebooks.docx.writer.styles import StylesManager from calibre.ebooks.docx.writer.styles import StylesManager, FloatSpec
from calibre.ebooks.docx.writer.images import ImagesManager from calibre.ebooks.docx.writer.images import ImagesManager
from calibre.ebooks.docx.writer.fonts import FontsManager from calibre.ebooks.docx.writer.fonts import FontsManager
from calibre.ebooks.docx.writer.tables import Table from calibre.ebooks.docx.writer.tables import Table
@ -40,7 +40,6 @@ class Stylizer(Sz):
except KeyError: except KeyError:
return Style(element, self) return Style(element, self)
class TextRun(object): class TextRun(object):
ws_pat = None ws_pat = None
@ -85,6 +84,9 @@ class TextRun(object):
if preserve_whitespace: if preserve_whitespace:
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
# Debugging aid: a run is represented by the repr of its ``texts`` attribute.
def __repr__(self):
return repr(self.texts)
def is_empty(self): def is_empty(self):
if not self.texts: if not self.texts:
return True return True
@ -94,15 +96,26 @@ class TextRun(object):
class Block(object): class Block(object):
def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False): def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False, float_spec=None):
self.namespace = namespace self.namespace = namespace
self.parent_items = None
self.html_block = html_block self.html_block = html_block
self.float_spec = float_spec
if float_spec is not None:
float_spec.blocks.append(self)
self.html_style = style self.html_style = style
self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell) self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell)
self.styles_manager = styles_manager self.styles_manager = styles_manager
self.keep_next = False self.keep_next = False
self.page_break_before = False self.page_break_before = False
self.runs = [] self.runs = []
self.skipped = False
def resolve_skipped(self, next_block):
    """Flag this block as skipped when it merely wraps the following block.

    ``self.skipped`` is set to True only when this block is empty (as
    reported by ``is_empty()``) and the first child of its HTML element is
    the very element ``next_block`` was created from. In every other case
    the flag is left untouched.
    """
    children = self.html_block
    wraps_next = (
        self.is_empty() and
        len(children) > 0 and
        children[0] is next_block.html_block
    )
    if wraps_next:
        self.skipped = True
def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False): def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False):
ts = self.styles_manager.create_text_style(style, is_parent_style=is_parent_style) ts = self.styles_manager.create_text_style(style, is_parent_style=is_parent_style)
@ -146,10 +159,15 @@ class Block(object):
makeelement(ppr, 'w:keepNext') makeelement(ppr, 'w:keepNext')
if self.page_break_before: if self.page_break_before:
makeelement(ppr, 'w:pageBreakBefore') makeelement(ppr, 'w:pageBreakBefore')
if self.float_spec is not None:
self.float_spec.serialize(self, ppr)
makeelement(ppr, 'w:pStyle', w_val=self.style.id) makeelement(ppr, 'w:pStyle', w_val=self.style.id)
for run in self.runs: for run in self.runs:
run.serialize(p) run.serialize(p)
# Debugging aid: show the block as ``Block(...)`` wrapping the repr of its runs.
def __repr__(self):
return 'Block(%r)' % self.runs
def is_empty(self): def is_empty(self):
for run in self.runs: for run in self.runs:
if not run.is_empty(): if not run.is_empty():
@ -180,11 +198,12 @@ class Blocks(object):
else: else:
self.block_map[self.current_block] = len(self.items) self.block_map[self.current_block] = len(self.items)
self.items.append(self.current_block) self.items.append(self.current_block)
self.current_block.parent_items = self.items
self.current_block = None self.current_block = None
def start_new_block(self, html_block, style, is_table_cell=False): def start_new_block(self, html_block, style, is_table_cell=False, float_spec=None):
self.end_current_block() self.end_current_block()
self.current_block = Block(self.namespace, self.styles_manager, html_block, style, is_table_cell=is_table_cell) self.current_block = Block(self.namespace, self.styles_manager, html_block, style, is_table_cell=is_table_cell, float_spec=float_spec)
self.open_html_blocks.add(html_block) self.open_html_blocks.add(html_block)
return self.current_block return self.current_block
@ -224,6 +243,19 @@ class Blocks(object):
for item in self.items: for item in self.items:
item.serialize(body) item.serialize(body)
def delete_block_at(self, pos=None):
    """Remove the block at index ``pos`` from every list that references it.

    :param pos: Index into ``self.all_blocks``. When None, ``self.pos`` is
        used instead.

    The block is removed from ``self.all_blocks``, from the item list that
    holds it and, when it belongs to a float spec, from that spec's
    ``blocks`` list.

    Raises IndexError if ``pos`` is out of range and ValueError if the
    block is in neither its ``parent_items`` list nor ``self.items``.
    """
    pos = self.pos if pos is None else pos
    block = self.all_blocks.pop(pos)
    # Not every block has a block_map entry (the map may have been reset, or
    # the block may live in some other item list recorded in parent_items).
    # Look the index up defensively so a non-empty map that lacks this block
    # falls back to list removal instead of raising KeyError.
    item_index = self.block_map.pop(block, None)
    if item_index is not None:
        del self.items[item_index]
    else:
        items = self.items if block.parent_items is None else block.parent_items
        items.remove(block)
    # The block is detached whichever path removed it, so never leave a
    # stale reference to its former container behind.
    block.parent_items = None
    if block.float_spec is not None:
        block.float_spec.blocks.remove(block)
def __enter__(self): def __enter__(self):
self.pos = len(self.all_blocks) self.pos = len(self.all_blocks)
self.block_map = {} self.block_map = {}
@ -235,12 +267,11 @@ class Blocks(object):
if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty(): if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
# Delete the empty block corresponding to the <body> tag when the # Delete the empty block corresponding to the <body> tag when the
# body tag has no inline content before its first sub-block # body tag has no inline content before its first sub-block
block = self.all_blocks[self.pos] self.delete_block_at(self.pos)
del self.all_blocks[self.pos]
del self.items[self.block_map.pop(block)]
if self.pos > 0 and self.pos < len(self.all_blocks): if self.pos > 0 and self.pos < len(self.all_blocks):
# Insert a page break corresponding to the start of the html file # Insert a page break corresponding to the start of the html file
self.all_blocks[self.pos].page_break_before = True self.all_blocks[self.pos].page_break_before = True
self.block_map = {}
class Convert(object): class Convert(object):
@ -261,7 +292,20 @@ class Convert(object):
for item in self.oeb.spine: for item in self.oeb.spine:
self.process_item(item) self.process_item(item)
self.styles_manager.finalize(self.blocks.all_blocks) all_blocks = self.blocks.all_blocks
remove_blocks = []
for i, block in enumerate(all_blocks):
try:
nb = all_blocks[i+1]
except IndexError:
break
block.resolve_skipped(nb)
if block.skipped:
remove_blocks.append((i, block))
for pos, block in reversed(remove_blocks):
self.blocks.delete_block_at(pos)
self.styles_manager.finalize(all_blocks)
self.write() self.write()
def process_item(self, item): def process_item(self, item):
@ -274,16 +318,25 @@ class Convert(object):
with self.blocks: with self.blocks:
self.process_tag(body, stylizer, is_first_tag=i == 0) self.process_tag(body, stylizer, is_first_tag=i == 0)
def process_tag(self, html_tag, stylizer, is_first_tag=False): def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
tagname = barename(html_tag.tag) tagname = barename(html_tag.tag)
if tagname in {'script', 'style', 'title', 'meta'}: if tagname in {'script', 'style', 'title', 'meta'}:
return return
tag_style = stylizer.style(html_tag) tag_style = stylizer.style(html_tag)
if tag_style.is_hidden: if tag_style.is_hidden:
return return
display = tag_style._get('display') display = tag_style._get('display')
is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
if float_spec is None and is_float:
float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)
if display in {'inline', 'inline-block'} or tagname == 'br': # <br> has display:block but we dont want to start a new paragraph if display in {'inline', 'inline-block'} or tagname == 'br': # <br> has display:block but we dont want to start a new paragraph
self.add_inline_tag(tagname, html_tag, tag_style, stylizer) if is_float and float_spec.is_dropcaps:
self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
float_spec = None
else:
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
elif display == 'list-item': elif display == 'list-item':
# TODO: Implement this # TODO: Implement this
self.add_block_tag(tagname, html_tag, tag_style, stylizer) self.add_block_tag(tagname, html_tag, tag_style, stylizer)
@ -297,14 +350,14 @@ class Convert(object):
self.blocks.end_current_block() self.blocks.end_current_block()
self.blocks.start_new_table(html_tag, tag_style) self.blocks.start_new_table(html_tag, tag_style)
else: else:
if tagname == 'img' and tag_style['float'] in {'left', 'right'}: if tagname == 'img' and is_float:
# Image is floating so dont start a new paragraph for it # Image is floating so dont start a new paragraph for it
self.add_inline_tag(tagname, html_tag, tag_style, stylizer) self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
else: else:
self.add_block_tag(tagname, html_tag, tag_style, stylizer) self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
for child in html_tag.iterchildren('*'): for child in html_tag.iterchildren('*'):
self.process_tag(child, stylizer) self.process_tag(child, stylizer, float_spec=float_spec)
is_block = html_tag in self.blocks.open_html_blocks is_block = html_tag in self.blocks.open_html_blocks
self.blocks.finish_tag(html_tag) self.blocks.finish_tag(html_tag)
@ -321,8 +374,8 @@ class Convert(object):
block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent())) block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True) block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True)
def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False): def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None):
block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell) block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec)
if tagname == 'img': if tagname == 'img':
self.images_manager.add_image(html_tag, block, stylizer) self.images_manager.add_image(html_tag, block, stylizer)
else: else:

View File

@ -40,6 +40,67 @@ def css_font_family_to_docx(raw):
def bmap(x): def bmap(x):
return 'on' if x else 'off' return 'on' if x else 'off'
def is_dropcaps(html_tag, tag_style):
    """Return True when a floated element looks like a drop cap.

    An element qualifies when it has fewer than two child elements, its
    text content (tail excluded) is shorter than five characters and its
    ``float`` style is ``left``. The checks run in that order and stop at
    the first one that fails.
    """
    if len(html_tag) >= 2:
        return False
    text = etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)
    if len(text) >= 5:
        return False
    return tag_style['float'] == 'left'
class FloatSpec(object):
    """Frame properties shared by every block generated from one floated element.

    A spec is either a drop cap (``is_dropcaps`` is True) or a generic
    floating frame. Blocks that belong to the spec are collected in
    ``blocks``; ``serialize`` relies on that list to recognise the first and
    last block of the frame.
    """

    def __init__(self, namespace, html_tag, tag_style):
        self.makeelement = namespace.makeelement
        self.is_dropcaps = is_dropcaps(html_tag, tag_style)
        self.blocks = []
        if self.is_dropcaps:
            self.dropcaps_lines = 3
        else:
            self._read_frame_geometry(tag_style)
        # Border and padding attributes are needed for both kinds of float
        read_css_block_borders(self, tag_style)

    def _read_frame_geometry(self, tag_style):
        # Alignment, size and spacing of a non-dropcap frame. All lengths
        # are multiplied by 20 and truncated to int (presumably a
        # points -> twentieths-of-a-point conversion; confirm against the
        # units Style returns).
        self.x_align = tag_style['float']
        self.w = self.h = None
        if tag_style._get('width') != 'auto':
            widest = max(tag_style['min-width'], tag_style['width'])
            self.w = int(20 * widest)
        if tag_style._get('height') == 'auto':
            self.h_rule = 'auto'
        else:
            min_height = tag_style['min-height']
            if min_height > 0:
                self.h_rule, height = 'atLeast', min_height
            else:
                self.h_rule, height = 'exact', tag_style['height']
            self.h = int(20 * height)
        horizontal = max(tag_style['margin-right'], tag_style['margin-left'])
        self.h_space = int(20 * horizontal)
        vertical = max(tag_style['margin-top'], tag_style['margin-bottom'])
        self.v_space = int(20 * vertical)

    def serialize(self, block, parent):
        """Append the frame related properties for ``block`` to ``parent``.

        Emits ``w:framePr`` and ``w:ind`` always, ``w:spacing`` only for the
        first and/or last block of the spec, and finally ``w:pBdr`` with one
        child per entry of ``border_edges``.
        """
        make = self.makeelement
        if self.is_dropcaps:
            frame = {
                'w_dropCap': 'drop', 'w_lines': str(self.dropcaps_lines),
                'w_wrap': 'around', 'w_vAnchor': 'text', 'w_hAnchor': 'text',
            }
        else:
            frame = {
                'w_wrap': 'around', 'w_vAnchor': 'text', 'w_hAnchor': 'text',
                'w_xAlign': self.x_align, 'w_y': '1',
                'w_hSpace': str(self.h_space), 'w_vSpace': str(self.v_space),
                'w_hRule': self.h_rule,
            }
            # Width and height are optional: only written when known
            for key, value in (('w_w', self.w), ('w_h', self.h)):
                if value is not None:
                    frame[key] = str(value)
        make(parent, 'w:framePr', **frame)

        # Margins are already applied by the frame style, so override them to
        # be zero on individual blocks
        make(parent, 'w:ind', w_left='0', w_leftChars='0', w_right='0', w_rightChars='0')
        spacing = {}
        if block is self.blocks[0]:
            spacing['w_before'] = spacing['w_beforeLines'] = '0'
        if block is self.blocks[-1]:
            spacing['w_after'] = spacing['w_afterLines'] = '0'
        if spacing:
            make(parent, 'w:spacing', **spacing)

        # Similarly apply the same border and padding properties to all blocks
        # in this floatspec
        borders = make(parent, 'w:pBdr')
        for edge in border_edges:
            padding = getattr(self, 'padding_' + edge)
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            color = getattr(self, 'border_%s_color' % edge)
            make(borders, 'w:' + edge, w_space=str(padding), w_val=bstyle, w_sz=str(width), w_color=color)
class DOCXStyle(object): class DOCXStyle(object):
ALL_PROPS = () ALL_PROPS = ()

View File

@ -93,6 +93,7 @@ class Cell(object):
def add_block(self, block): def add_block(self, block):
self.items.append(block) self.items.append(block)
block.parent_items = self.items
def add_table(self, table): def add_table(self, table):
self.items.append(table) self.items.append(table)