diff --git a/src/calibre/ebooks/docx/writer/TODO b/src/calibre/ebooks/docx/writer/TODO
index 0bff135431..36b7677104 100644
--- a/src/calibre/ebooks/docx/writer/TODO
+++ b/src/calibre/ebooks/docx/writer/TODO
@@ -2,9 +2,7 @@ Table of Contents
Links
tag (probably as an empty block with a border)
Various TODOs sprinkled through the source
-Tables
Lists
Cover image
RTL text
Lang support in run styles
-Dropcaps (in general floating display=inline elements)
diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py
index 6929e445bf..a71371391b 100644
--- a/src/calibre/ebooks/docx/writer/from_html.py
+++ b/src/calibre/ebooks/docx/writer/from_html.py
@@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal '
import re
from calibre.ebooks.docx.writer.container import create_skeleton
-from calibre.ebooks.docx.writer.styles import StylesManager
+from calibre.ebooks.docx.writer.styles import StylesManager, FloatSpec
from calibre.ebooks.docx.writer.images import ImagesManager
from calibre.ebooks.docx.writer.fonts import FontsManager
from calibre.ebooks.docx.writer.tables import Table
@@ -40,7 +40,6 @@ class Stylizer(Sz):
except KeyError:
return Style(element, self)
-
class TextRun(object):
ws_pat = None
@@ -85,6 +84,9 @@ class TextRun(object):
if preserve_whitespace:
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+ def __repr__(self):
+ return repr(self.texts)
+
def is_empty(self):
if not self.texts:
return True
@@ -94,15 +96,26 @@ class TextRun(object):
class Block(object):
- def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False):
+ def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False, float_spec=None):
self.namespace = namespace
+ self.parent_items = None
self.html_block = html_block
+ self.float_spec = float_spec
+ if float_spec is not None:
+ float_spec.blocks.append(self)
self.html_style = style
self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell)
self.styles_manager = styles_manager
self.keep_next = False
self.page_break_before = False
self.runs = []
+ self.skipped = False
+
+ def resolve_skipped(self, next_block):
+ if not self.is_empty():
+ return
+ if len(self.html_block) > 0 and self.html_block[0] is next_block.html_block:
+ self.skipped = True
def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False):
ts = self.styles_manager.create_text_style(style, is_parent_style=is_parent_style)
@@ -146,10 +159,15 @@ class Block(object):
makeelement(ppr, 'w:keepNext')
if self.page_break_before:
makeelement(ppr, 'w:pageBreakBefore')
+ if self.float_spec is not None:
+ self.float_spec.serialize(self, ppr)
makeelement(ppr, 'w:pStyle', w_val=self.style.id)
for run in self.runs:
run.serialize(p)
+ def __repr__(self):
+ return 'Block(%r)' % self.runs
+
def is_empty(self):
for run in self.runs:
if not run.is_empty():
@@ -180,11 +198,12 @@ class Blocks(object):
else:
self.block_map[self.current_block] = len(self.items)
self.items.append(self.current_block)
+ self.current_block.parent_items = self.items
self.current_block = None
- def start_new_block(self, html_block, style, is_table_cell=False):
+ def start_new_block(self, html_block, style, is_table_cell=False, float_spec=None):
self.end_current_block()
- self.current_block = Block(self.namespace, self.styles_manager, html_block, style, is_table_cell=is_table_cell)
+ self.current_block = Block(self.namespace, self.styles_manager, html_block, style, is_table_cell=is_table_cell, float_spec=float_spec)
self.open_html_blocks.add(html_block)
return self.current_block
@@ -224,6 +243,19 @@ class Blocks(object):
for item in self.items:
item.serialize(body)
+ def delete_block_at(self, pos=None):
+ pos = self.pos if pos is None else pos
+ block = self.all_blocks[pos]
+ del self.all_blocks[pos]
+ if self.block_map:
+ del self.items[self.block_map.pop(block)]
+ else:
+ items = self.items if block.parent_items is None else block.parent_items
+ items.remove(block)
+ block.parent_items = None
+ if block.float_spec is not None:
+ block.float_spec.blocks.remove(block)
+
def __enter__(self):
self.pos = len(self.all_blocks)
self.block_map = {}
@@ -235,12 +267,11 @@ class Blocks(object):
if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
# Delete the empty block corresponding to the tag when the
# body tag has no inline content before its first sub-block
- block = self.all_blocks[self.pos]
- del self.all_blocks[self.pos]
- del self.items[self.block_map.pop(block)]
+ self.delete_block_at(self.pos)
if self.pos > 0 and self.pos < len(self.all_blocks):
# Insert a page break corresponding to the start of the html file
self.all_blocks[self.pos].page_break_before = True
+ self.block_map = {}
class Convert(object):
@@ -261,7 +292,20 @@ class Convert(object):
for item in self.oeb.spine:
self.process_item(item)
- self.styles_manager.finalize(self.blocks.all_blocks)
+ all_blocks = self.blocks.all_blocks
+ remove_blocks = []
+ for i, block in enumerate(all_blocks):
+ try:
+ nb = all_blocks[i+1]
+ except IndexError:
+ break
+ block.resolve_skipped(nb)
+ if block.skipped:
+ remove_blocks.append((i, block))
+ for pos, block in reversed(remove_blocks):
+ self.blocks.delete_block_at(pos)
+
+ self.styles_manager.finalize(all_blocks)
self.write()
def process_item(self, item):
@@ -274,16 +318,25 @@ class Convert(object):
with self.blocks:
self.process_tag(body, stylizer, is_first_tag=i == 0)
- def process_tag(self, html_tag, stylizer, is_first_tag=False):
+ def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
tagname = barename(html_tag.tag)
if tagname in {'script', 'style', 'title', 'meta'}:
return
tag_style = stylizer.style(html_tag)
if tag_style.is_hidden:
return
+
display = tag_style._get('display')
+ is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
+ if float_spec is None and is_float:
+ float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)
+
if display in {'inline', 'inline-block'} or tagname == 'br': #
has display:block but we dont want to start a new paragraph
- self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
+ if is_float and float_spec.is_dropcaps:
+ self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
+ float_spec = None
+ else:
+ self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
elif display == 'list-item':
# TODO: Implement this
self.add_block_tag(tagname, html_tag, tag_style, stylizer)
@@ -297,14 +350,14 @@ class Convert(object):
self.blocks.end_current_block()
self.blocks.start_new_table(html_tag, tag_style)
else:
- if tagname == 'img' and tag_style['float'] in {'left', 'right'}:
+ if tagname == 'img' and is_float:
# Image is floating so dont start a new paragraph for it
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
else:
- self.add_block_tag(tagname, html_tag, tag_style, stylizer)
+ self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
for child in html_tag.iterchildren('*'):
- self.process_tag(child, stylizer)
+ self.process_tag(child, stylizer, float_spec=float_spec)
is_block = html_tag in self.blocks.open_html_blocks
self.blocks.finish_tag(html_tag)
@@ -321,8 +374,8 @@ class Convert(object):
block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True)
- def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False):
- block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell)
+ def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None):
+ block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec)
if tagname == 'img':
self.images_manager.add_image(html_tag, block, stylizer)
else:
diff --git a/src/calibre/ebooks/docx/writer/styles.py b/src/calibre/ebooks/docx/writer/styles.py
index e52fe1ba05..6689c730b2 100644
--- a/src/calibre/ebooks/docx/writer/styles.py
+++ b/src/calibre/ebooks/docx/writer/styles.py
@@ -40,6 +40,67 @@ def css_font_family_to_docx(raw):
def bmap(x):
return 'on' if x else 'off'
+def is_dropcaps(html_tag, tag_style):
+ return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left'
+
+class FloatSpec(object):
+
+ def __init__(self, namespace, html_tag, tag_style):
+ self.makeelement = namespace.makeelement
+ self.is_dropcaps = is_dropcaps(html_tag, tag_style)
+ self.blocks = []
+ if self.is_dropcaps:
+ self.dropcaps_lines = 3
+ else:
+ self.x_align = tag_style['float']
+ self.w = self.h = None
+ if tag_style._get('width') != 'auto':
+ self.w = int(20 * max(tag_style['min-width'], tag_style['width']))
+ if tag_style._get('height') == 'auto':
+ self.h_rule = 'auto'
+ else:
+ if tag_style['min-height'] > 0:
+ self.h_rule, self.h = 'atLeast', tag_style['min-height']
+ else:
+ self.h_rule, self.h = 'exact', tag_style['height']
+ self.h = int(20 * self.h)
+ self.h_space = int(20 * max(tag_style['margin-right'], tag_style['margin-left']))
+ self.v_space = int(20 * max(tag_style['margin-top'], tag_style['margin-bottom']))
+
+ read_css_block_borders(self, tag_style)
+
+ def serialize(self, block, parent):
+ if self.is_dropcaps:
+ attrs = dict(w_dropCap='drop', w_lines=str(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text')
+ else:
+ attrs = dict(
+ w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1',
+ w_hSpace=str(self.h_space), w_vSpace=str(self.v_space), w_hRule=self.h_rule
+ )
+ if self.w is not None:
+ attrs['w_w'] = str(self.w)
+ if self.h is not None:
+ attrs['w_h'] = str(self.h)
+ self.makeelement(parent, 'w:framePr', **attrs)
+ # Margins are already applied by the frame style, so override them to
+ # be zero on individual blocks
+ self.makeelement(parent, 'w:ind', w_left='0', w_leftChars='0', w_right='0', w_rightChars='0')
+ attrs = {}
+ if block is self.blocks[0]:
+ attrs.update(dict(w_before='0', w_beforeLines='0'))
+ if block is self.blocks[-1]:
+ attrs.update(dict(w_after='0', w_afterLines='0'))
+ if attrs:
+ self.makeelement(parent, 'w:spacing', **attrs)
+ # Similarly apply the same border and padding properties to all blocks
+ # in this floatspec
+ bdr = self.makeelement(parent, 'w:pBdr')
+ for edge in border_edges:
+ padding = getattr(self, 'padding_' + edge)
+ width = getattr(self, 'border_%s_width' % edge)
+ bstyle = getattr(self, 'border_%s_style' % edge)
+ self.makeelement(bdr, 'w:'+edge, w_space=str(padding), w_val=bstyle, w_sz=str(width), w_color=getattr(self, 'border_%s_color' % edge))
+
class DOCXStyle(object):
ALL_PROPS = ()
diff --git a/src/calibre/ebooks/docx/writer/tables.py b/src/calibre/ebooks/docx/writer/tables.py
index 544a79e2ab..9fb9389763 100644
--- a/src/calibre/ebooks/docx/writer/tables.py
+++ b/src/calibre/ebooks/docx/writer/tables.py
@@ -93,6 +93,7 @@ class Cell(object):
def add_block(self, block):
self.items.append(block)
+ block.parent_items = self.items
def add_table(self, table):
self.items.append(table)