diff --git a/src/calibre/ebooks/docx/writer/TODO b/src/calibre/ebooks/docx/writer/TODO index 0bff135431..36b7677104 100644 --- a/src/calibre/ebooks/docx/writer/TODO +++ b/src/calibre/ebooks/docx/writer/TODO @@ -2,9 +2,7 @@ Table of Contents Links
tag (probably as an empty block with a border) Various TODOs sprinkled through the source -Tables Lists Cover image RTL text Lang support in run styles -Dropcaps (in general floating display=inline elements) diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index 6929e445bf..a71371391b 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal ' import re from calibre.ebooks.docx.writer.container import create_skeleton -from calibre.ebooks.docx.writer.styles import StylesManager +from calibre.ebooks.docx.writer.styles import StylesManager, FloatSpec from calibre.ebooks.docx.writer.images import ImagesManager from calibre.ebooks.docx.writer.fonts import FontsManager from calibre.ebooks.docx.writer.tables import Table @@ -40,7 +40,6 @@ class Stylizer(Sz): except KeyError: return Style(element, self) - class TextRun(object): ws_pat = None @@ -85,6 +84,9 @@ class TextRun(object): if preserve_whitespace: t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') + def __repr__(self): + return repr(self.texts) + def is_empty(self): if not self.texts: return True @@ -94,15 +96,26 @@ class TextRun(object): class Block(object): - def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False): + def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False, float_spec=None): self.namespace = namespace + self.parent_items = None self.html_block = html_block + self.float_spec = float_spec + if float_spec is not None: + float_spec.blocks.append(self) self.html_style = style self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell) self.styles_manager = styles_manager self.keep_next = False self.page_break_before = False self.runs = [] + self.skipped = False + + def resolve_skipped(self, next_block): + if not self.is_empty(): + return + if len(self.html_block) > 0 and self.html_block[0] is next_block.html_block: + self.skipped = True def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False): ts = self.styles_manager.create_text_style(style, is_parent_style=is_parent_style) @@ -146,10 +159,15 @@ class Block(object): makeelement(ppr, 'w:keepNext') if self.page_break_before: makeelement(ppr, 'w:pageBreakBefore') + if self.float_spec is not None: + self.float_spec.serialize(self, ppr) makeelement(ppr, 'w:pStyle', w_val=self.style.id) for run in self.runs: run.serialize(p) + def __repr__(self): + return 'Block(%r)' % self.runs + def is_empty(self): for run in self.runs: if not run.is_empty(): @@ -180,11 +198,12 @@ class Blocks(object): else: self.block_map[self.current_block] = len(self.items) self.items.append(self.current_block) + self.current_block.parent_items = self.items self.current_block = None - def start_new_block(self, html_block, style, is_table_cell=False): + def start_new_block(self, html_block, style, is_table_cell=False, float_spec=None): self.end_current_block() - self.current_block = Block(self.namespace, self.styles_manager, html_block, style, is_table_cell=is_table_cell) + self.current_block = Block(self.namespace, self.styles_manager, html_block, style, is_table_cell=is_table_cell, float_spec=float_spec) self.open_html_blocks.add(html_block) return self.current_block @@ -224,6 +243,19 @@ class Blocks(object): for item in self.items: item.serialize(body) + def delete_block_at(self, pos=None): + pos = self.pos if pos is None else pos + block = self.all_blocks[pos] + del self.all_blocks[pos] + if self.block_map: + del self.items[self.block_map.pop(block)] + else: + items = self.items if block.parent_items is None else block.parent_items + items.remove(block) + block.parent_items = None + if block.float_spec is not None: + block.float_spec.blocks.remove(block) + def __enter__(self): self.pos = len(self.all_blocks) self.block_map = {} @@ -235,12 +267,11 @@ class Blocks(object): if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty(): # Delete the empty block corresponding to the tag when the # body tag has no inline content before its first sub-block - block = self.all_blocks[self.pos] - del self.all_blocks[self.pos] - del self.items[self.block_map.pop(block)] + self.delete_block_at(self.pos) if self.pos > 0 and self.pos < len(self.all_blocks): # Insert a page break corresponding to the start of the html file self.all_blocks[self.pos].page_break_before = True + self.block_map = {} class Convert(object): @@ -261,7 +292,20 @@ class Convert(object): for item in self.oeb.spine: self.process_item(item) - self.styles_manager.finalize(self.blocks.all_blocks) + all_blocks = self.blocks.all_blocks + remove_blocks = [] + for i, block in enumerate(all_blocks): + try: + nb = all_blocks[i+1] + except IndexError: + break + block.resolve_skipped(nb) + if block.skipped: + remove_blocks.append((i, block)) + for pos, block in reversed(remove_blocks): + self.blocks.delete_block_at(pos) + + self.styles_manager.finalize(all_blocks) self.write() def process_item(self, item): @@ -274,16 +318,25 @@ class Convert(object): with self.blocks: self.process_tag(body, stylizer, is_first_tag=i == 0) - def process_tag(self, html_tag, stylizer, is_first_tag=False): + def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None): tagname = barename(html_tag.tag) if tagname in {'script', 'style', 'title', 'meta'}: return tag_style = stylizer.style(html_tag) if tag_style.is_hidden: return + display = tag_style._get('display') + is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag + if float_spec is None and is_float: + float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style) + if display in {'inline', 'inline-block'} or tagname == 'br': #
has display:block but we dont want to start a new paragraph - self.add_inline_tag(tagname, html_tag, tag_style, stylizer) + if is_float and float_spec.is_dropcaps: + self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec) + float_spec = None + else: + self.add_inline_tag(tagname, html_tag, tag_style, stylizer) elif display == 'list-item': # TODO: Implement this self.add_block_tag(tagname, html_tag, tag_style, stylizer) @@ -297,14 +350,14 @@ class Convert(object): self.blocks.end_current_block() self.blocks.start_new_table(html_tag, tag_style) else: - if tagname == 'img' and tag_style['float'] in {'left', 'right'}: + if tagname == 'img' and is_float: # Image is floating so dont start a new paragraph for it self.add_inline_tag(tagname, html_tag, tag_style, stylizer) else: - self.add_block_tag(tagname, html_tag, tag_style, stylizer) + self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec) for child in html_tag.iterchildren('*'): - self.process_tag(child, stylizer) + self.process_tag(child, stylizer, float_spec=float_spec) is_block = html_tag in self.blocks.open_html_blocks self.blocks.finish_tag(html_tag) @@ -321,8 +374,8 @@ class Convert(object): block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent())) block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True) - def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False): - block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell) + def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None): + block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec) if tagname == 'img': self.images_manager.add_image(html_tag, block, stylizer) else: diff --git a/src/calibre/ebooks/docx/writer/styles.py b/src/calibre/ebooks/docx/writer/styles.py index e52fe1ba05..6689c730b2 100644 --- a/src/calibre/ebooks/docx/writer/styles.py +++ b/src/calibre/ebooks/docx/writer/styles.py @@ -40,6 +40,67 @@ def css_font_family_to_docx(raw): def bmap(x): return 'on' if x else 'off' +def is_dropcaps(html_tag, tag_style): + return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left' + +class FloatSpec(object): + + def __init__(self, namespace, html_tag, tag_style): + self.makeelement = namespace.makeelement + self.is_dropcaps = is_dropcaps(html_tag, tag_style) + self.blocks = [] + if self.is_dropcaps: + self.dropcaps_lines = 3 + else: + self.x_align = tag_style['float'] + self.w = self.h = None + if tag_style._get('width') != 'auto': + self.w = int(20 * max(tag_style['min-width'], tag_style['width'])) + if tag_style._get('height') == 'auto': + self.h_rule = 'auto' + else: + if tag_style['min-height'] > 0: + self.h_rule, self.h = 'atLeast', tag_style['min-height'] + else: + self.h_rule, self.h = 'exact', tag_style['height'] + self.h = int(20 * self.h) + self.h_space = int(20 * max(tag_style['margin-right'], tag_style['margin-left'])) + self.v_space = int(20 * max(tag_style['margin-top'], tag_style['margin-bottom'])) + + read_css_block_borders(self, tag_style) + + def serialize(self, block, parent): + if self.is_dropcaps: + attrs = dict(w_dropCap='drop', w_lines=str(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text') + else: + attrs = dict( + w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1', + w_hSpace=str(self.h_space), w_vSpace=str(self.v_space), w_hRule=self.h_rule + ) + if self.w is not None: + attrs['w_w'] = str(self.w) + if self.h is not None: + attrs['w_h'] = str(self.h) + self.makeelement(parent, 'w:framePr', **attrs) + # Margins are already applied by the frame style, so override them to + # be zero on individual blocks + self.makeelement(parent, 'w:ind', w_left='0', w_leftChars='0', w_right='0', w_rightChars='0') + attrs = {} + if block is self.blocks[0]: + attrs.update(dict(w_before='0', w_beforeLines='0')) + if block is self.blocks[-1]: + attrs.update(dict(w_after='0', w_afterLines='0')) + if attrs: + self.makeelement(parent, 'w:spacing', **attrs) + # Similarly apply the same border and padding properties to all blocks + # in this floatspec + bdr = self.makeelement(parent, 'w:pBdr') + for edge in border_edges: + padding = getattr(self, 'padding_' + edge) + width = getattr(self, 'border_%s_width' % edge) + bstyle = getattr(self, 'border_%s_style' % edge) + self.makeelement(bdr, 'w:'+edge, w_space=str(padding), w_val=bstyle, w_sz=str(width), w_color=getattr(self, 'border_%s_color' % edge)) + class DOCXStyle(object): ALL_PROPS = () diff --git a/src/calibre/ebooks/docx/writer/tables.py b/src/calibre/ebooks/docx/writer/tables.py index 544a79e2ab..9fb9389763 100644 --- a/src/calibre/ebooks/docx/writer/tables.py +++ b/src/calibre/ebooks/docx/writer/tables.py @@ -93,6 +93,7 @@ class Cell(object): def add_block(self, block): self.items.append(block) + block.parent_items = self.items def add_table(self, table): self.items.append(table)