From 31bf791ad66fe935721adde60778cd42f2a70afb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 17 Jan 2019 15:04:51 +0530 Subject: [PATCH] DOCX Input: When converting indices, put each sub-entry on its own line. See #1811611 (Word to EPUB adds a digit at the end of some chapters) --- src/calibre/ebooks/docx/index.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/docx/index.py b/src/calibre/ebooks/docx/index.py index d7631bc4a0..b4bd00b792 100644 --- a/src/calibre/ebooks/docx/index.py +++ b/src/calibre/ebooks/docx/index.py @@ -59,9 +59,11 @@ def make_block(expand, style, parent, pos): def add_xe(xe, t, expand): - text = xe.get('text', '') + run = t.getparent() + idx = run.index(t) + t.text = xe.get('text') or ' ' pt = xe.get('page-number-text', None) - t.text = text or ' ' + if pt: p = t.getparent().getparent() r = p.makeelement(expand('w:r')) @@ -70,7 +72,9 @@ def add_xe(xe, t, expand): t2.set(expand('xml:space'), 'preserve') t2.text = ' [%s]' % pt r.append(t2) - return xe['anchor'], t.getparent() + # put separate entries on separate lines + run.insert(idx + 1, run.makeelement(expand('w:br'))) + return xe['anchor'], run def process_index(field, index, xe_fields, log, XPath, expand): @@ -139,6 +143,7 @@ def split_up_block(block, a, text, parts, ldict): span.append(a) ldict[span] = len(prefix) + """ The merge algorithm is a little tricky. We start with a list of elementary blocks. Each is an HtmlElement, a p node @@ -255,6 +260,9 @@ def polish_index_markup(index, blocks): span.append(a[0]) ldict[span] = 0 + for br in block.xpath('descendant::br'): + br.tail = None + # We want a single block for each main entry prev_block = blocks[0] for block in blocks[1:]: @@ -263,4 +271,3 @@ def polish_index_markup(index, blocks): merge_blocks(prev_block, block, 0, 0, pn, ldict) else: prev_block = block -