From a33aa68f60d78039ebe81c7f64261be9d68dea66 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 27 Jan 2014 16:49:16 +0530
Subject: [PATCH] DOCX Input: Handle text boxes better

DOCX Input: Improve handling of text boxes. Text boxes are still not
supported, but they no longer cause errors or duplicate content.

Fixes #1273130 [Converting from .docx fails](https://bugs.launchpad.net/calibre/+bug/1273130)
---
 src/calibre/ebooks/docx/to_html.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 01bb915dcb..1ba8e07182 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -373,8 +373,16 @@ class Convert(object):
         current_hyperlink = None
         hl_xpath = XPath('ancestor::w:hyperlink[1]')
 
+        def p_parent(x):
+            # Ensure that nested <w:p> tags are handled. These can occur if a
+            # textbox is present inside a paragraph.
+            while x is not None:
+                x = x.getparent()
+                if x is not None and x.tag.endswith('}p'):  # getparent() is None past the root
+                    return x
+
         for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
-            if x.tag.endswith('}r'):
+            if x.tag.endswith('}r') and p_parent(x) is p:
                 span = self.convert_run(x)
                 if current_anchor is not None:
                     (dest if len(dest) == 0 else span).set('id', current_anchor)