DOCX Input: Handle text boxes better

DOCX Input: Improve handling of text boxes. Text boxes are still not
supported, but they no longer cause errors or duplicate content.
Fixes #1273130 [Converting from .docx fails](https://bugs.launchpad.net/calibre/+bug/1273130)
This commit is contained in:
Kovid Goyal 2014-01-27 16:49:16 +05:30
parent d38bc2998d
commit a33aa68f60

View File

@ -373,8 +373,16 @@ class Convert(object):
current_hyperlink = None current_hyperlink = None
hl_xpath = XPath('ancestor::w:hyperlink[1]') hl_xpath = XPath('ancestor::w:hyperlink[1]')
def p_parent(x):
    """Return the nearest ancestor of ``x`` whose tag ends with ``'}p'``.

    Nested <w:p> tags can occur if a textbox is present inside a
    paragraph, so a run found among a paragraph's descendants may really
    belong to an inner paragraph. Callers compare the returned element
    with the paragraph being converted to tell the two cases apart.

    :param x: an element exposing ``getparent()`` and ``tag``.
    :return: the closest enclosing paragraph element, or ``None`` when
        no ancestor of ``x`` is a paragraph.
    """
    ancestor = x.getparent()
    # Check for None *before* touching ``.tag``: walking past the root
    # must yield None rather than raise AttributeError.
    while ancestor is not None:
        if ancestor.tag.endswith('}p'):
            return ancestor
        ancestor = ancestor.getparent()
    return None
for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'): for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
if x.tag.endswith('}r'): if x.tag.endswith('}r') and p_parent(x) is p:
span = self.convert_run(x) span = self.convert_run(x)
if current_anchor is not None: if current_anchor is not None:
(dest if len(dest) == 0 else span).set('id', current_anchor) (dest if len(dest) == 0 else span).set('id', current_anchor)