From 336d4054d1b50fbe37f45f8366d1e5655e728db7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 29 Apr 2019 10:15:21 +0530 Subject: [PATCH] DOCX Input: Fix cross-references using the "fldSimple" markup not being recognized by calibre. Apparently some software in some circumstances generates fields using fldSimple rather than instrText. fldSimple is in the spec, so add support for it to calibre's DOCX engine. --- src/calibre/ebooks/docx/fields.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/docx/fields.py b/src/calibre/ebooks/docx/fields.py index 286588371e..f974470fc4 100644 --- a/src/calibre/ebooks/docx/fields.py +++ b/src/calibre/ebooks/docx/fields.py @@ -23,7 +23,9 @@ class Field(object): self.name = None def add_instr(self, elem): - raw = elem.text + self.add_raw(elem.text) + + def add_raw(self, raw): if not raw: return if self.name is None: @@ -113,7 +115,10 @@ class Fields(object): self.index_bookmark_prefix = self.index_bookmark_prefix.replace('-', '%d-' % c) stack = [] for elem in self.namespace.XPath( - '//*[name()="w:p" or name()="w:r" or name()="w:instrText" or (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end"))]')(doc): + '//*[name()="w:p" or name()="w:r" or' + ' name()="w:instrText" or' + ' (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end") or' + ' name()="w:fldSimple")]')(doc): if elem.tag.endswith('}fldChar'): typ = self.namespace.get(elem, 'w:fldCharType') if typ == 'begin': @@ -127,6 +132,14 @@ class Fields(object): elif elem.tag.endswith('}instrText'): if stack: stack[-1].add_instr(elem) + elif elem.tag.endswith('}fldSimple'): + field = Field(elem) + instr = self.namespace.get(elem, 'w:instr') + if instr: + field.add_raw(instr) + self.fields.append(field) + for r in self.namespace.XPath('descendant::w:r')(elem): + field.contents.append(r) else: if stack: stack[-1].contents.append(elem)