DOCX Input: Fix cross-references using the "fldSimple" markup not being recognized by calibre.

Apparently, some software in some circumstances generates fields
using fldSimple rather than instrText. fldSimple is in the spec, so add
support for it to calibre's DOCX engine.
This commit is contained in:
Kovid Goyal 2019-04-29 10:15:21 +05:30
parent 97b20f38f5
commit 336d4054d1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -23,7 +23,9 @@ class Field(object):
self.name = None self.name = None
def add_instr(self, elem): def add_instr(self, elem):
raw = elem.text self.add_raw(elem.text)
def add_raw(self, raw):
if not raw: if not raw:
return return
if self.name is None: if self.name is None:
@ -113,7 +115,10 @@ class Fields(object):
self.index_bookmark_prefix = self.index_bookmark_prefix.replace('-', '%d-' % c) self.index_bookmark_prefix = self.index_bookmark_prefix.replace('-', '%d-' % c)
stack = [] stack = []
for elem in self.namespace.XPath( for elem in self.namespace.XPath(
'//*[name()="w:p" or name()="w:r" or name()="w:instrText" or (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end"))]')(doc): '//*[name()="w:p" or name()="w:r" or'
' name()="w:instrText" or'
' (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end") or'
' name()="w:fldSimple")]')(doc):
if elem.tag.endswith('}fldChar'): if elem.tag.endswith('}fldChar'):
typ = self.namespace.get(elem, 'w:fldCharType') typ = self.namespace.get(elem, 'w:fldCharType')
if typ == 'begin': if typ == 'begin':
@ -127,6 +132,14 @@ class Fields(object):
elif elem.tag.endswith('}instrText'): elif elem.tag.endswith('}instrText'):
if stack: if stack:
stack[-1].add_instr(elem) stack[-1].add_instr(elem)
elif elem.tag.endswith('}fldSimple'):
field = Field(elem)
instr = self.namespace.get(elem, 'w:instr')
if instr:
field.add_raw(instr)
self.fields.append(field)
for r in self.namespace.XPath('descendant::w:r')(elem):
field.contents.append(r)
else: else:
if stack: if stack:
stack[-1].contents.append(elem) stack[-1].contents.append(elem)