DOCX: handle bookmarks defined at the paragraph level

See https://bugs.launchpad.net/calibre/+bug/1196728 for an example.
This commit is contained in:
Kovid Goyal 2013-07-03 11:24:12 +05:30
parent 80f3e7f867
commit 584beceee3

View File

@ -99,6 +99,7 @@ class Convert(object):
p = self.convert_p(wp) p = self.convert_p(wp)
self.body.append(p) self.body.append(p)
paras.append(wp) paras.append(wp)
self.read_block_anchors(doc)
self.styles.apply_contextual_spacing(paras) self.styles.apply_contextual_spacing(paras)
# Apply page breaks at the start of every section, except the first # Apply page breaks at the start of every section, except the first
# section (since that will be the start of the file) # section (since that will be the start of the file)
@ -296,6 +297,22 @@ class Convert(object):
opf.render(of, ncx, 'toc.ncx') opf.render(of, ncx, 'toc.ncx')
return os.path.join(self.dest_dir, 'metadata.opf') return os.path.join(self.dest_dir, 'metadata.opf')
def read_block_anchors(self, doc):
    '''
    Register bookmarks that are direct children of <w:body> (i.e. defined
    at the block level rather than inside a paragraph) in self.anchor_map.

    Each such bookmark is attached to the next <w:p> yielded by
    descendants() that has a converted counterpart in self.object_map.
    That converted element is given an id if it does not already have
    one, and every bookmark name pending at that point is mapped to it.

    :param doc: The root element of the parsed document; it is queried
                with the XPath ./w:body/w:bookmarkStart[@w:name].
    '''
    doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
    if not doc_anchors:
        return

    # object_map maps converted element -> source <w:p>; invert it so the
    # converted element can be looked up from the source paragraph.
    rmap = {v:k for k, v in self.object_map.iteritems()}

    # Names of block level bookmarks seen since the last paragraph that
    # received an anchor. A list (not a single name) so that several
    # consecutive bookmarks in front of one paragraph are all preserved.
    pending = []

    # NOTE(review): relies on descendants() yielding elements in document
    # order, so that a bookmark is seen before the paragraph following it.
    for elem in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
        if elem.tag.endswith('}p'):
            # Paragraphs without a converted counterpart are skipped and
            # the pending bookmarks carry over to the next paragraph.
            if pending and elem in rmap:
                para = rmap[elem]
                if 'id' not in para.attrib:
                    # Seed the id from the most recent bookmark name, as
                    # was done when only a single name was tracked.
                    para.set('id', generate_anchor(pending[-1], frozenset(self.anchor_map.itervalues())))
                anchor_id = para.get('id')
                for name in pending:
                    self.anchor_map[name] = anchor_id
                pending = []
        elif elem in doc_anchors:
            name = get(elem, 'w:name')
            # An empty name cannot be a link target; ignoring it also keeps
            # it from discarding a valid bookmark that is still pending.
            if name:
                pending.append(name)
def convert_p(self, p): def convert_p(self, p):
dest = P() dest = P()
self.object_map[dest] = p self.object_map[dest] = p