From 584beceee347f2a18c70728bdd4830381fabe85c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 3 Jul 2013 11:24:12 +0530
Subject: [PATCH] DOCX: handle bookmarks defined at the paragraph level

See https://bugs.launchpad.net/calibre/+bug/1196728 for an example.
---
 src/calibre/ebooks/docx/to_html.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 1fdd24267a..fae521d807 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -99,6 +99,7 @@ class Convert(object):
                 p = self.convert_p(wp)
                 self.body.append(p)
                 paras.append(wp)
+        self.read_block_anchors(doc)
         self.styles.apply_contextual_spacing(paras)
         # Apply page breaks at the start of every section, except the first
         # section (since that will be the start of the file)
@@ -296,6 +297,40 @@ class Convert(object):
             opf.render(of, ncx, 'toc.ncx')
         return os.path.join(self.dest_dir, 'metadata.opf')
 
+    def read_block_anchors(self, doc):
+        '''
+        Handle bookmarks that are direct children of w:body, i.e. that sit
+        between paragraphs instead of inside one. Each such bookmark is
+        pointed at the next paragraph that has a converted counterpart in
+        self.object_map, by recording bookmark name -> element id in
+        self.anchor_map.
+        '''
+        doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
+        if not doc_anchors:
+            return
+        # Names of the body level bookmarks seen since the last paragraph that
+        # was given an anchor. Several bookmarks can precede one paragraph and
+        # every one of them has to resolve to it, so keep them all instead of
+        # only the most recent one.
+        pending = []
+        # object_map is keyed by the generated element (see convert_p), we
+        # need to go from the w:p element to the generated element
+        rmap = {v:k for k, v in self.object_map.iteritems()}
+        # NOTE: relies on descendants() yielding elements in document order
+        for elem in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
+            if elem.tag.endswith('}p'):
+                if pending and elem in rmap:
+                    para = rmap[elem]
+                    if 'id' not in para.attrib:
+                        para.set('id', generate_anchor(pending[-1], frozenset(self.anchor_map.itervalues())))
+                    for name in pending:
+                        self.anchor_map[name] = para.get('id')
+                    pending = []
+            elif elem in doc_anchors:
+                name = get(elem, 'w:name')
+                if name:
+                    pending.append(name)
+
     def convert_p(self, p):
         dest = P()
         self.object_map[dest] = p