From 584beceee347f2a18c70728bdd4830381fabe85c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Jul 2013 11:24:12 +0530
Subject: [PATCH] DOCX: handle bookmarks defined at the paragraph level

See https://bugs.launchpad.net/calibre/+bug/1196728 for an example.
---
 src/calibre/ebooks/docx/to_html.py | 38 +++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 1fdd24267a..fae521d807 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -99,6 +99,7 @@ class Convert(object):
                 p = self.convert_p(wp)
                 self.body.append(p)
                 paras.append(wp)
+        self.read_block_anchors(doc)
         self.styles.apply_contextual_spacing(paras)
         # Apply page breaks at the start of every section, except the first
         # section (since that will be the start of the file)
@@ -296,6 +297,43 @@ class Convert(object):
             opf.render(of, ncx, 'toc.ncx')
         return os.path.join(self.dest_dir, 'metadata.opf')
 
+    def read_block_anchors(self, doc):
+        '''
+        Map bookmarks defined at the block level (w:bookmarkStart elements
+        that are direct children of w:body) to the first converted paragraph
+        that follows them in document order.
+
+        Every bookmark name that precedes a paragraph is recorded in
+        self.anchor_map with that paragraph's id; an id is generated for the
+        paragraph if it does not already have one.
+
+        :param doc: Parsed document element whose w:body child is searched
+        '''
+        doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
+        if not doc_anchors:
+            return
+        # Several bookmarks can precede the same paragraph and all of them must
+        # resolve to it, so collect the names rather than keeping only the last.
+        pending = []
+        # object_map goes from generated HTML element to source element; invert
+        # it to look up the HTML paragraph created for a given w:p.
+        rmap = {v:k for k, v in self.object_map.iteritems()}
+        for elem in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
+            if elem.tag.endswith('}p'):
+                if pending and elem in rmap:
+                    para = rmap[elem]
+                    if 'id' not in para.attrib:
+                        # Seed the generated id with the nearest preceding bookmark name
+                        para.set('id', generate_anchor(pending[-1], frozenset(self.anchor_map.itervalues())))
+                    anchor_id = para.get('id')
+                    for name in pending:
+                        self.anchor_map[name] = anchor_id
+                    pending = []
+            elif elem in doc_anchors:
+                name = get(elem, 'w:name')
+                if name:
+                    pending.append(name)
+
     def convert_p(self, p):
         dest = P()
         self.object_map[dest] = p