From e26090621174551c9782bb5a05fb619129924baa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 22 Oct 2013 15:46:31 +0530 Subject: [PATCH] DOCX Input: URL less hyperlink fields and stacked block level bookmarks DOCX Input: Add support for hyperlink fields that have only anchors and not URLs DOCX Input: Fix handling of multiple block level bookmarks at the same location. Fixes #1241451 [links are not working](https://bugs.launchpad.net/calibre/+bug/1241451) --- src/calibre/ebooks/docx/fields.py | 26 ++++++++++++++++++++------ src/calibre/ebooks/docx/to_html.py | 30 ++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/calibre/ebooks/docx/fields.py b/src/calibre/ebooks/docx/fields.py index 91dcd87596..6617728f0c 100644 --- a/src/calibre/ebooks/docx/fields.py +++ b/src/calibre/ebooks/docx/fields.py @@ -40,18 +40,16 @@ def parse_hyperlink(raw, log): raw = raw.replace('\\\\', '\x01').replace('\\"', '\x02') for token, token_type in scanner.scan(raw)[0]: token = token.replace('\x01', '\\').replace('\x02', '"') - if not ans: - if token_type is not WORD: - log('Invalid hyperlink, first token is not a URL (%s)' % raw) - return ans - ans['url'] = token if token_type is FLAG: last_option = {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}.get(token[1], None) if last_option is not None: ans[last_option] = None elif token_type is WORD: - if last_option is not None: + if last_option is None: + ans['url'] = token + else: ans[last_option] = token + last_option = None return ans @@ -105,4 +103,20 @@ class Fields(object): for runs in all_runs: self.hyperlink_fields.append((hl, runs)) +def test_parse_hyperlink(): + import unittest + class TestParseHyperLink(unittest.TestCase): + + def test_parsing(self): + self.assertEqual(parse_hyperlink( + r'\l anchor1', None), {'anchor':'anchor1'}) + self.assertEqual(parse_hyperlink( + r'www.calibre-ebook.com', None), {'url':'www.calibre-ebook.com'}) + self.assertEqual(parse_hyperlink( + r'www.calibre-ebook.com \t target \o tt', None), {'url':'www.calibre-ebook.com', 'target':'target', 'title': 'tt'}) + self.assertEqual(parse_hyperlink( + r'"c:\\Some Folder"', None), {'url': 'c:\\Some Folder'}) + + suite = unittest.TestLoader().loadTestsFromTestCase(TestParseHyperLink) + unittest.TextTestRunner(verbosity=4).run(suite) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index cb33b255ca..a7afb5445a 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -346,18 +346,21 @@ class Convert(object): def read_block_anchors(self, doc): doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc)) if doc_anchors: - current_bm = None + current_bm = set() rmap = {v:k for k, v in self.object_map.iteritems()} for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'): if p.tag.endswith('}p'): if current_bm and p in rmap: para = rmap[p] if 'id' not in para.attrib: - para.set('id', generate_anchor(current_bm, frozenset(self.anchor_map.itervalues()))) - self.anchor_map[current_bm] = para.get('id') - current_bm = None + para.set('id', generate_anchor(next(iter(current_bm)), frozenset(self.anchor_map.itervalues()))) + for name in current_bm: + self.anchor_map[name] = para.get('id') + current_bm = set() elif p in doc_anchors: - current_bm = get(p, 'w:name') + anchor = get(p, 'w:name') + if anchor: + current_bm.add(anchor) def convert_p(self, p): dest = P() @@ -500,11 +503,18 @@ class Convert(object): tt = hyperlink.get('title', None) if tt: span.set('title', tt) - url = hyperlink['url'] - if url in self.anchor_map: - span.set('href', '#' + self.anchor_map[url]) - continue - span.set('href', url) + url = hyperlink.get('url', None) + if url is None: + anchor = hyperlink.get('anchor', None) + if anchor in self.anchor_map: + span.set('href', '#' + self.anchor_map[anchor]) + continue + self.log.warn('Hyperlink field with unknown anchor: %s' % anchor) + else: + if url in self.anchor_map: + span.set('href', '#' + self.anchor_map[url]) + continue + span.set('href', url) for img, link in self.images.links: parent = img.getparent()