mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
DOCX Input: URL less hyperlink fields and stacked block level bookmarks
DOCX Input: Add support for hyperlink fields that have only anchors and not URLs. DOCX Input: Fix handling of multiple block level bookmarks at the same location. Fixes #1241451 [links are not working](https://bugs.launchpad.net/calibre/+bug/1241451)
This commit is contained in:
parent
93c872e8f3
commit
e260906211
@ -40,18 +40,16 @@ def parse_hyperlink(raw, log):
|
||||
raw = raw.replace('\\\\', '\x01').replace('\\"', '\x02')
|
||||
for token, token_type in scanner.scan(raw)[0]:
|
||||
token = token.replace('\x01', '\\').replace('\x02', '"')
|
||||
if not ans:
|
||||
if token_type is not WORD:
|
||||
log('Invalid hyperlink, first token is not a URL (%s)' % raw)
|
||||
return ans
|
||||
ans['url'] = token
|
||||
if token_type is FLAG:
|
||||
last_option = {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}.get(token[1], None)
|
||||
if last_option is not None:
|
||||
ans[last_option] = None
|
||||
elif token_type is WORD:
|
||||
if last_option is not None:
|
||||
if last_option is None:
|
||||
ans['url'] = token
|
||||
else:
|
||||
ans[last_option] = token
|
||||
last_option = None
|
||||
return ans
|
||||
|
||||
|
||||
@ -105,4 +103,20 @@ class Fields(object):
|
||||
for runs in all_runs:
|
||||
self.hyperlink_fields.append((hl, runs))
|
||||
|
||||
def test_parse_hyperlink():
    """Run a small unittest suite that exercises parse_hyperlink.

    Every case pairs a raw hyperlink field string with the dictionary that
    parse_hyperlink must produce for it. The log argument is passed as None
    for all cases. Results are reported through a TextTestRunner; nothing
    is returned.
    """
    import unittest

    class TestParseHyperLink(unittest.TestCase):

        def test_parsing(self):
            # (raw field text, expected parse result), checked in order
            cases = (
                # Anchor-only field, no URL
                (r'\l anchor1', {'anchor':'anchor1'}),
                # Bare URL
                (r'www.calibre-ebook.com', {'url':'www.calibre-ebook.com'}),
                # URL followed by flag/value pairs
                (r'www.calibre-ebook.com \t target \o tt',
                 {'url':'www.calibre-ebook.com', 'target':'target', 'title': 'tt'}),
                # Quoted value containing an escaped backslash and a space
                (r'"c:\\Some Folder"', {'url': 'c:\\Some Folder'}),
            )
            for raw, expected in cases:
                self.assertEqual(parse_hyperlink(raw, None), expected)

    loader = unittest.TestLoader()
    suite = loader.loadTestsFromTestCase(TestParseHyperLink)
    runner = unittest.TextTestRunner(verbosity=4)
    runner.run(suite)
|
||||
|
@ -346,18 +346,21 @@ class Convert(object):
|
||||
def read_block_anchors(self, doc):
|
||||
doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
|
||||
if doc_anchors:
|
||||
current_bm = None
|
||||
current_bm = set()
|
||||
rmap = {v:k for k, v in self.object_map.iteritems()}
|
||||
for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
|
||||
if p.tag.endswith('}p'):
|
||||
if current_bm and p in rmap:
|
||||
para = rmap[p]
|
||||
if 'id' not in para.attrib:
|
||||
para.set('id', generate_anchor(current_bm, frozenset(self.anchor_map.itervalues())))
|
||||
self.anchor_map[current_bm] = para.get('id')
|
||||
current_bm = None
|
||||
para.set('id', generate_anchor(next(iter(current_bm)), frozenset(self.anchor_map.itervalues())))
|
||||
for name in current_bm:
|
||||
self.anchor_map[name] = para.get('id')
|
||||
current_bm = set()
|
||||
elif p in doc_anchors:
|
||||
current_bm = get(p, 'w:name')
|
||||
anchor = get(p, 'w:name')
|
||||
if anchor:
|
||||
current_bm.add(anchor)
|
||||
|
||||
def convert_p(self, p):
|
||||
dest = P()
|
||||
@ -500,11 +503,18 @@ class Convert(object):
|
||||
tt = hyperlink.get('title', None)
|
||||
if tt:
|
||||
span.set('title', tt)
|
||||
url = hyperlink['url']
|
||||
if url in self.anchor_map:
|
||||
span.set('href', '#' + self.anchor_map[url])
|
||||
continue
|
||||
span.set('href', url)
|
||||
url = hyperlink.get('url', None)
|
||||
if url is None:
|
||||
anchor = hyperlink.get('anchor', None)
|
||||
if anchor in self.anchor_map:
|
||||
span.set('href', '#' + self.anchor_map[anchor])
|
||||
continue
|
||||
self.log.warn('Hyperlink field with unknown anchor: %s' % anchor)
|
||||
else:
|
||||
if url in self.anchor_map:
|
||||
span.set('href', '#' + self.anchor_map[url])
|
||||
continue
|
||||
span.set('href', url)
|
||||
|
||||
for img, link in self.images.links:
|
||||
parent = img.getparent()
|
||||
|
Loading…
x
Reference in New Issue
Block a user