mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
DOCX Input: URL-less hyperlink fields and stacked block-level bookmarks
DOCX Input: Add support for hyperlink fields that have only anchors and not URLs. DOCX Input: Fix handling of multiple block-level bookmarks at the same location. Fixes #1241451 [links are not working](https://bugs.launchpad.net/calibre/+bug/1241451)
This commit is contained in:
parent
93c872e8f3
commit
e260906211
@ -40,18 +40,16 @@ def parse_hyperlink(raw, log):
|
|||||||
raw = raw.replace('\\\\', '\x01').replace('\\"', '\x02')
|
raw = raw.replace('\\\\', '\x01').replace('\\"', '\x02')
|
||||||
for token, token_type in scanner.scan(raw)[0]:
|
for token, token_type in scanner.scan(raw)[0]:
|
||||||
token = token.replace('\x01', '\\').replace('\x02', '"')
|
token = token.replace('\x01', '\\').replace('\x02', '"')
|
||||||
if not ans:
|
|
||||||
if token_type is not WORD:
|
|
||||||
log('Invalid hyperlink, first token is not a URL (%s)' % raw)
|
|
||||||
return ans
|
|
||||||
ans['url'] = token
|
|
||||||
if token_type is FLAG:
|
if token_type is FLAG:
|
||||||
last_option = {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}.get(token[1], None)
|
last_option = {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}.get(token[1], None)
|
||||||
if last_option is not None:
|
if last_option is not None:
|
||||||
ans[last_option] = None
|
ans[last_option] = None
|
||||||
elif token_type is WORD:
|
elif token_type is WORD:
|
||||||
if last_option is not None:
|
if last_option is None:
|
||||||
|
ans['url'] = token
|
||||||
|
else:
|
||||||
ans[last_option] = token
|
ans[last_option] = token
|
||||||
|
last_option = None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
@ -105,4 +103,20 @@ class Fields(object):
|
|||||||
for runs in all_runs:
|
for runs in all_runs:
|
||||||
self.hyperlink_fields.append((hl, runs))
|
self.hyperlink_fields.append((hl, runs))
|
||||||
|
|
||||||
|
def test_parse_hyperlink():
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
class TestParseHyperLink(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_parsing(self):
|
||||||
|
self.assertEqual(parse_hyperlink(
|
||||||
|
r'\l anchor1', None), {'anchor':'anchor1'})
|
||||||
|
self.assertEqual(parse_hyperlink(
|
||||||
|
r'www.calibre-ebook.com', None), {'url':'www.calibre-ebook.com'})
|
||||||
|
self.assertEqual(parse_hyperlink(
|
||||||
|
r'www.calibre-ebook.com \t target \o tt', None), {'url':'www.calibre-ebook.com', 'target':'target', 'title': 'tt'})
|
||||||
|
self.assertEqual(parse_hyperlink(
|
||||||
|
r'"c:\\Some Folder"', None), {'url': 'c:\\Some Folder'})
|
||||||
|
|
||||||
|
suite = unittest.TestLoader().loadTestsFromTestCase(TestParseHyperLink)
|
||||||
|
unittest.TextTestRunner(verbosity=4).run(suite)
|
||||||
|
@ -346,18 +346,21 @@ class Convert(object):
|
|||||||
def read_block_anchors(self, doc):
|
def read_block_anchors(self, doc):
|
||||||
doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
|
doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
|
||||||
if doc_anchors:
|
if doc_anchors:
|
||||||
current_bm = None
|
current_bm = set()
|
||||||
rmap = {v:k for k, v in self.object_map.iteritems()}
|
rmap = {v:k for k, v in self.object_map.iteritems()}
|
||||||
for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
|
for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
|
||||||
if p.tag.endswith('}p'):
|
if p.tag.endswith('}p'):
|
||||||
if current_bm and p in rmap:
|
if current_bm and p in rmap:
|
||||||
para = rmap[p]
|
para = rmap[p]
|
||||||
if 'id' not in para.attrib:
|
if 'id' not in para.attrib:
|
||||||
para.set('id', generate_anchor(current_bm, frozenset(self.anchor_map.itervalues())))
|
para.set('id', generate_anchor(next(iter(current_bm)), frozenset(self.anchor_map.itervalues())))
|
||||||
self.anchor_map[current_bm] = para.get('id')
|
for name in current_bm:
|
||||||
current_bm = None
|
self.anchor_map[name] = para.get('id')
|
||||||
|
current_bm = set()
|
||||||
elif p in doc_anchors:
|
elif p in doc_anchors:
|
||||||
current_bm = get(p, 'w:name')
|
anchor = get(p, 'w:name')
|
||||||
|
if anchor:
|
||||||
|
current_bm.add(anchor)
|
||||||
|
|
||||||
def convert_p(self, p):
|
def convert_p(self, p):
|
||||||
dest = P()
|
dest = P()
|
||||||
@ -500,7 +503,14 @@ class Convert(object):
|
|||||||
tt = hyperlink.get('title', None)
|
tt = hyperlink.get('title', None)
|
||||||
if tt:
|
if tt:
|
||||||
span.set('title', tt)
|
span.set('title', tt)
|
||||||
url = hyperlink['url']
|
url = hyperlink.get('url', None)
|
||||||
|
if url is None:
|
||||||
|
anchor = hyperlink.get('anchor', None)
|
||||||
|
if anchor in self.anchor_map:
|
||||||
|
span.set('href', '#' + self.anchor_map[anchor])
|
||||||
|
continue
|
||||||
|
self.log.warn('Hyperlink field with unknown anchor: %s' % anchor)
|
||||||
|
else:
|
||||||
if url in self.anchor_map:
|
if url in self.anchor_map:
|
||||||
span.set('href', '#' + self.anchor_map[url])
|
span.set('href', '#' + self.anchor_map[url])
|
||||||
continue
|
continue
|
||||||
|
Loading…
x
Reference in New Issue
Block a user