mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix set_internal_links()
This commit is contained in:
parent
22ee415241
commit
5d3e24e105
@ -200,9 +200,9 @@ class KF8Writer(object):
|
|||||||
aid = aidbase + j
|
aid = aidbase + j
|
||||||
tag.attrib['aid'] = to_base(aid, base=32)
|
tag.attrib['aid'] = to_base(aid, base=32)
|
||||||
if tag.tag == XHTML('body'):
|
if tag.tag == XHTML('body'):
|
||||||
self.id_map[(item.href, '')] = (i, tag.attrib['aid'])
|
self.id_map[(item.href, '')] = tag.attrib['aid']
|
||||||
if id_ is not None:
|
if id_ is not None:
|
||||||
self.id_map[(item.href, id_)] = (i, tag.attrib['aid'])
|
self.id_map[(item.href, id_)] = tag.attrib['aid']
|
||||||
|
|
||||||
j += 1
|
j += 1
|
||||||
|
|
||||||
|
@ -172,11 +172,11 @@ class Chunker(object):
|
|||||||
body = root.xpath('//body')[0]
|
body = root.xpath('//body')[0]
|
||||||
body.tail = '\n'
|
body.tail = '\n'
|
||||||
|
|
||||||
if self.orig_dumps is not None:
|
if orig_dumps is not None:
|
||||||
self.orig_dumps.append(tostring(root, xml_declaration=True,
|
orig_dumps.append(tostring(root, xml_declaration=True,
|
||||||
with_tail=True))
|
with_tail=True))
|
||||||
self.orig_dumps[-1] = close_self_closing_tags(
|
orig_dumps[-1] = close_self_closing_tags(
|
||||||
self.orig_dumps[-1].replace(b'<html',
|
orig_dumps[-1].replace(b'<html',
|
||||||
bytes('<html xmlns="%s"'%XHTML_NS), 1))
|
bytes('<html xmlns="%s"'%XHTML_NS), 1))
|
||||||
|
|
||||||
# First pass: break up document into rendered strings of length no
|
# First pass: break up document into rendered strings of length no
|
||||||
@ -336,15 +336,35 @@ class Chunker(object):
|
|||||||
num += 1
|
num += 1
|
||||||
|
|
||||||
def set_internal_links(self, text):
|
def set_internal_links(self, text):
|
||||||
# First find the start pos of all tags with aids
|
# A kindle pos:fid link contains two base 32 numbers of the form
|
||||||
aid_map = {}
|
# XXXX:YYYYYYYYYY
|
||||||
|
# The first number is an index into the chunk table and the second is
|
||||||
|
# an offset from the start of the chunk to the start of the tag pointed
|
||||||
|
# to by the link.
|
||||||
|
aid_map = {} # Map of aid to (pos, fid)
|
||||||
for match in re.finditer(br'<[^>]+? aid=[\'"]([A-Z0-9]+)[\'"]', text):
|
for match in re.finditer(br'<[^>]+? aid=[\'"]([A-Z0-9]+)[\'"]', text):
|
||||||
aid_map[match.group(1)] = match.start()
|
offset = match.start()
|
||||||
|
pos_fid = None
|
||||||
|
for chunk in self.chunk_table:
|
||||||
|
if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length:
|
||||||
|
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos)
|
||||||
|
break
|
||||||
|
if chunk.insert_pos > offset:
|
||||||
|
# This aid is in the skeleton, not in a chunk, so we use
|
||||||
|
# the chunk immediately after
|
||||||
|
pos_fid = (chunk.sequence_number, 0)
|
||||||
|
break
|
||||||
|
if pos_fid is None:
|
||||||
|
raise ValueError('Could not find chunk for aid: %r'%
|
||||||
|
match.group(1))
|
||||||
|
aid_map[match.group(1)] = (to_base(chunk.sequence_number,
|
||||||
|
base=32, min_num_digits=4),
|
||||||
|
to_href(offset-chunk.insert_pos))
|
||||||
|
|
||||||
self.aid_offset_map = aid_map
|
self.aid_offset_map = aid_map
|
||||||
|
|
||||||
def to_placeholder(x):
|
def to_placeholder(aid):
|
||||||
file_number, aid = x
|
return bytes(':'.join(aid_map[aid]))
|
||||||
return bytes('%04d:%s'%(file_number, to_href(aid_map[aid])))
|
|
||||||
|
|
||||||
placeholder_map = {bytes(k):to_placeholder(v) for k, v in
|
placeholder_map = {bytes(k):to_placeholder(v) for k, v in
|
||||||
self.placeholder_map.iteritems()}
|
self.placeholder_map.iteritems()}
|
||||||
@ -359,7 +379,7 @@ class Chunker(object):
|
|||||||
pass
|
pass
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
return re.sub(br'<[^>]+(kindle:pos:fid:\d{4}:\d{10})', sub, text)
|
return re.sub(br'<[^>]+(kindle:pos:fid:0000:\d{10})', sub, text)
|
||||||
|
|
||||||
def dump(self, orig_dumps):
|
def dump(self, orig_dumps):
|
||||||
import tempfile, shutil, os
|
import tempfile, shutil, os
|
||||||
|
Loading…
x
Reference in New Issue
Block a user