KF8 Input: Fix some links pointing a little above or below their intended target when viewing or converting KF8 files

This commit is contained in:
Kovid Goyal 2012-03-17 23:14:55 +05:30
parent a83654a499
commit 0479f31a5f
2 changed files with 19 additions and 12 deletions

View File

@ -36,7 +36,8 @@ def update_internal_links(mobi8_reader):
filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32), filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32),
int(offset, 32)) int(offset, 32))
suffix = (b'#' + idtag) if idtag else b'' suffix = (b'#' + idtag) if idtag else b''
replacement = filename.encode(mr.header.codec) + suffix replacement = filename.split('/')[-1].encode(
mr.header.codec) + suffix
tag = posfid_index_pattern.sub(replacement, tag, 1) tag = posfid_index_pattern.sub(replacement, tag, 1)
srcpieces[j] = tag srcpieces[j] = tag
part = ''.join([x.decode(mr.header.codec) for x in srcpieces]) part = ''.join([x.decode(mr.header.codec) for x in srcpieces])

View File

@ -233,7 +233,6 @@ class Mobi8Reader(object):
insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid] insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid]
pos = insertpos + offset pos = insertpos + offset
fi = self.get_file_info(pos) fi = self.get_file_info(pos)
fname = fi.filename
# an existing "id=" must exist in original xhtml otherwise it would not # an existing "id=" must exist in original xhtml otherwise it would not
# have worked for linking. Amazon seems to have added its own # have worked for linking. Amazon seems to have added its own
# additional "aid=" inside tags whose contents seem to represent some # additional "aid=" inside tags whose contents seem to represent some
@ -242,7 +241,7 @@ class Mobi8Reader(object):
# so find the closest "id=" before position the file by actually # so find the closest "id=" before position the file by actually
# searching in that file # searching in that file
idtext = self.get_id_tag(pos) idtext = self.get_id_tag(pos)
return fname, idtext return '%s/%s'%(fi.type, fi.filename), idtext
def get_id_tag(self, pos): def get_id_tag(self, pos):
# find the correct tag by actually searching in the destination # find the correct tag by actually searching in the destination
@ -253,12 +252,13 @@ class Mobi8Reader(object):
textblock = self.parts[fi.num] textblock = self.parts[fi.num]
id_map = [] id_map = []
npos = pos - fi.start npos = pos - fi.start
# if npos is inside a tag then search all text before its end of tag
# marker
pgt = textblock.find(b'>', npos) pgt = textblock.find(b'>', npos)
plt = textblock.find(b'<', npos) plt = textblock.find(b'<', npos)
if pgt < plt: # if npos is inside a tag then search all text before its end of tag marker
# else, npos is not in a tag, so we need to search the preceding tag
if plt == npos or pgt < plt:
npos = pgt + 1 npos = pgt + 1
textblock = textblock[0:npos]
# find id links only inside of tags # find id links only inside of tags
# inside any < > pair find all "id=' and return whatever is inside # inside any < > pair find all "id=' and return whatever is inside
# the quotes # the quotes
@ -315,12 +315,18 @@ class Mobi8Reader(object):
# Add href and anchor info to the index entries # Add href and anchor info to the index entries
for entry in index_entries: for entry in index_entries:
pos = entry['pos'] pos_fid = entry['pos_fid']
fi = self.get_file_info(pos) if pos_fid is None:
if fi.filename is None: pos = entry['pos']
raise ValueError('Index entry has invalid pos: %d'%pos) fi = self.get_file_info(pos)
idtag = self.get_id_tag(pos).decode(self.header.codec) if fi.filename is None:
entry['href'] = '%s/%s'%(fi.type, fi.filename) raise ValueError('Index entry has invalid pos: %d'%pos)
idtag = self.get_id_tag(pos).decode(self.header.codec)
href = '%s/%s'%(fi.type, fi.filename)
else:
href, idtag = self.get_id_tag_by_pos_fid(*pos_fid)
entry['href'] = href
entry['idtag'] = idtag entry['idtag'] = idtag
# Build the TOC object # Build the TOC object