KF8 Input: Fix some links pointing a little above or below their intended target when viewing or converting KF8 files

2025-07-09 03:04:10 -04:00 · 2012-03-17 23:14:55 +05:30 · 2012-03-17 23:14:55 +05:30 · 0479f31a5f
commit 0479f31a5f
parent a83654a499
2 changed files with 19 additions and 12 deletions
--- a/src/calibre/ebooks/mobi/reader/markup.py
+++ b/src/calibre/ebooks/mobi/reader/markup.py
@ -36,7 +36,8 @@ def update_internal_links(mobi8_reader):
                    filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32),
                            int(offset, 32))
                    suffix = (b'#' + idtag) if idtag else b''
-                    replacement = filename.encode(mr.header.codec) + suffix
+                    replacement = filename.split('/')[-1].encode(
                            mr.header.codec) + suffix
                    tag = posfid_index_pattern.sub(replacement, tag, 1)
                srcpieces[j] = tag
        part = ''.join([x.decode(mr.header.codec) for x in srcpieces])
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@ -233,7 +233,6 @@ class Mobi8Reader(object):
        insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid]
        pos = insertpos + offset
        fi = self.get_file_info(pos)
        fname = fi.filename
        # an existing "id=" must exist in original xhtml otherwise it would not
        # have worked for linking.  Amazon seems to have added its own
        # additional "aid=" inside tags whose contents seem to represent some
@ -242,7 +241,7 @@ class Mobi8Reader(object):
        # so find the closest "id=" before the position in the file by actually
        # searching in that file
        idtext = self.get_id_tag(pos)
-        return fname, idtext
+        return '%s/%s'%(fi.type, fi.filename), idtext
    def get_id_tag(self, pos):
        # find the correct tag by actually searching in the destination
@ -253,12 +252,13 @@ class Mobi8Reader(object):
        textblock = self.parts[fi.num]
        id_map = []
        npos = pos - fi.start
        # if npos is inside a tag then search all text before its end-of-tag
        # marker
        pgt = textblock.find(b'>', npos)
        plt = textblock.find(b'<', npos)
-        if pgt < plt:
+        # if npos is inside a tag then search all text up to its end-of-tag marker,
        # else npos is not in a tag and we need to search the preceding tag
        if plt == npos or pgt < plt:
            npos = pgt + 1
        textblock = textblock[0:npos]
        # find id links only inside of tags
        #    inside any < > pair find all "id=" and return whatever is inside
        #    the quotes
@ -315,12 +315,18 @@ class Mobi8Reader(object):
        # Add href and anchor info to the index entries
        for entry in index_entries:
-            pos = entry['pos']
+            pos_fid = entry['pos_fid']
-            fi = self.get_file_info(pos)
+            if pos_fid is None:
-            if fi.filename is None:
+                pos = entry['pos']
-                raise ValueError('Index entry has invalid pos: %d'%pos)
+                fi = self.get_file_info(pos)
-            idtag = self.get_id_tag(pos).decode(self.header.codec)
+                if fi.filename is None:
-            entry['href'] = '%s/%s'%(fi.type, fi.filename)
+                    raise ValueError('Index entry has invalid pos: %d'%pos)
                idtag = self.get_id_tag(pos).decode(self.header.codec)
                href = '%s/%s'%(fi.type, fi.filename)
            else:
                href, idtag = self.get_id_tag_by_pos_fid(*pos_fid)
            entry['href'] = href
            entry['idtag'] = idtag
        # Build the TOC object