From 0479f31a5f072a8c02661c373391eaf498dc1209 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 17 Mar 2012 23:14:55 +0530 Subject: [PATCH] KF8 Input: Fix some links pointing a little above or below their intended target when viewing or converting KF8 files --- src/calibre/ebooks/mobi/reader/markup.py | 3 ++- src/calibre/ebooks/mobi/reader/mobi8.py | 28 ++++++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader/markup.py b/src/calibre/ebooks/mobi/reader/markup.py index 721de28ff4..8bb7f211f3 100644 --- a/src/calibre/ebooks/mobi/reader/markup.py +++ b/src/calibre/ebooks/mobi/reader/markup.py @@ -36,7 +36,8 @@ def update_internal_links(mobi8_reader): filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32), int(offset, 32)) suffix = (b'#' + idtag) if idtag else b'' - replacement = filename.encode(mr.header.codec) + suffix + replacement = filename.split('/')[-1].encode( + mr.header.codec) + suffix tag = posfid_index_pattern.sub(replacement, tag, 1) srcpieces[j] = tag part = ''.join([x.decode(mr.header.codec) for x in srcpieces]) diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index 5105e20f0b..ec7166ebb0 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -233,7 +233,6 @@ class Mobi8Reader(object): insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid] pos = insertpos + offset fi = self.get_file_info(pos) - fname = fi.filename # an existing "id=" must exist in original xhtml otherwise it would not # have worked for linking. Amazon seems to have added its own # additional "aid=" inside tags whose contents seem to represent some @@ -242,7 +241,7 @@ class Mobi8Reader(object): # so find the closest "id=" before position the file by actually # searching in that file idtext = self.get_id_tag(pos) - return fname, idtext + return '%s/%s'%(fi.type, fi.filename), idtext def get_id_tag(self, pos): # find the correct tag by actually searching in the destination @@ -253,12 +252,13 @@ class Mobi8Reader(object): textblock = self.parts[fi.num] id_map = [] npos = pos - fi.start - # if npos inside a tag then search all text before the its end of tag - # marker pgt = textblock.find(b'>', npos) plt = textblock.find(b'<', npos) - if pgt < plt: + # if npos inside a tag then search all text before the its end of tag marker + # else not in a tag need to search the preceding tag + if plt == npos or pgt < plt: npos = pgt + 1 + textblock = textblock[0:npos] # find id links only inside of tags # inside any < > pair find all "id=' and return whatever is inside # the quotes @@ -315,12 +315,18 @@ class Mobi8Reader(object): # Add href and anchor info to the index entries for entry in index_entries: - pos = entry['pos'] - fi = self.get_file_info(pos) - if fi.filename is None: - raise ValueError('Index entry has invalid pos: %d'%pos) - idtag = self.get_id_tag(pos).decode(self.header.codec) - entry['href'] = '%s/%s'%(fi.type, fi.filename) + pos_fid = entry['pos_fid'] + if pos_fid is None: + pos = entry['pos'] + fi = self.get_file_info(pos) + if fi.filename is None: + raise ValueError('Index entry has invalid pos: %d'%pos) + idtag = self.get_id_tag(pos).decode(self.header.codec) + href = '%s/%s'%(fi.type, fi.filename) + else: + href, idtag = self.get_id_tag_by_pos_fid(*pos_fid) + + entry['href'] = href entry['idtag'] = idtag # Build the TOC object