diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index 430d695fd1..955fbab460 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -200,9 +200,9 @@ class KF8Writer(object): aid = aidbase + j tag.attrib['aid'] = to_base(aid, base=32) if tag.tag == XHTML('body'): - self.id_map[(item.href, '')] = (i, tag.attrib['aid']) + self.id_map[(item.href, '')] = tag.attrib['aid'] if id_ is not None: - self.id_map[(item.href, id_)] = (i, tag.attrib['aid']) + self.id_map[(item.href, id_)] = tag.attrib['aid'] j += 1 diff --git a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py index d04f119316..4da540cac6 100644 --- a/src/calibre/ebooks/mobi/writer8/skeleton.py +++ b/src/calibre/ebooks/mobi/writer8/skeleton.py @@ -172,11 +172,11 @@ class Chunker(object): body = root.xpath('//body')[0] body.tail = '\n' - if self.orig_dumps is not None: - self.orig_dumps.append(tostring(root, xml_declaration=True, + if orig_dumps is not None: + orig_dumps.append(tostring(root, xml_declaration=True, with_tail=True)) - self.orig_dumps[-1] = close_self_closing_tags( - self.orig_dumps[-1].replace(b']+? aid=[\'"]([A-Z0-9]+)[\'"]', text): - aid_map[match.group(1)] = match.start() + offset = match.start() + pos_fid = None + for chunk in self.chunk_table: + if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length: + pos_fid = (chunk.sequence_number, offset-chunk.insert_pos) + break + if chunk.insert_pos > offset: + # This aid is in the skeleton, not in a chunk, so we use + # the chunk immediately after + pos_fid = (chunk.sequence_number, 0) + break + if pos_fid is None: + raise ValueError('Could not find chunk for aid: %r'% + match.group(1)) + aid_map[match.group(1)] = (to_base(chunk.sequence_number, + base=32, min_num_digits=4), + to_href(offset-chunk.insert_pos)) + self.aid_offset_map = aid_map - def to_placeholder(x): - file_number, aid = x - return bytes('%04d:%s'%(file_number, to_href(aid_map[aid]))) + def to_placeholder(aid): + return bytes(':'.join(aid_map[aid])) placeholder_map = {bytes(k):to_placeholder(v) for k, v in self.placeholder_map.iteritems()} @@ -359,7 +379,7 @@ class Chunker(object): pass return raw - return re.sub(br'<[^>]+(kindle:pos:fid:\d{4}:\d{10})', sub, text) + return re.sub(br'<[^>]+(kindle:pos:fid:0000:\d{10})', sub, text) def dump(self, orig_dumps): import tempfile, shutil, os