KF8 Output: Set offsets to tags in the skeleton the same way kindlegen does. Also linearize non-linear ToCs to ensure section-to-section jumping works.

2025-08-30 23:00:21 -04:00 · 2012-05-21 15:16:41 +05:30 · 2012-05-21 15:16:41 +05:30 · 331bdb2fae
commit 331bdb2fae
parent 8283515d51
3 changed files with 65 additions and 30 deletions
--- a/src/calibre/ebooks/mobi/writer8/index.py
+++ b/src/calibre/ebooks/mobi/writer8/index.py
@ -316,6 +316,9 @@ class NCXIndex(Index):
            desc = entry.get('description', None)
            if desc:
                strings.append(desc)
            kind = entry.get('kind', None)
            if kind:
                strings.append(kind)
        self.cncx = CNCX(strings)
        def to_entry(x):
@ -324,7 +327,7 @@ class NCXIndex(Index):
                    'first_child', 'last_child'):
                if f in x:
                    ans[f] = x[f]
-            for f in ('label', 'description', 'author'):
+            for f in ('label', 'description', 'author', 'kind'):
                if f in x:
                    ans[f] = self.cncx[x[f]]
            return ('%02x'%x['index'], ans)
@ -333,3 +336,20 @@ class NCXIndex(Index):
 class NonLinearNCXIndex(NCXIndex):
    # NCXIndex variant that overrides only the control byte count and the tag
    # table; everything else is inherited from NCXIndex.
    # NOTE(review): the name suggests it is meant for ToCs whose entries are
    # not in offset order -- confirm against the caller that selects it.
    # Two control bytes, presumably one per tag group below (each group is
    # closed by an EndTagTable entry) -- TODO confirm against Index.
    control_byte_count = 2
    # Each tuple is passed to TagMeta. The fields look like (name, tag number,
    # values per entry, bitmask, end-of-table flag) -- verify against the
    # TagMeta definition.
    tag_types = tuple(map(TagMeta, (
        # First group: eight tags whose masks are the distinct bits 1..128.
        ('offset',             1, 1, 1, 0),
        ('length',             2, 1, 2, 0),
        ('label',              3, 1, 4, 0),
        ('depth',              4, 1, 8, 0),
        ('kind',               5, 1, 16, 0),
        ('parent',             21, 1, 32, 0),
        ('first_child',        22, 1, 64, 0),
        ('last_child',         23, 1, 128, 0),
        EndTagTable,
        # Second group: pos_fid alone, reusing mask 1; its third field is 2
        # where every other tag has 1.
        ('pos_fid',            6, 2, 1, 0),
        EndTagTable
    )))
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
 from calibre.ebooks.oeb.parse_utils import barename
 from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
 from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
-        ChunkIndex, GuideIndex)
+        ChunkIndex, GuideIndex, NonLinearNCXIndex)
 from calibre.ebooks.mobi.writer8.mobi import KF8Book
 from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
 from calibre.ebooks.mobi.writer8.toc import TOCAdder
@ -316,9 +316,8 @@ class KF8Writer(object):
        # Flatten the ToC into a depth first list
        fl = toc.iterdescendants()
        for i, item in enumerate(fl):
-            entry = {'id': id(item), 'index': i, 'href':item.href or '',
+            entry = {'id': id(item), 'index': i, 'label':(item.title or
-                    'label':(item.title or _('Unknown')),
+                _('Unknown')), 'children':[]}
                    'children':[]}
            entry['depth'] = getattr(item, 'ncx_hlvl', 0)
            p = getattr(item, 'ncx_parent', None)
            if p is not None:
@ -333,14 +332,45 @@ class KF8Writer(object):
                if item.description:
                    entry['description'] = item.description
            entries.append(entry)
            href = item.href or ''
            href, frag = href.partition('#')[0::2]
            aid = self.id_map.get((href, frag), None)
            if aid is None:
                aid = self.id_map.get((href, ''), None)
            if aid is None:
                pos, fid = 0, 0
                chunk = self.chunk_table[pos]
                offset = chunk.insert_pos + fid
            else:
                pos, fid, offset = self.aid_offset_map[aid]
            entry['pos_fid'] = (pos, fid)
            entry['offset'] = offset
        # The Kindle requires entries to be sorted by (depth, playorder)
-        entries.sort(key=lambda entry: (entry['depth'], entry['index']))
+        # However, I cannot figure out how to deal with non linear ToCs, i.e.
        # ToCs whose nth entry at depth d has an offset after its n+k entry at
        # the same depth, so we sort on (depth, offset) instead. This re-orders
        # the ToC to be linear. A non-linear ToC causes section to section
        # jumping to not work. kindlegen somehow handles non-linear tocs, but I
        # cannot figure out how.
        original = sorted(entries,
                key=lambda entry: (entry['depth'], entry['index']))
        linearized = sorted(entries,
                key=lambda entry: (entry['depth'], entry['offset']))
        is_non_linear = original != linearized
        entries = linearized
        is_non_linear = False # False as we are using the linearized entries
        if is_non_linear:
            for entry in entries:
                entry['kind'] = 'chapter'
        for i, entry in enumerate(entries):
            entry['index'] = i
        id_to_index = {entry['id']:entry['index'] for entry in entries}
-        # Write the hierarchical and start offset information
+        # Write the hierarchical information
        for entry in entries:
            children = entry.pop('children')
            if children:
@ -348,19 +378,6 @@ class KF8Writer(object):
                entry['last_child'] = id_to_index[children[-1]]
            if 'parent_id' in entry:
                entry['parent'] = id_to_index[entry.pop('parent_id')]
            href = entry.pop('href')
            href, frag = href.partition('#')[0::2]
            aid = self.id_map.get((href, frag), None)
            if aid is None:
                aid = self.id_map.get((href, ''), None)
            if aid is None:
                pos, fid = 0, 0
            else:
                pos, fid = self.aid_offset_map[aid]
            chunk = self.chunk_table[pos]
            offset = chunk.insert_pos + fid
            entry['pos_fid'] = (pos, fid)
            entry['offset'] = offset
        # Write the lengths
        def get_next_start(entry):
@ -369,13 +386,13 @@ class KF8Writer(object):
            if enders:
                return min(enders)
            return len(self.flows[0])
        for entry in entries:
            entry['length'] = get_next_start(entry) - entry['offset']
        self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
                self.uncompressed_record_lengths)
-        self.ncx_records = NCXIndex(entries)()
+        idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
        self.ncx_records = idx_type(entries)()
    def create_guide(self):
        self.start_offset = None
@ -389,12 +406,9 @@ class KF8Writer(object):
                aid = self.id_map.get((href, ''))
            if aid is None:
                continue
-            pos, fid = self.aid_offset_map[aid]
+            pos, fid, offset = self.aid_offset_map[aid]
            if is_guide_ref_start(ref):
-                chunk = self.chunk_table[pos]
+                self.start_offset = offset
                skel = [s for s in self.skel_table if s.file_number ==
                        chunk.file_number][0]
                self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
            self.guide_table.append(GuideRef(ref.title or
                _('Unknown'), ref.type, (pos, fid)))
--- a/src/calibre/ebooks/mobi/writer8/skeleton.py
+++ b/src/calibre/ebooks/mobi/writer8/skeleton.py
@ -364,12 +364,13 @@ class Chunker(object):
            pos_fid = None
            for chunk in self.chunk_table:
                if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length:
-                    pos_fid = (chunk.sequence_number, offset-chunk.insert_pos)
+                    pos_fid = (chunk.sequence_number, offset-chunk.insert_pos,
                            offset)
                    break
                if chunk.insert_pos > offset:
                    # This aid is in the skeleton, not in a chunk, so we use
                    # the chunk immediately after
-                    pos_fid = (chunk.sequence_number, 0)
+                    pos_fid = (chunk.sequence_number, 0, offset)
                    break
            if pos_fid is None:
                raise ValueError('Could not find chunk for aid: %r'%
@ -379,7 +380,7 @@ class Chunker(object):
        self.aid_offset_map = aid_map
        def to_placeholder(aid):
-            pos, fid = aid_map[aid]
+            pos, fid, _ = aid_map[aid]
            pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
            return bytes(':off:'.join((pos, fid)))