KF8 Output: Set offsets to tags in the skeleton the same way kindlegen does. Also linearize non-linear ToCs to ensure section-to-section jumping works.

This commit is contained in:
Kovid Goyal 2012-05-21 15:16:41 +05:30
parent 8283515d51
commit 331bdb2fae
3 changed files with 65 additions and 30 deletions

View File

@ -316,6 +316,9 @@ class NCXIndex(Index):
desc = entry.get('description', None)
if desc:
strings.append(desc)
kind = entry.get('kind', None)
if kind:
strings.append(kind)
self.cncx = CNCX(strings)
def to_entry(x):
@ -324,7 +327,7 @@ class NCXIndex(Index):
'first_child', 'last_child'):
if f in x:
ans[f] = x[f]
for f in ('label', 'description', 'author'):
for f in ('label', 'description', 'author', 'kind'):
if f in x:
ans[f] = self.cncx[x[f]]
return ('%02x'%x['index'], ans)
@ -333,3 +336,20 @@ class NCXIndex(Index):
class NonLinearNCXIndex(NCXIndex):
    # NCX index variant selected for non-linear ToCs; it extends the base
    # NCXIndex tag table with a 'kind' tag so each entry can carry a
    # semantic label (the caller sets kind='chapter' on every entry).
    # Two control bytes because the tag table spans two EndTagTable
    # sections (the second covers 'pos_fid').
    control_byte_count = 2
    # Each tuple presumably is (name, tag id, values-per-entry, bitmask,
    # end flag) — TODO confirm against TagMeta's definition. Tag ids and
    # bitmasks mirror kindlegen's NCX tagx table; EndTagTable terminates
    # each control-byte section.
    tag_types = tuple(map(TagMeta, (
        ('offset', 1, 1, 1, 0),
        ('length', 2, 1, 2, 0),
        ('label', 3, 1, 4, 0),
        ('depth', 4, 1, 8, 0),
        ('kind', 5, 1, 16, 0),
        ('parent', 21, 1, 32, 0),
        ('first_child', 22, 1, 64, 0),
        ('last_child', 23, 1, 128, 0),
        EndTagTable,
        ('pos_fid', 6, 2, 1, 0),
        EndTagTable
    )))

View File

@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
from calibre.ebooks.oeb.parse_utils import barename
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
ChunkIndex, GuideIndex)
ChunkIndex, GuideIndex, NonLinearNCXIndex)
from calibre.ebooks.mobi.writer8.mobi import KF8Book
from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
from calibre.ebooks.mobi.writer8.toc import TOCAdder
@ -316,9 +316,8 @@ class KF8Writer(object):
# Flatten the ToC into a depth first list
fl = toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href or '',
'label':(item.title or _('Unknown')),
'children':[]}
entry = {'id': id(item), 'index': i, 'label':(item.title or
_('Unknown')), 'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)
p = getattr(item, 'ncx_parent', None)
if p is not None:
@ -333,14 +332,45 @@ class KF8Writer(object):
if item.description:
entry['description'] = item.description
entries.append(entry)
href = item.href or ''
href, frag = href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
if aid is None:
aid = self.id_map.get((href, ''), None)
if aid is None:
pos, fid = 0, 0
chunk = self.chunk_table[pos]
offset = chunk.insert_pos + fid
else:
pos, fid, offset = self.aid_offset_map[aid]
entry['pos_fid'] = (pos, fid)
entry['offset'] = offset
# The Kindle requires entries to be sorted by (depth, playorder)
entries.sort(key=lambda entry: (entry['depth'], entry['index']))
# However, I cannot figure out how to deal with non linear ToCs, i.e.
# ToCs whose nth entry at depth d has an offset after its n+k entry at
# the same depth, so we sort on (depth, offset) instead. This re-orders
# the ToC to be linear. A non-linear ToC causes section to section
# jumping to not work. kindlegen somehow handles non-linear tocs, but I
# cannot figure out how.
original = sorted(entries,
key=lambda entry: (entry['depth'], entry['index']))
linearized = sorted(entries,
key=lambda entry: (entry['depth'], entry['offset']))
is_non_linear = original != linearized
entries = linearized
is_non_linear = False # False as we are using the linearized entries
if is_non_linear:
for entry in entries:
entry['kind'] = 'chapter'
for i, entry in enumerate(entries):
entry['index'] = i
id_to_index = {entry['id']:entry['index'] for entry in entries}
# Write the hierarchical and start offset information
# Write the hierarchical information
for entry in entries:
children = entry.pop('children')
if children:
@ -348,19 +378,6 @@ class KF8Writer(object):
entry['last_child'] = id_to_index[children[-1]]
if 'parent_id' in entry:
entry['parent'] = id_to_index[entry.pop('parent_id')]
href = entry.pop('href')
href, frag = href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
if aid is None:
aid = self.id_map.get((href, ''), None)
if aid is None:
pos, fid = 0, 0
else:
pos, fid = self.aid_offset_map[aid]
chunk = self.chunk_table[pos]
offset = chunk.insert_pos + fid
entry['pos_fid'] = (pos, fid)
entry['offset'] = offset
# Write the lengths
def get_next_start(entry):
@ -369,13 +386,13 @@ class KF8Writer(object):
if enders:
return min(enders)
return len(self.flows[0])
for entry in entries:
entry['length'] = get_next_start(entry) - entry['offset']
self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
self.uncompressed_record_lengths)
self.ncx_records = NCXIndex(entries)()
idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
self.ncx_records = idx_type(entries)()
def create_guide(self):
self.start_offset = None
@ -389,12 +406,9 @@ class KF8Writer(object):
aid = self.id_map.get((href, ''))
if aid is None:
continue
pos, fid = self.aid_offset_map[aid]
pos, fid, offset = self.aid_offset_map[aid]
if is_guide_ref_start(ref):
chunk = self.chunk_table[pos]
skel = [s for s in self.skel_table if s.file_number ==
chunk.file_number][0]
self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
self.start_offset = offset
self.guide_table.append(GuideRef(ref.title or
_('Unknown'), ref.type, (pos, fid)))

View File

@ -364,12 +364,13 @@ class Chunker(object):
pos_fid = None
for chunk in self.chunk_table:
if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length:
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos)
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos,
offset)
break
if chunk.insert_pos > offset:
# This aid is in the skeleton, not in a chunk, so we use
# the chunk immediately after
pos_fid = (chunk.sequence_number, 0)
pos_fid = (chunk.sequence_number, 0, offset)
break
if pos_fid is None:
raise ValueError('Could not find chunk for aid: %r'%
@ -379,7 +380,7 @@ class Chunker(object):
self.aid_offset_map = aid_map
def to_placeholder(aid):
pos, fid = aid_map[aid]
pos, fid, _ = aid_map[aid]
pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
return bytes(':off:'.join((pos, fid)))