diff --git a/src/calibre/ebooks/mobi/writer8/index.py b/src/calibre/ebooks/mobi/writer8/index.py index c37afb81ff..5452579b74 100644 --- a/src/calibre/ebooks/mobi/writer8/index.py +++ b/src/calibre/ebooks/mobi/writer8/index.py @@ -316,6 +316,9 @@ class NCXIndex(Index): desc = entry.get('description', None) if desc: strings.append(desc) + kind = entry.get('kind', None) + if kind: + strings.append(kind) self.cncx = CNCX(strings) def to_entry(x): @@ -324,7 +327,7 @@ class NCXIndex(Index): 'first_child', 'last_child'): if f in x: ans[f] = x[f] - for f in ('label', 'description', 'author'): + for f in ('label', 'description', 'author', 'kind'): if f in x: ans[f] = self.cncx[x[f]] return ('%02x'%x['index'], ans) @@ -333,3 +336,20 @@ class NCXIndex(Index): +class NonLinearNCXIndex(NCXIndex): + control_byte_count = 2 + tag_types = tuple(map(TagMeta, ( + ('offset', 1, 1, 1, 0), + ('length', 2, 1, 2, 0), + ('label', 3, 1, 4, 0), + ('depth', 4, 1, 8, 0), + ('kind', 5, 1, 16, 0), + ('parent', 21, 1, 32, 0), + ('first_child', 22, 1, 64, 0), + ('last_child', 23, 1, 128, 0), + EndTagTable, + ('pos_fid', 6, 2, 1, 0), + EndTagTable + ))) + + diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index f6847fdca0..0f45b8e494 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath, from calibre.ebooks.oeb.parse_utils import barename from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex, - ChunkIndex, GuideIndex) + ChunkIndex, GuideIndex, NonLinearNCXIndex) from calibre.ebooks.mobi.writer8.mobi import KF8Book from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences from calibre.ebooks.mobi.writer8.toc import TOCAdder @@ -316,9 +316,8 @@ class KF8Writer(object): # Flatten the ToC into a depth first list fl = toc.iterdescendants() for i, item in enumerate(fl): - entry = {'id': id(item), 'index': i, 'href':item.href or '', - 'label':(item.title or _('Unknown')), - 'children':[]} + entry = {'id': id(item), 'index': i, 'label':(item.title or + _('Unknown')), 'children':[]} entry['depth'] = getattr(item, 'ncx_hlvl', 0) p = getattr(item, 'ncx_parent', None) if p is not None: @@ -333,14 +332,45 @@ class KF8Writer(object): if item.description: entry['description'] = item.description entries.append(entry) + href = item.href or '' + href, frag = href.partition('#')[0::2] + aid = self.id_map.get((href, frag), None) + if aid is None: + aid = self.id_map.get((href, ''), None) + if aid is None: + pos, fid = 0, 0 + chunk = self.chunk_table[pos] + offset = chunk.insert_pos + fid + else: + pos, fid, offset = self.aid_offset_map[aid] + + entry['pos_fid'] = (pos, fid) + entry['offset'] = offset # The Kindle requires entries to be sorted by (depth, playorder) - entries.sort(key=lambda entry: (entry['depth'], entry['index'])) + # However, I cannot figure out how to deal with non linear ToCs, i.e. + # ToCs whose nth entry at depth d has an offset after its n+k entry at + # the same depth, so we sort on (depth, offset) instead. This re-orders + # the ToC to be linear. A non-linear ToC causes section to section + # jumping to not work. kindlegen somehow handles non-linear tocs, but I + # cannot figure out how. + original = sorted(entries, + key=lambda entry: (entry['depth'], entry['index'])) + linearized = sorted(entries, + key=lambda entry: (entry['depth'], entry['offset'])) + is_non_linear = original != linearized + entries = linearized + is_non_linear = False # False as we are using the linearized entries + + if is_non_linear: + for entry in entries: + entry['kind'] = 'chapter' + for i, entry in enumerate(entries): entry['index'] = i id_to_index = {entry['id']:entry['index'] for entry in entries} - # Write the hierarchical and start offset information + # Write the hierarchical information for entry in entries: children = entry.pop('children') if children: @@ -348,19 +378,6 @@ class KF8Writer(object): entry['last_child'] = id_to_index[children[-1]] if 'parent_id' in entry: entry['parent'] = id_to_index[entry.pop('parent_id')] - href = entry.pop('href') - href, frag = href.partition('#')[0::2] - aid = self.id_map.get((href, frag), None) - if aid is None: - aid = self.id_map.get((href, ''), None) - if aid is None: - pos, fid = 0, 0 - else: - pos, fid = self.aid_offset_map[aid] - chunk = self.chunk_table[pos] - offset = chunk.insert_pos + fid - entry['pos_fid'] = (pos, fid) - entry['offset'] = offset # Write the lengths def get_next_start(entry): @@ -369,13 +386,13 @@ class KF8Writer(object): if enders: return min(enders) return len(self.flows[0]) - for entry in entries: entry['length'] = get_next_start(entry) - entry['offset'] self.has_tbs = apply_trailing_byte_sequences(entries, self.records, self.uncompressed_record_lengths) - self.ncx_records = NCXIndex(entries)() + idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex + self.ncx_records = idx_type(entries)() def create_guide(self): self.start_offset = None @@ -389,12 +406,9 @@ class KF8Writer(object): aid = self.id_map.get((href, '')) if aid is None: continue - pos, fid = self.aid_offset_map[aid] + pos, fid, offset = self.aid_offset_map[aid] if is_guide_ref_start(ref): - chunk = self.chunk_table[pos] - skel = [s for s in self.skel_table if s.file_number == - chunk.file_number][0] - self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid + self.start_offset = offset self.guide_table.append(GuideRef(ref.title or _('Unknown'), ref.type, (pos, fid))) diff --git a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py index 8fd4714e1c..ebdaab1672 100644 --- a/src/calibre/ebooks/mobi/writer8/skeleton.py +++ b/src/calibre/ebooks/mobi/writer8/skeleton.py @@ -364,12 +364,13 @@ class Chunker(object): pos_fid = None for chunk in self.chunk_table: if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length: - pos_fid = (chunk.sequence_number, offset-chunk.insert_pos) + pos_fid = (chunk.sequence_number, offset-chunk.insert_pos, + offset) break if chunk.insert_pos > offset: # This aid is in the skeleton, not in a chunk, so we use # the chunk immediately after - pos_fid = (chunk.sequence_number, 0) + pos_fid = (chunk.sequence_number, 0, offset) break if pos_fid is None: raise ValueError('Could not find chunk for aid: %r'% @@ -379,7 +380,7 @@ class Chunker(object): self.aid_offset_map = aid_map def to_placeholder(aid): - pos, fid = aid_map[aid] + pos, fid, _ = aid_map[aid] pos, fid = to_base(pos, min_num_digits=4), to_href(fid) return bytes(':off:'.join((pos, fid)))