KF8 Output: Set offsets to tags in the skeleton the same way kindlegen does. Also linearize non-linear ToCs to ensure section-to-section jumping works.

This commit is contained in:
Kovid Goyal 2012-05-21 15:16:41 +05:30
parent 8283515d51
commit 331bdb2fae
3 changed files with 65 additions and 30 deletions

View File

@ -316,6 +316,9 @@ class NCXIndex(Index):
desc = entry.get('description', None)
if desc:
strings.append(desc)
kind = entry.get('kind', None)
if kind:
strings.append(kind)
self.cncx = CNCX(strings)
def to_entry(x):
@ -324,7 +327,7 @@ class NCXIndex(Index):
'first_child', 'last_child'):
if f in x:
ans[f] = x[f]
for f in ('label', 'description', 'author'):
for f in ('label', 'description', 'author', 'kind'):
if f in x:
ans[f] = self.cncx[x[f]]
return ('%02x'%x['index'], ans)
@ -333,3 +336,20 @@ class NCXIndex(Index):
class NonLinearNCXIndex(NCXIndex):
    # NCX index variant selected for non-linear ToCs; it extends the base
    # NCXIndex tag table with a 'kind' tag so each entry can carry a
    # semantic label (the caller sets kind='chapter' on every entry).
    # Two control bytes because the tag table spans two EndTagTable
    # sections (the second covers 'pos_fid').
    control_byte_count = 2
    # Each tuple presumably is (name, tag id, values-per-entry, bitmask,
    # end flag) — TODO confirm against TagMeta's definition. Tag ids and
    # bitmasks mirror kindlegen's NCX tagx table; EndTagTable terminates
    # each control-byte section.
    tag_types = tuple(map(TagMeta, (
        ('offset', 1, 1, 1, 0),
        ('length', 2, 1, 2, 0),
        ('label', 3, 1, 4, 0),
        ('depth', 4, 1, 8, 0),
        ('kind', 5, 1, 16, 0),
        ('parent', 21, 1, 32, 0),
        ('first_child', 22, 1, 64, 0),
        ('last_child', 23, 1, 128, 0),
        EndTagTable,
        ('pos_fid', 6, 2, 1, 0),
        EndTagTable
    )))

View File

@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
from calibre.ebooks.oeb.parse_utils import barename
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
ChunkIndex, GuideIndex)
ChunkIndex, GuideIndex, NonLinearNCXIndex)
from calibre.ebooks.mobi.writer8.mobi import KF8Book
from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
from calibre.ebooks.mobi.writer8.toc import TOCAdder
@ -316,9 +316,8 @@ class KF8Writer(object):
# Flatten the ToC into a depth first list
fl = toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href or '',
'label':(item.title or _('Unknown')),
'children':[]}
entry = {'id': id(item), 'index': i, 'label':(item.title or
_('Unknown')), 'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)
p = getattr(item, 'ncx_parent', None)
if p is not None:
@ -333,14 +332,45 @@ class KF8Writer(object):
if item.description:
entry['description'] = item.description
entries.append(entry)
href = item.href or ''
href, frag = href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
if aid is None:
aid = self.id_map.get((href, ''), None)
if aid is None:
pos, fid = 0, 0
chunk = self.chunk_table[pos]
offset = chunk.insert_pos + fid
else:
pos, fid, offset = self.aid_offset_map[aid]
entry['pos_fid'] = (pos, fid)
entry['offset'] = offset
# The Kindle requires entries to be sorted by (depth, playorder)
entries.sort(key=lambda entry: (entry['depth'], entry['index']))
# However, I cannot figure out how to deal with non linear ToCs, i.e.
# ToCs whose nth entry at depth d has an offset after its n+k entry at
# the same depth, so we sort on (depth, offset) instead. This re-orders
# the ToC to be linear. A non-linear ToC causes section to section
# jumping to not work. kindlegen somehow handles non-linear tocs, but I
# cannot figure out how.
original = sorted(entries,
key=lambda entry: (entry['depth'], entry['index']))
linearized = sorted(entries,
key=lambda entry: (entry['depth'], entry['offset']))
is_non_linear = original != linearized
entries = linearized
is_non_linear = False # False as we are using the linearized entries
if is_non_linear:
for entry in entries:
entry['kind'] = 'chapter'
for i, entry in enumerate(entries):
entry['index'] = i
id_to_index = {entry['id']:entry['index'] for entry in entries}
# Write the hierarchical and start offset information
# Write the hierarchical information
for entry in entries:
children = entry.pop('children')
if children:
@ -348,19 +378,6 @@ class KF8Writer(object):
entry['last_child'] = id_to_index[children[-1]]
if 'parent_id' in entry:
entry['parent'] = id_to_index[entry.pop('parent_id')]
href = entry.pop('href')
href, frag = href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
if aid is None:
aid = self.id_map.get((href, ''), None)
if aid is None:
pos, fid = 0, 0
else:
pos, fid = self.aid_offset_map[aid]
chunk = self.chunk_table[pos]
offset = chunk.insert_pos + fid
entry['pos_fid'] = (pos, fid)
entry['offset'] = offset
# Write the lengths
def get_next_start(entry):
@ -369,13 +386,13 @@ class KF8Writer(object):
if enders:
return min(enders)
return len(self.flows[0])
for entry in entries:
entry['length'] = get_next_start(entry) - entry['offset']
self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
self.uncompressed_record_lengths)
self.ncx_records = NCXIndex(entries)()
idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
self.ncx_records = idx_type(entries)()
def create_guide(self):
self.start_offset = None
@ -389,12 +406,9 @@ class KF8Writer(object):
aid = self.id_map.get((href, ''))
if aid is None:
continue
pos, fid = self.aid_offset_map[aid]
pos, fid, offset = self.aid_offset_map[aid]
if is_guide_ref_start(ref):
chunk = self.chunk_table[pos]
skel = [s for s in self.skel_table if s.file_number ==
chunk.file_number][0]
self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
self.start_offset = offset
self.guide_table.append(GuideRef(ref.title or
_('Unknown'), ref.type, (pos, fid)))

View File

@ -364,12 +364,13 @@ class Chunker(object):
pos_fid = None
for chunk in self.chunk_table:
if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length:
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos)
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos,
offset)
break
if chunk.insert_pos > offset:
# This aid is in the skeleton, not in a chunk, so we use
# the chunk immediately after
pos_fid = (chunk.sequence_number, 0)
pos_fid = (chunk.sequence_number, 0, offset)
break
if pos_fid is None:
raise ValueError('Could not find chunk for aid: %r'%
@ -379,7 +380,7 @@ class Chunker(object):
self.aid_offset_map = aid_map
def to_placeholder(aid):
pos, fid = aid_map[aid]
pos, fid, _ = aid_map[aid]
pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
return bytes(':off:'.join((pos, fid)))