mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 Output: Set offsets to tags in the skeleton the same way kindlegen does. Also linearize non-linear ToCs to ensure section to section jumping works.
This commit is contained in:
parent
8283515d51
commit
331bdb2fae
@ -316,6 +316,9 @@ class NCXIndex(Index):
|
||||
desc = entry.get('description', None)
|
||||
if desc:
|
||||
strings.append(desc)
|
||||
kind = entry.get('kind', None)
|
||||
if kind:
|
||||
strings.append(kind)
|
||||
self.cncx = CNCX(strings)
|
||||
|
||||
def to_entry(x):
|
||||
@ -324,7 +327,7 @@ class NCXIndex(Index):
|
||||
'first_child', 'last_child'):
|
||||
if f in x:
|
||||
ans[f] = x[f]
|
||||
for f in ('label', 'description', 'author'):
|
||||
for f in ('label', 'description', 'author', 'kind'):
|
||||
if f in x:
|
||||
ans[f] = self.cncx[x[f]]
|
||||
return ('%02x'%x['index'], ans)
|
||||
@ -333,3 +336,20 @@ class NCXIndex(Index):
|
||||
|
||||
|
||||
|
||||
class NonLinearNCXIndex(NCXIndex):
    """NCX index for non-linear ToCs.

    Identical to NCXIndex except that every entry additionally carries a
    'kind' tag and a second tag table containing 'pos_fid'.  The extra
    tag table means each index entry needs two control bytes instead of
    one, hence control_byte_count = 2.

    Tag tuples are (name, tag_id, values_per_entry, bitmask, end_flag);
    EndTagTable terminates each tag table.
    """
    control_byte_count = 2
    tag_types = tuple(map(TagMeta, (
        ('offset', 1, 1, 1, 0),
        ('length', 2, 1, 2, 0),
        ('label', 3, 1, 4, 0),
        ('depth', 4, 1, 8, 0),
        ('kind', 5, 1, 16, 0),
        ('parent', 21, 1, 32, 0),
        ('first_child', 22, 1, 64, 0),
        ('last_child', 23, 1, 128, 0),
        EndTagTable,
        # Second tag table: pos_fid holds two values (chunk sequence
        # number and offset within the chunk).
        ('pos_fid', 6, 2, 1, 0),
        EndTagTable
    )))
|
||||
|
||||
|
||||
|
@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
||||
from calibre.ebooks.oeb.parse_utils import barename
|
||||
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
|
||||
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
|
||||
ChunkIndex, GuideIndex)
|
||||
ChunkIndex, GuideIndex, NonLinearNCXIndex)
|
||||
from calibre.ebooks.mobi.writer8.mobi import KF8Book
|
||||
from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
|
||||
from calibre.ebooks.mobi.writer8.toc import TOCAdder
|
||||
@ -316,9 +316,8 @@ class KF8Writer(object):
|
||||
# Flatten the ToC into a depth first list
|
||||
fl = toc.iterdescendants()
|
||||
for i, item in enumerate(fl):
|
||||
entry = {'id': id(item), 'index': i, 'href':item.href or '',
|
||||
'label':(item.title or _('Unknown')),
|
||||
'children':[]}
|
||||
entry = {'id': id(item), 'index': i, 'label':(item.title or
|
||||
_('Unknown')), 'children':[]}
|
||||
entry['depth'] = getattr(item, 'ncx_hlvl', 0)
|
||||
p = getattr(item, 'ncx_parent', None)
|
||||
if p is not None:
|
||||
@ -333,14 +332,45 @@ class KF8Writer(object):
|
||||
if item.description:
|
||||
entry['description'] = item.description
|
||||
entries.append(entry)
|
||||
href = item.href or ''
|
||||
href, frag = href.partition('#')[0::2]
|
||||
aid = self.id_map.get((href, frag), None)
|
||||
if aid is None:
|
||||
aid = self.id_map.get((href, ''), None)
|
||||
if aid is None:
|
||||
pos, fid = 0, 0
|
||||
chunk = self.chunk_table[pos]
|
||||
offset = chunk.insert_pos + fid
|
||||
else:
|
||||
pos, fid, offset = self.aid_offset_map[aid]
|
||||
|
||||
entry['pos_fid'] = (pos, fid)
|
||||
entry['offset'] = offset
|
||||
|
||||
# The Kindle requires entries to be sorted by (depth, playorder)
|
||||
entries.sort(key=lambda entry: (entry['depth'], entry['index']))
|
||||
# However, I cannot figure out how to deal with non linear ToCs, i.e.
|
||||
# ToCs whose nth entry at depth d has an offset after its n+k entry at
|
||||
# the same depth, so we sort on (depth, offset) instead. This re-orders
|
||||
# the ToC to be linear. A non-linear ToC causes section to section
|
||||
# jumping to not work. kindlegen somehow handles non-linear tocs, but I
|
||||
# cannot figure out how.
|
||||
original = sorted(entries,
|
||||
key=lambda entry: (entry['depth'], entry['index']))
|
||||
linearized = sorted(entries,
|
||||
key=lambda entry: (entry['depth'], entry['offset']))
|
||||
is_non_linear = original != linearized
|
||||
entries = linearized
|
||||
is_non_linear = False # False as we are using the linearized entries
|
||||
|
||||
if is_non_linear:
|
||||
for entry in entries:
|
||||
entry['kind'] = 'chapter'
|
||||
|
||||
for i, entry in enumerate(entries):
|
||||
entry['index'] = i
|
||||
id_to_index = {entry['id']:entry['index'] for entry in entries}
|
||||
|
||||
# Write the hierarchical and start offset information
|
||||
# Write the hierarchical information
|
||||
for entry in entries:
|
||||
children = entry.pop('children')
|
||||
if children:
|
||||
@ -348,19 +378,6 @@ class KF8Writer(object):
|
||||
entry['last_child'] = id_to_index[children[-1]]
|
||||
if 'parent_id' in entry:
|
||||
entry['parent'] = id_to_index[entry.pop('parent_id')]
|
||||
href = entry.pop('href')
|
||||
href, frag = href.partition('#')[0::2]
|
||||
aid = self.id_map.get((href, frag), None)
|
||||
if aid is None:
|
||||
aid = self.id_map.get((href, ''), None)
|
||||
if aid is None:
|
||||
pos, fid = 0, 0
|
||||
else:
|
||||
pos, fid = self.aid_offset_map[aid]
|
||||
chunk = self.chunk_table[pos]
|
||||
offset = chunk.insert_pos + fid
|
||||
entry['pos_fid'] = (pos, fid)
|
||||
entry['offset'] = offset
|
||||
|
||||
# Write the lengths
|
||||
def get_next_start(entry):
|
||||
@ -369,13 +386,13 @@ class KF8Writer(object):
|
||||
if enders:
|
||||
return min(enders)
|
||||
return len(self.flows[0])
|
||||
|
||||
for entry in entries:
|
||||
entry['length'] = get_next_start(entry) - entry['offset']
|
||||
|
||||
self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
|
||||
self.uncompressed_record_lengths)
|
||||
self.ncx_records = NCXIndex(entries)()
|
||||
idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
|
||||
self.ncx_records = idx_type(entries)()
|
||||
|
||||
def create_guide(self):
|
||||
self.start_offset = None
|
||||
@ -389,12 +406,9 @@ class KF8Writer(object):
|
||||
aid = self.id_map.get((href, ''))
|
||||
if aid is None:
|
||||
continue
|
||||
pos, fid = self.aid_offset_map[aid]
|
||||
pos, fid, offset = self.aid_offset_map[aid]
|
||||
if is_guide_ref_start(ref):
|
||||
chunk = self.chunk_table[pos]
|
||||
skel = [s for s in self.skel_table if s.file_number ==
|
||||
chunk.file_number][0]
|
||||
self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
|
||||
self.start_offset = offset
|
||||
self.guide_table.append(GuideRef(ref.title or
|
||||
_('Unknown'), ref.type, (pos, fid)))
|
||||
|
||||
|
@ -364,12 +364,13 @@ class Chunker(object):
|
||||
pos_fid = None
|
||||
for chunk in self.chunk_table:
|
||||
if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length:
|
||||
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos)
|
||||
pos_fid = (chunk.sequence_number, offset-chunk.insert_pos,
|
||||
offset)
|
||||
break
|
||||
if chunk.insert_pos > offset:
|
||||
# This aid is in the skeleton, not in a chunk, so we use
|
||||
# the chunk immediately after
|
||||
pos_fid = (chunk.sequence_number, 0)
|
||||
pos_fid = (chunk.sequence_number, 0, offset)
|
||||
break
|
||||
if pos_fid is None:
|
||||
raise ValueError('Could not find chunk for aid: %r'%
|
||||
@ -379,7 +380,7 @@ class Chunker(object):
|
||||
self.aid_offset_map = aid_map
|
||||
|
||||
def to_placeholder(aid):
|
||||
pos, fid = aid_map[aid]
|
||||
pos, fid, _ = aid_map[aid]
|
||||
pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
|
||||
return bytes(':off:'.join((pos, fid)))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user