mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 Output: Generate trailing bytes for books with flat ToCs
This commit is contained in:
parent
669fc85958
commit
a883b577a6
@ -82,6 +82,9 @@ class Index(object):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '\n'.join(self.render())
|
return '\n'.join(self.render())
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.records)
|
||||||
|
|
||||||
class SKELIndex(Index):
|
class SKELIndex(Index):
|
||||||
|
|
||||||
def __init__(self, skelidx, records, codec):
|
def __init__(self, skelidx, records, codec):
|
||||||
|
@ -14,7 +14,7 @@ from calibre import CurrentDir
|
|||||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
|
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
|
||||||
GuideIndex)
|
GuideIndex)
|
||||||
from calibre.ebooks.mobi.utils import read_font_record
|
from calibre.ebooks.mobi.utils import read_font_record, decode_tbs
|
||||||
from calibre.ebooks.mobi.debug import format_bytes
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
|
|
||||||
@ -88,6 +88,7 @@ class MOBIFile(object):
|
|||||||
self.read_fdst()
|
self.read_fdst()
|
||||||
self.read_indices()
|
self.read_indices()
|
||||||
self.build_files()
|
self.build_files()
|
||||||
|
self.read_tbs()
|
||||||
|
|
||||||
def print_header(self, f=sys.stdout):
|
def print_header(self, f=sys.stdout):
|
||||||
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
||||||
@ -183,6 +184,45 @@ class MOBIFile(object):
|
|||||||
self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
|
self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
|
||||||
payload))
|
payload))
|
||||||
|
|
||||||
|
def read_tbs(self):
    '''Build a human readable description of the trailing byte sequence
    (TBS) stored with every text record.

    Each NCX index entry is turned into an ``Entry`` whose extent runs from
    its own position up to the nearest following entry at the same or a
    shallower level (or the end of the raw text). The entries are then
    matched against the text records and one description string per record
    is stored in ``self.indexing_data``.
    '''
    from calibre.ebooks.mobi.writer8.tbs import (Entry,
            collect_indexing_data)

    def or_none(value):
        # The index uses negative numbers to mean "not present"
        return value if value > -1 else None

    text_length = len(self.raw_text)
    entry_map = []
    for index in self.ncx_index:
        pos, level = index['pos'], index['hlvl']
        # The entry ends where the next entry that is not nested inside it
        # begins, or at the end of the text if there is no such entry.
        end = text_length
        for other in self.ncx_index:
            if other['pos'] > pos and other['hlvl'] <= level:
                end = min(end, other['pos'])

        entry_map.append(Entry(
            index=index['num'],
            title=index['text'],
            depth=level,
            parent=or_none(index['parent']),
            first_child=or_none(index['child1']),
            last_child=or_none(index['childn']),
            start=pos,
            length=end - pos))

    per_record = collect_indexing_data(entry_map, len(self.text_records))
    self.indexing_data = []
    for recnum, data in enumerate(per_record):
        tbs_bytes = self.text_records[recnum].trailing_data.get(
                'indexing', b'')
        lines = ['Record #%d'%recnum]

        # What the index says should touch this record
        for kind in ('starts', 'completes', 'ends', 'spans'):
            touching = getattr(data, kind)
            if touching:
                lines.append(kind+':')
                lines.extend('\t%d at depth: %d'%(e.index, e.depth)
                        for e in touching)

        # What is actually stored in the record
        lines.append('TBS Bytes: ' + format_bytes(tbs_bytes))
        val, extra, consumed = decode_tbs(tbs_bytes, flag_size=3)
        flags = dict((bin(k), v) for k, v in extra.iteritems())
        lines.append('First sequence: %r %r'%(val, flags))
        leftover = tbs_bytes[consumed:]
        if leftover:
            lines.append('Remaining bytes: %s'%format_bytes(leftover))
        lines.append('')
        self.indexing_data.append('\n'.join(lines))
|
||||||
|
|
||||||
def inspect_mobi(mobi_file, ddir):
|
def inspect_mobi(mobi_file, ddir):
|
||||||
f = MOBIFile(mobi_file)
|
f = MOBIFile(mobi_file)
|
||||||
@ -193,7 +233,8 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(alltext, 'wb') as of:
|
with open(alltext, 'wb') as of:
|
||||||
of.write(f.raw_text)
|
of.write(f.raw_text)
|
||||||
|
|
||||||
for x in ('text_records', 'images', 'fonts', 'binary', 'files', 'flows'):
|
for x in ('text_records', 'images', 'fonts', 'binary', 'files', 'flows',
|
||||||
|
'tbs'):
|
||||||
os.mkdir(os.path.join(ddir, x))
|
os.mkdir(os.path.join(ddir, x))
|
||||||
|
|
||||||
for rec in f.text_records:
|
for rec in f.text_records:
|
||||||
@ -219,6 +260,8 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(os.path.join(ddir, 'guide.record'), 'wb') as fo:
|
with open(os.path.join(ddir, 'guide.record'), 'wb') as fo:
|
||||||
fo.write(str(f.guide_index).encode('utf-8'))
|
fo.write(str(f.guide_index).encode('utf-8'))
|
||||||
|
|
||||||
|
with open(os.path.join(ddir, 'tbs', 'all.txt'), 'wb') as fo:
|
||||||
|
fo.write(('\n'.join(f.indexing_data)).encode('utf-8'))
|
||||||
|
|
||||||
for part in f.files:
|
for part in f.files:
|
||||||
part.dump(os.path.join(ddir, 'files'))
|
part.dump(os.path.join(ddir, 'files'))
|
||||||
|
@ -397,7 +397,10 @@ class MobiWriter(object):
|
|||||||
header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
|
header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
|
||||||
header_fields['fdst_record'] = NULL_INDEX
|
header_fields['fdst_record'] = NULL_INDEX
|
||||||
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
|
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
|
||||||
header_fields['extra_data_flags'] = 0b11
|
extra_data_flags = 0b1 # Has multibyte overlap bytes
|
||||||
|
if self.primary_index_record_idx is not None:
|
||||||
|
extra_data_flags |= 0b10
|
||||||
|
header_fields['extra_data_flags'] = extra_data_flags
|
||||||
|
|
||||||
for k, v in {'last_text_record':'last_text_record_idx',
|
for k, v in {'last_text_record':'last_text_record_idx',
|
||||||
'first_non_text_record':'first_non_text_record_idx',
|
'first_non_text_record':'first_non_text_record_idx',
|
||||||
|
@ -27,6 +27,7 @@ from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
|
|||||||
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
|
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
|
||||||
ChunkIndex, GuideIndex)
|
ChunkIndex, GuideIndex)
|
||||||
from calibre.ebooks.mobi.writer8.mobi import KF8Book
|
from calibre.ebooks.mobi.writer8.mobi import KF8Book
|
||||||
|
from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
|
||||||
|
|
||||||
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
||||||
|
|
||||||
@ -39,6 +40,7 @@ class KF8Writer(object):
|
|||||||
def __init__(self, oeb, opts, resources):
|
def __init__(self, oeb, opts, resources):
|
||||||
self.oeb, self.opts, self.log = oeb, opts, oeb.log
|
self.oeb, self.opts, self.log = oeb, opts, oeb.log
|
||||||
self.compress = not self.opts.dont_compress
|
self.compress = not self.opts.dont_compress
|
||||||
|
self.has_tbs = False
|
||||||
self.log.info('Creating KF8 output')
|
self.log.info('Creating KF8 output')
|
||||||
self.used_images = set()
|
self.used_images = set()
|
||||||
self.resources = resources
|
self.resources = resources
|
||||||
@ -363,6 +365,8 @@ class KF8Writer(object):
|
|||||||
for entry in entries:
|
for entry in entries:
|
||||||
entry['length'] = get_next_start(entry) - entry['offset']
|
entry['length'] = get_next_start(entry) - entry['offset']
|
||||||
|
|
||||||
|
self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
|
||||||
|
self.last_text_record_idx+1)
|
||||||
self.ncx_records = NCXIndex(entries)()
|
self.ncx_records = NCXIndex(entries)()
|
||||||
|
|
||||||
def create_guide(self):
|
def create_guide(self):
|
||||||
|
@ -250,6 +250,8 @@ class KF8Book(object):
|
|||||||
self.full_title = utf8_text(unicode(metadata.title[0]))
|
self.full_title = utf8_text(unicode(metadata.title[0]))
|
||||||
self.title_length = len(self.full_title)
|
self.title_length = len(self.full_title)
|
||||||
self.extra_data_flags = 0b1
|
self.extra_data_flags = 0b1
|
||||||
|
if writer.has_tbs:
|
||||||
|
self.extra_data_flags |= 0b10
|
||||||
self.uid = random.randint(0, 0xffffffff)
|
self.uid = random.randint(0, 0xffffffff)
|
||||||
|
|
||||||
self.language_code = iana2mobi(str(metadata.language[0]))
|
self.language_code = iana2mobi(str(metadata.language[0]))
|
||||||
|
109
src/calibre/ebooks/mobi/writer8/tbs.py
Normal file
109
src/calibre/ebooks/mobi/writer8/tbs.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from collections import namedtuple
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.utils import (RECORD_SIZE, encode_trailing_data,
|
||||||
|
encode_tbs)
|
||||||
|
|
||||||
|
# A single index (ToC) entry: its number, where it starts in the text and how
# long it is, its nesting depth, links to its parent/children and its title.
Entry = namedtuple('IndexEntry', [
    'index', 'start', 'length', 'depth', 'parent', 'first_child',
    'last_child', 'title'])
# The index entries that touch one text record, grouped by how they overlap it.
Data = namedtuple('Data', ['starts', 'ends', 'completes', 'spans'])
|
||||||
|
|
||||||
|
def collect_indexing_data(entries, number_of_text_records):
    '''Work out, for every text record, how each index entry overlaps it.

    Record ``i`` covers the offsets ``[i*RECORD_SIZE, (i+1)*RECORD_SIZE)``.
    An entry covers ``entry.start`` up to and including
    ``entry.start + entry.length - 1``.

    :param entries: Iterable of objects with ``start``, ``length`` and
        ``depth`` attributes. It is iterated once per record.
    :param number_of_text_records: How many records to classify against.
    :return: A list with one ``Data`` per record. Every overlapping entry is
        placed in exactly one of its ``starts``, ``ends``, ``completes`` or
        ``spans`` lists and each list is sorted by depth.
    '''
    data = []
    for recnum in xrange(number_of_text_records):
        rec_first = recnum * RECORD_SIZE
        rec_limit = rec_first + RECORD_SIZE  # first offset of the next record
        datum = Data([], [], [], [])
        data.append(datum)

        for entry in entries:
            entry_last = entry.start + entry.length - 1
            if entry.start >= rec_limit or entry_last < rec_first:
                # No overlap with this record at all
                continue

            begins_here = entry.start >= rec_first
            finishes_here = entry_last < rec_limit
            if begins_here and finishes_here:
                bucket = datum.completes
            elif begins_here:
                bucket = datum.starts
            elif finishes_here:
                bucket = datum.ends
            else:
                # Begins before and finishes after this record
                bucket = datum.spans
            bucket.append(entry)

        # Entries are expected to arrive already ordered by depth, sorting
        # here just guarantees it.
        for bucket in datum:
            bucket.sort(key=lambda e: e.depth)

    return data
|
||||||
|
|
||||||
|
def generate_tbs_for_flat_index(indexing_data):
    '''Create the trailing byte sequence for every text record of a book
    whose index has no nesting.

    :param indexing_data: Sequence of ``Data`` tuples, one per text record.
    :return: A list of bytestrings, one per record. The bytestring is empty
        for records that no index entry touches.
    '''
    record_type = 8  # 8 means KF8, 0 means MOBI 6
    encode = partial(encode_tbs, flag_size=3)
    result = []
    for datum in indexing_data:
        # Every non-empty sequence carries the record type under flag 0b010
        extra = {0b010: record_type}
        if datum.spans:
            # One entry covers the whole record: flag 0b001 is set to zero
            # and the spanning entry is the one that gets encoded.
            extra[0b001] = 0
            tbs = encode(datum.spans[0].index, extra)
        elif not (datum.starts or datum.ends or datum.completes):
            # Nothing in the index touches this record
            tbs = b''
        else:
            touching = datum.starts + datum.completes + datum.ends
            if not datum.completes and len(touching) == 1:
                # A lone entry that either starts or ends here, only its
                # index is encoded.
                tbs = encode(touching[0].index, extra)
            else:
                # Some mix of an end, complete entries and a start. Encode
                # the lowest numbered entry touching the record along with
                # the count of touching entries under flag 0b100.
                first = min(touching, key=lambda node: node.index)
                extra[0b100] = len(touching)
                tbs = encode(first.index, extra)
        result.append(tbs)

    return result
|
||||||
|
|
||||||
|
def apply_trailing_byte_sequences(index_table, records, number_of_text_records):
    '''Append the indexing trailing byte sequence to every text record.

    :param index_table: Sequence of dicts describing the index entries. The
        keys ``index``, ``offset``, ``length``, ``depth`` and ``label`` are
        read, ``parent``, ``first_child`` and ``last_child`` are optional.
    :param records: List of record bytestrings, modified in place. The
        sequence for text record ``i`` is appended to ``records[i+1]``
        (presumably ``records[0]`` is a non-text header record, verify
        against the caller).
    :param number_of_text_records: Number of text records to generate
        sequences for.
    :return: True if trailing bytes were added to the records. False if
        nothing was added, in which case ``records`` is left untouched.
    '''
    if not index_table:
        # No index entries at all. max() on an empty sequence would raise
        # ValueError, and there is nothing to describe anyway.
        return False

    if max(e['depth'] for e in index_table) > 0:
        # TODO: Implement for hierarchical ToCs
        # Checked before doing any other work, as the result of that work
        # would only be thrown away.
        return False

    entries = tuple(
        Entry(r['index'], r['offset'], r['length'], r['depth'],
            r.get('parent', None), r.get('first_child', None),
            r.get('last_child', None), r['label'])
        for r in index_table)

    indexing_data = collect_indexing_data(entries, number_of_text_records)
    tbs = generate_tbs_for_flat_index(indexing_data)
    if not tbs:
        return False

    # Text record i is stored at position i+1 of records
    for recnum, tbs_bytes in enumerate(tbs, 1):
        records[recnum] += encode_trailing_data(tbs_bytes)
    return True
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user