KF8 Output: Start work on the index layer

This commit is contained in:
Kovid Goyal 2012-04-20 22:39:32 +05:30
parent 6c631e0e64
commit 081897ae57
3 changed files with 132 additions and 41 deletions

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import struct, string, imghdr, zlib, os
from collections import OrderedDict
from io import BytesIO
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
from calibre.ebooks import normalize
@ -549,3 +550,48 @@ def create_text_record(text):
return data, overlap
class CNCX(object): # {{{

    '''
    Create the CNCX records. These are records containing all the strings from
    an index. Each record is of the form: <vwi string size><utf-8 encoded
    string>

    After construction:

    * ``self.records`` is a list of bytestrings, one per CNCX record.
    * ``self.strings`` maps every (de-duplicated) input string to its offset,
      computed as ``record_index * 0x10000 + position_inside_record``.
    '''

    # Strings are cut to this many characters before being encoded
    MAX_STRING_LENGTH = 500

    def __init__(self, strings=()):
        '''
        :param strings: Iterable of the strings to store. Duplicates are
                        stored only once; first-seen order is preserved.
        '''
        self.strings = OrderedDict((s, 0) for s in strings)

        self.records = []
        offset = 0
        buf = BytesIO()
        # Only values of existing keys are reassigned inside the loop, the
        # snapshot simply keeps iteration independent of the mapping.
        for key in tuple(self.strings):
            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
            sz_bytes = encint(len(utf8))
            raw = sz_bytes + utf8
            if 0xfbf8 - buf.tell() < 6 + len(raw):
                # Records in PDB files cannot be larger than 0x10000, so we
                # stop well before that.
                pad = 0xfbf8 - buf.tell()
                buf.write(b'\0' * pad)
                self.records.append(buf.getvalue())
                # io.BytesIO.truncate() leaves the stream position where it
                # was, so we must rewind explicitly. Without the seek the
                # next write would zero-fill the new record up to the old
                # position and every following string would trigger another
                # (bogus) record flush.
                buf.seek(0)
                buf.truncate(0)
                offset = len(self.records) * 0x10000
            buf.write(raw)
            self.strings[key] = offset
            offset += len(raw)

        # NOTE(review): the trailing record is appended unconditionally, so
        # self.records is never empty and bool(self) is always True, even
        # when no strings were supplied. Left as is to keep the record
        # layout existing callers see.
        self.records.append(align_block(buf.getvalue()))

    def __getitem__(self, string):
        '''
        Return the offset recorded for ``string``. Raises KeyError if
        ``string`` was not passed to the constructor.
        '''
        return self.strings[string]

    def __bool__(self):
        return bool(self.records)
    __nonzero__ = __bool__

# }}}

View File

@ -13,54 +13,21 @@ from cStringIO import StringIO
from collections import OrderedDict, defaultdict
from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
encode_tbs, align_block, utf8_text, RECORD_SIZE)
encode_tbs, align_block, RECORD_SIZE, CNCX as CNCX_)
class CNCX(object): # {{{
'''
Create the CNCX records. These are records containing all the strings from
the NCX. Each record is of the form: <vwi string size><utf-8 encoded
string>
'''
MAX_STRING_LENGTH = 500
class CNCX(CNCX_): # {{{
def __init__(self, toc, is_periodical):
self.strings = OrderedDict()
strings = []
for item in toc.iterdescendants(breadth_first=True):
self.strings[item.title] = 0
strings.append(item.title)
if is_periodical:
self.strings[item.klass] = 0
strings.append(item.klass)
if item.author:
self.strings[item.author] = 0
strings.append(item.author)
if item.description:
self.strings[item.description] = 0
self.records = []
offset = 0
buf = StringIO()
for key in tuple(self.strings.iterkeys()):
utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
l = len(utf8)
sz_bytes = encint(l)
raw = sz_bytes + utf8
if 0xfbf8 - buf.tell() < 6 + len(raw):
# Records in PDB files cannot be larger than 0x10000, so we
# stop well before that.
pad = 0xfbf8 - buf.tell()
buf.write(b'\0' * pad)
self.records.append(buf.getvalue())
buf.truncate(0)
offset = len(self.records) * 0x10000
buf.write(raw)
self.strings[key] = offset
offset += len(raw)
self.records.append(align_block(buf.getvalue()))
def __getitem__(self, string):
return self.strings[string]
strings.append(item.description)
CNCX_.__init__(self, strings)
# }}}
class TAGX(object): # {{{

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import namedtuple
from struct import pack
from calibre.ebooks.mobi.utils import CNCX
# Description of a single tag in a tag table. The fields after ``name`` are
# the numeric values that get serialized, in this order.
TagMeta = namedtuple('TagMeta', [
    'name', 'number', 'values_per_entry', 'bitmask', 'end_flag'])

# Entry that closes a tag table: every numeric field is zero except end_flag.
EndTagTable = TagMeta(name='eof', number=0, values_per_entry=0, bitmask=0,
        end_flag=1)
class Index(object):

    '''
    Base class for the index tables. Subclasses override ``tag_types`` (and,
    where needed, ``cncx`` and ``control_byte_count``).
    '''

    control_byte_count = 1
    cncx = CNCX()
    tag_types = (EndTagTable,)

    @classmethod
    def generate_tagx(cls):
        '''
        Return the TAGX block for this index as a bytestring: the magic
        ``TAGX``, two big-endian unsigned 32 bit integers (total block length
        and control byte count) and then the tag table itself.
        '''
        # Every field of each tag except its name contributes one byte
        table = bytearray(
            value for meta in cls.tag_types for value in meta[1:])
        # 12 == len(b'TAGX') + the two 4 byte integers packed below
        lengths = pack(b'>II', 12+len(table), cls.control_byte_count)
        return b'TAGX' + lengths + bytes(table)
class SkelIndex(Index):

    '''
    Index built from a skeleton table. Each entry of ``skel_table`` must
    provide the attributes ``name``, ``chunk_count``, ``start_pos`` and
    ``length``.
    '''

    # TagMeta is a namedtuple, so calling it with a single tuple argument
    # raises TypeError. TagMeta._make() builds an instance from any iterable
    # of the five fields, which also covers the ready-made EndTagTable.
    tag_types = tuple(map(TagMeta._make, (
        ('chunk_count', 1, 1, 3, 0),
        ('geometry', 6, 2, 12, 0),
        EndTagTable
    )))

    def __init__(self, skel_table):
        '''
        Populate ``self.entries`` with one ``(name, tags)`` pair per item of
        ``skel_table``, where ``tags`` maps tag name to a tuple of values.
        '''
        self.entries = [
            (s.name, {
                # Don't ask me why these entries have to be repeated twice
                'chunk_count':(s.chunk_count, s.chunk_count),
                'geometry':(s.start_pos, s.length, s.start_pos, s.length),
            }) for s in skel_table
        ]
class ChunkIndex(Index):

    '''
    Index built from a chunk table. Each entry of ``chunk_table`` must
    provide the attributes ``selector``, ``insert_pos``, ``file_number``,
    ``sequence_number``, ``start_pos`` and ``length``.
    '''

    # TagMeta is a namedtuple, so calling it with a single tuple argument
    # raises TypeError. TagMeta._make() builds an instance from any iterable
    # of the five fields, which also covers the ready-made EndTagTable.
    tag_types = tuple(map(TagMeta._make, (
        ('cncx_offset', 2, 1, 1, 0),
        ('file_number', 3, 1, 2, 0),
        ('sequence_number', 4, 1, 4, 0),
        ('geometry', 6, 2, 8, 0),
        EndTagTable
    )))

    def __init__(self, chunk_table):
        '''
        Build the per-instance CNCX from the chunk selectors and populate
        ``self.entries`` with one ``(zero padded insert position, tags)``
        pair per chunk.

        ``chunk_table`` is iterated twice, so it must be a re-iterable
        container (not a one-shot generator).
        '''
        self.cncx = CNCX(c.selector for c in chunk_table)

        # The loop variable must be ``c``: the entry expression reads ``c``,
        # and binding a different name raises NameError.
        self.entries = [
            ('%010d'%c.insert_pos, {
                'cncx_offset':self.cncx[c.selector],
                'file_number':c.file_number,
                'sequence_number':c.sequence_number,
                'geometry':(c.start_pos, c.length),
            }) for c in chunk_table
        ]