KF8 Output: Start work on the index layer
commit 081897ae57
parent 6c631e0e64
@@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
 
 import struct, string, imghdr, zlib, os
 from collections import OrderedDict
+from io import BytesIO
 
 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
 from calibre.ebooks import normalize
@@ -549,3 +550,48 @@ def create_text_record(text):
 
     return data, overlap
 
+class CNCX(object): # {{{
+
+    '''
+    Create the CNCX records. These are records containing all the strings from
+    an index. Each record is of the form: <vwi string size><utf-8 encoded
+    string>
+    '''
+
+    MAX_STRING_LENGTH = 500
+
+    def __init__(self, strings=()):
+        self.strings = OrderedDict((s, 0) for s in strings)
+
+        self.records = []
+        offset = 0
+        buf = BytesIO()
+        for key in tuple(self.strings.iterkeys()):
+            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
+            l = len(utf8)
+            sz_bytes = encint(l)
+            raw = sz_bytes + utf8
+            if 0xfbf8 - buf.tell() < 6 + len(raw):
+                # Records in PDB files cannot be larger than 0x10000, so we
+                # stop well before that.
+                pad = 0xfbf8 - buf.tell()
+                buf.write(b'\0' * pad)
+                self.records.append(buf.getvalue())
+                # Seek before truncating: io.BytesIO.truncate() does not
+                # reset the stream position on its own.
+                buf.seek(0)
+                buf.truncate(0)
+                offset = len(self.records) * 0x10000
+            buf.write(raw)
+            self.strings[key] = offset
+            offset += len(raw)
+
+        self.records.append(align_block(buf.getvalue()))
+
+    def __getitem__(self, string):
+        return self.strings[string]
+
+    def __bool__(self):
+        return bool(self.records)
+    __nonzero__ = __bool__
+
+# }}}
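The new CNCX class is shared infrastructure for the KF8 index layer: each record holds <vwi string size><utf-8 string> entries, and the offsets it hands out pack the record number into the high bits (record_index * 0x10000 + position within the record), which is why each record is capped well below 0x10000 bytes. A minimal illustrative sketch of one entry follows, assuming MOBI's forward-encoded variable-width integers (seven value bits per byte, stop bit 0x80 on the final byte); vwi() and cncx_entry() are stand-ins for illustration only, not calibre's actual encint() and utf8_text() helpers.

def vwi(value):
    # Forward-encoded variable-width integer: 7 bits per byte, most
    # significant group first, stop bit (0x80) set on the final byte.
    byts = bytearray()
    while True:
        byts.insert(0, value & 0x7f)
        value >>= 7
        if not value:
            break
    byts[-1] |= 0x80
    return bytes(byts)

def cncx_entry(string, max_len=500):
    # <vwi string size><utf-8 encoded string>, mirroring CNCX.__init__ above
    utf8 = string[:max_len].encode('utf-8')
    return vwi(len(utf8)) + utf8

print(repr(cncx_entry(u'Chapter One')))  # '\x8bChapter One': 1 length byte + 11 bytes of text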
@@ -13,54 +13,21 @@ from cStringIO import StringIO
 from collections import OrderedDict, defaultdict
 
 from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
-        encode_tbs, align_block, utf8_text, RECORD_SIZE)
+        encode_tbs, align_block, RECORD_SIZE, CNCX as CNCX_)
 
-class CNCX(object): # {{{
-
-    '''
-    Create the CNCX records. These are records containing all the strings from
-    the NCX. Each record is of the form: <vwi string size><utf-8 encoded
-    string>
-    '''
-
-    MAX_STRING_LENGTH = 500
+class CNCX(CNCX_): # {{{
 
     def __init__(self, toc, is_periodical):
-        self.strings = OrderedDict()
-
+        strings = []
         for item in toc.iterdescendants(breadth_first=True):
-            self.strings[item.title] = 0
+            strings.append(item.title)
             if is_periodical:
-                self.strings[item.klass] = 0
+                strings.append(item.klass)
                 if item.author:
-                    self.strings[item.author] = 0
+                    strings.append(item.author)
                 if item.description:
-                    self.strings[item.description] = 0
-
-        self.records = []
-        offset = 0
-        buf = StringIO()
-        for key in tuple(self.strings.iterkeys()):
-            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
-            l = len(utf8)
-            sz_bytes = encint(l)
-            raw = sz_bytes + utf8
-            if 0xfbf8 - buf.tell() < 6 + len(raw):
-                # Records in PDB files cannot be larger than 0x10000, so we
-                # stop well before that.
-                pad = 0xfbf8 - buf.tell()
-                buf.write(b'\0' * pad)
-                self.records.append(buf.getvalue())
-                buf.truncate(0)
-                offset = len(self.records) * 0x10000
-            buf.write(raw)
-            self.strings[key] = offset
-            offset += len(raw)
-
-        self.records.append(align_block(buf.getvalue()))
-
-    def __getitem__(self, string):
-        return self.strings[string]
+                    strings.append(item.description)
+        CNCX_.__init__(self, strings)
     # }}}
 
 class TAGX(object): # {{{
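With the record building moved into calibre.ebooks.mobi.utils, the indexer's CNCX now only collects the NCX strings (titles, and for periodicals the class, author and description) and delegates to the shared base class. A small usage sketch of that base class, assuming calibre.ebooks.mobi.utils is importable and using made-up strings; the offsets shown assume each length fits in a single vwi byte.

from calibre.ebooks.mobi.utils import CNCX

cncx = CNCX(['Chapter One', 'article', 'An Author'])
print(cncx['Chapter One'])  # 0: the first entry starts at the beginning
print(cncx['article'])      # 12: one length byte plus the 11 bytes of 'Chapter One'
print(bool(cncx))           # True once the records have been built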
src/calibre/ebooks/mobi/writer8/index.py (new file, 78 lines)
@@ -0,0 +1,78 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
from future_builtins import map

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from collections import namedtuple
from struct import pack

from calibre.ebooks.mobi.utils import CNCX

TagMeta_ = namedtuple('TagMeta',
        'name number values_per_entry bitmask end_flag')
# TagMeta converts a plain five-field tuple into a TagMeta_ instance, so the
# tag tables below can be written as bare tuples.
TagMeta = lambda x: TagMeta_(*x) if isinstance(x, tuple) else x
EndTagTable = TagMeta(('eof', 0, 0, 0, 1))

class Index(object):

    control_byte_count = 1
    cncx = CNCX()
    tag_types = (EndTagTable,)

    @classmethod
    def generate_tagx(cls):
        header = b'TAGX'
        byts = bytearray()
        for tag_meta in cls.tag_types:
            byts.extend(tag_meta[1:])
        # table length, control byte count
        header += pack(b'>II', 12 + len(byts), cls.control_byte_count)
        return header + bytes(byts)

class SkelIndex(Index):

    tag_types = tuple(map(TagMeta, (
        ('chunk_count', 1, 1, 3, 0),
        ('geometry',    6, 2, 12, 0),
        EndTagTable
    )))

    def __init__(self, skel_table):
        self.entries = [
                (s.name, {
                    # Dont ask me why these entries have to be repeated twice
                    'chunk_count': (s.chunk_count, s.chunk_count),
                    'geometry': (s.start_pos, s.length, s.start_pos, s.length),
                }) for s in skel_table
        ]


class ChunkIndex(Index):

    tag_types = tuple(map(TagMeta, (
        ('cncx_offset', 2, 1, 1, 0),
        ('file_number', 3, 1, 2, 0),
        ('sequence_number', 4, 1, 4, 0),
        ('geometry', 6, 2, 8, 0),
        EndTagTable
    )))

    def __init__(self, chunk_table):
        self.cncx = CNCX(c.selector for c in chunk_table)

        self.entries = [
                ('%010d' % c.insert_pos, {
                    'cncx_offset': self.cncx[c.selector],
                    'file_number': c.file_number,
                    'sequence_number': c.sequence_number,
                    'geometry': (c.start_pos, c.length),
                }) for c in chunk_table
        ]
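The TAGX block produced by generate_tagx() is just the four numeric TagMeta fields of every tag, one four-byte row each, after a 12-byte header whose length field counts the header itself. A quick layout check plus a construction example, assuming the new module is importable as calibre.ebooks.mobi.writer8.index; the Chunk row below is hypothetical, carrying only the attribute names the comprehension above reads (the real chunk_table is built elsewhere in the KF8 writer).

from binascii import hexlify
from collections import namedtuple

from calibre.ebooks.mobi.writer8.index import ChunkIndex

# TAGX for ChunkIndex: 'TAGX', length 0x20 (12-byte header + 5 rows of 4
# bytes), 1 control byte, then one row per tag with the eof row last.
print(hexlify(ChunkIndex.generate_tagx()))
# 54414758 00000020 00000001 02010100 03010200 04010400 06020800 00000001
# (spaces added here for readability)

# Hypothetical chunk row with just the attributes ChunkIndex uses
Chunk = namedtuple('Chunk',
        'selector insert_pos file_number sequence_number start_pos length')
ci = ChunkIndex([Chunk('chunk-0-selector', 0, 0, 0, 0, 4096)])
print(ci.entries[0][0])                 # '0000000000': zero padded insert_pos
print(ci.entries[0][1]['cncx_offset'])  # 0: offset of the selector string in the CNCX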