Mirror of https://github.com/kovidgoyal/calibre.git
KF8 Output: Start work on the index layer
Parent: 6c631e0e64
Commit: 081897ae57
@@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
 
 import struct, string, imghdr, zlib, os
 from collections import OrderedDict
+from io import BytesIO
 
 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
 from calibre.ebooks import normalize
@@ -549,3 +550,48 @@ def create_text_record(text):
 
     return data, overlap
 
+
+class CNCX(object): # {{{
+
+    '''
+    Create the CNCX records. These are records containing all the strings from
+    an index. Each record is of the form: <vwi string size><utf-8 encoded
+    string>
+    '''
+
+    MAX_STRING_LENGTH = 500
+
+    def __init__(self, strings=()):
+        self.strings = OrderedDict((s, 0) for s in strings)
+
+        self.records = []
+        offset = 0
+        buf = BytesIO()
+        for key in tuple(self.strings.iterkeys()):
+            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
+            l = len(utf8)
+            sz_bytes = encint(l)
+            raw = sz_bytes + utf8
+            if 0xfbf8 - buf.tell() < 6 + len(raw):
+                # Records in PDB files cannot be larger than 0x10000, so we
+                # stop well before that.
+                pad = 0xfbf8 - buf.tell()
+                buf.write(b'\0' * pad)
+                self.records.append(buf.getvalue())
+                # Start a new record; seek(0) is needed as truncate() does
+                # not rewind a BytesIO
+                buf.seek(0), buf.truncate(0)
+                offset = len(self.records) * 0x10000
+            buf.write(raw)
+            self.strings[key] = offset
+            offset += len(raw)
+
+        self.records.append(align_block(buf.getvalue()))
+
+    def __getitem__(self, string):
+        return self.strings[string]
+
+    def __bool__(self):
+        return bool(self.records)
+    __nonzero__ = __bool__
+
+    # }}}
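The record layout described in the CNCX docstring (<vwi string size><utf-8 encoded string>) is easy to illustrate on its own. The sketch below is not calibre code: encint() here is a simplified stand-in for calibre.ebooks.mobi.utils.encint (forward-encoded MOBI variable-width integer), and build_cncx() is a hypothetical helper that mirrors what CNCX.__init__ does for strings that fit in a single record.

from io import BytesIO

def encint(value):
    # Simplified stand-in for calibre's forward encint: 7 bits per byte,
    # most significant bits first, high bit set on the final byte.
    byts = bytearray()
    while True:
        byts.append(value & 0x7f)
        value >>= 7
        if not value:
            break
    byts.reverse()
    byts[-1] |= 0x80
    return bytes(byts)

def build_cncx(strings):
    # Hypothetical helper: lay out <vwi length><utf-8 string> entries and
    # remember the byte offset at which each string starts.
    buf, offsets = BytesIO(), {}
    for s in strings:
        utf8 = s.encode('utf-8')
        offsets[s] = buf.tell()
        buf.write(encint(len(utf8)) + utf8)
    return buf.getvalue(), offsets

data, offsets = build_cncx(['Chapter One', 'Chapter Two'])
print(offsets)         # {'Chapter One': 0, 'Chapter Two': 12}
print(repr(data[:1]))  # length 11 (0x0b) with the stop bit 0x80 set -> b'\x8b'

The offsets returned here play the same role as the values CNCX stores in self.strings and hands back via __getitem__.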
@@ -13,54 +13,21 @@ from cStringIO import StringIO
 from collections import OrderedDict, defaultdict
 
 from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
-        encode_tbs, align_block, utf8_text, RECORD_SIZE)
+        encode_tbs, align_block, RECORD_SIZE, CNCX as CNCX_)
 
-class CNCX(object): # {{{
-    '''
-    Create the CNCX records. These are records containing all the strings from
-    the NCX. Each record is of the form: <vwi string size><utf-8 encoded
-    string>
-    '''
-
-    MAX_STRING_LENGTH = 500
+class CNCX(CNCX_): # {{{
 
     def __init__(self, toc, is_periodical):
-        self.strings = OrderedDict()
+        strings = []
         for item in toc.iterdescendants(breadth_first=True):
-            self.strings[item.title] = 0
+            strings.append(item.title)
             if is_periodical:
-                self.strings[item.klass] = 0
+                strings.append(item.klass)
                 if item.author:
-                    self.strings[item.author] = 0
+                    strings.append(item.author)
                 if item.description:
-                    self.strings[item.description] = 0
+                    strings.append(item.description)
 
-        self.records = []
-        offset = 0
-        buf = StringIO()
-        for key in tuple(self.strings.iterkeys()):
-            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
-            l = len(utf8)
-            sz_bytes = encint(l)
-            raw = sz_bytes + utf8
-            if 0xfbf8 - buf.tell() < 6 + len(raw):
-                # Records in PDB files cannot be larger than 0x10000, so we
-                # stop well before that.
-                pad = 0xfbf8 - buf.tell()
-                buf.write(b'\0' * pad)
-                self.records.append(buf.getvalue())
-                buf.truncate(0)
-                offset = len(self.records) * 0x10000
-            buf.write(raw)
-            self.strings[key] = offset
-            offset += len(raw)
-
-        self.records.append(align_block(buf.getvalue()))
-
-    def __getitem__(self, string):
-        return self.strings[string]
+        CNCX_.__init__(self, strings)
     # }}}
 
 class TAGX(object): # {{{
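One effect of handing the collected strings to the base class, which keys an OrderedDict on them, is that duplicates are stored only once; periodical TOCs repeat klass values such as 'article' for every entry. A standalone illustration of that behaviour (plain Python, not calibre code, with made-up sample strings):

from collections import OrderedDict

strings = ['Section 1', 'section', 'Story A', 'article', 'Story B', 'article']
unique = OrderedDict((s, 0) for s in strings)
print(list(unique))
# ['Section 1', 'section', 'Story A', 'article', 'Story B']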
src/calibre/ebooks/mobi/writer8/index.py (new file, 78 lines)
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
from future_builtins import map

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from collections import namedtuple
from struct import pack

from calibre.ebooks.mobi.utils import CNCX

# TagMeta describes one entry in a TAGX tag table. The TagMeta wrapper lets
# the tag_types tuples below mix plain tuples and ready-made instances.
TagMeta_ = namedtuple('TagMeta',
        'name number values_per_entry bitmask end_flag')
TagMeta = lambda x: TagMeta_(*x) if isinstance(x, tuple) else x
EndTagTable = TagMeta_('eof', 0, 0, 0, 1)

class Index(object):

    control_byte_count = 1
    cncx = CNCX()
    tag_types = (EndTagTable,)

    @classmethod
    def generate_tagx(cls):
        header = b'TAGX'
        byts = bytearray()
        for tag_meta in cls.tag_types:
            byts.extend(tag_meta[1:])
        # table length, control byte count
        header += pack(b'>II', 12+len(byts), cls.control_byte_count)
        return header + bytes(byts)

class SkelIndex(Index):

    tag_types = tuple(map(TagMeta, (
        ('chunk_count', 1, 1, 3, 0),
        ('geometry', 6, 2, 12, 0),
        EndTagTable
    )))

    def __init__(self, skel_table):
        self.entries = [
                (s.name, {
                    # Don't ask me why these entries have to be repeated twice
                    'chunk_count':(s.chunk_count, s.chunk_count),
                    'geometry':(s.start_pos, s.length, s.start_pos, s.length),
                    }) for s in skel_table
        ]


class ChunkIndex(Index):

    tag_types = tuple(map(TagMeta, (
        ('cncx_offset', 2, 1, 1, 0),
        ('file_number', 3, 1, 2, 0),
        ('sequence_number', 4, 1, 4, 0),
        ('geometry', 6, 2, 8, 0),
        EndTagTable
    )))

    def __init__(self, chunk_table):
        self.cncx = CNCX(c.selector for c in chunk_table)

        self.entries = [
                ('%010d'%c.insert_pos, {
                    'cncx_offset':self.cncx[c.selector],
                    'file_number':c.file_number,
                    'sequence_number':c.sequence_number,
                    'geometry':(c.start_pos, c.length),
                    }) for c in chunk_table
        ]
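To make the TAGX layout concrete, here is a standalone sketch of the bytes generate_tagx() would produce for the ChunkIndex tag table above: the b'TAGX' magic, two big-endian 32-bit integers (total block length and control byte count), then four bytes per tag (number, values_per_entry, bitmask, end_flag). It mirrors the method rather than calling it, so it runs without calibre installed.

from struct import pack

tags = (            # (number, values_per_entry, bitmask, end_flag)
    (2, 1, 1, 0),   # cncx_offset
    (3, 1, 2, 0),   # file_number
    (4, 1, 4, 0),   # sequence_number
    (6, 2, 8, 0),   # geometry
    (0, 0, 0, 1),   # end-of-table marker (EndTagTable)
)
byts = bytearray()
for t in tags:
    byts.extend(t)
tagx = b'TAGX' + pack(b'>II', 12 + len(byts), 1) + bytes(byts)
print(len(tagx))         # 32 = 4 (magic) + 8 (header fields) + 5 tags * 4 bytes
print(repr(tagx[4:12]))  # b'\x00\x00\x00 \x00\x00\x00\x01' -> length 32, 1 control byte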