From 081897ae5723958830db099240dd461c521b822f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Apr 2012 22:39:32 +0530
Subject: [PATCH] KF8 Output: Start work on the index layer

---
 src/calibre/ebooks/mobi/utils.py           | 53 ++++++++++++
 src/calibre/ebooks/mobi/writer2/indexer.py | 49 ++---------
 src/calibre/ebooks/mobi/writer8/index.py   | 95 ++++++++++++++++++++++
 3 files changed, 156 insertions(+), 41 deletions(-)
 create mode 100644 src/calibre/ebooks/mobi/writer8/index.py

diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py
index fe5cd7eaf2..319af30f86 100644
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
 
 import struct, string, imghdr, zlib, os
 from collections import OrderedDict
+from io import BytesIO
 
 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
 from calibre.ebooks import normalize
@@ -549,3 +550,55 @@ def create_text_record(text):
 
     return data, overlap
 
+class CNCX(object): # {{{
+
+    '''
+    Create the CNCX records. These are records containing all the strings from
+    an index. Each string is stored as its byte length encoded with encint()
+    followed by the utf-8 encoded string itself.
+
+    ``records`` is the list of finished records. Indexing the object with one
+    of the original strings returns the offset that was assigned to it.
+    '''
+
+    MAX_STRING_LENGTH = 500
+
+    def __init__(self, strings=()):
+        self.strings = OrderedDict((s, 0) for s in strings)
+
+        self.records = []
+        offset = 0
+        buf = BytesIO()
+        for key in tuple(self.strings.iterkeys()):
+            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
+            l = len(utf8)
+            sz_bytes = encint(l)
+            raw = sz_bytes + utf8
+            if 0xfbf8 - buf.tell() < 6 + len(raw):
+                # Records in PDB files cannot be larger than 0x10000, so we
+                # stop well before that.
+                pad = 0xfbf8 - buf.tell()
+                buf.write(b'\0' * pad)
+                self.records.append(buf.getvalue())
+                # BytesIO.truncate() does not move the stream position, so
+                # rewind first or the next record starts with a run of NULs.
+                buf.seek(0)
+                buf.truncate(0)
+                offset = len(self.records) * 0x10000
+            buf.write(raw)
+            self.strings[key] = offset
+            offset += len(raw)
+
+        self.records.append(align_block(buf.getvalue()))
+
+    def __getitem__(self, string):
+        ' Offset assigned to string. Raises KeyError for unknown strings. '
+        return self.strings[string]
+
+    def __bool__(self):
+        return bool(self.records)
+    __nonzero__ = __bool__
+
+# }}}
+
+
diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py
index 134fbadc60..be926a80a0 100644
--- a/src/calibre/ebooks/mobi/writer2/indexer.py
+++ b/src/calibre/ebooks/mobi/writer2/indexer.py
@@ -13,54 +13,21 @@ from cStringIO import StringIO
 from collections import OrderedDict, defaultdict
 
 from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
-        encode_tbs, align_block, utf8_text, RECORD_SIZE)
+        encode_tbs, align_block, RECORD_SIZE, CNCX as CNCX_)
 
-class CNCX(object): # {{{
-
-    '''
-    Create the CNCX records. These are records containing all the strings from
-    the NCX. Each record is of the form: <vwi string size><utf-8 encoded
-    string>
-    '''
-
-    MAX_STRING_LENGTH = 500
+class CNCX(CNCX_): # {{{
 
     def __init__(self, toc, is_periodical):
-        self.strings = OrderedDict()
-
+        strings = []
         for item in toc.iterdescendants(breadth_first=True):
-            self.strings[item.title] = 0
+            strings.append(item.title)
             if is_periodical:
-                self.strings[item.klass] = 0
+                strings.append(item.klass)
             if item.author:
-                self.strings[item.author] = 0
+                strings.append(item.author)
             if item.description:
-                self.strings[item.description] = 0
-
-        self.records = []
-        offset = 0
-        buf = StringIO()
-        for key in tuple(self.strings.iterkeys()):
-            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
-            l = len(utf8)
-            sz_bytes = encint(l)
-            raw = sz_bytes + utf8
-            if 0xfbf8 - buf.tell() < 6 + len(raw):
-                # Records in PDB files cannot be larger than 0x10000, so we
-                # stop well before that.
-                pad = 0xfbf8 - buf.tell()
-                buf.write(b'\0' * pad)
-                self.records.append(buf.getvalue())
-                buf.truncate(0)
-                offset = len(self.records) * 0x10000
-            buf.write(raw)
-            self.strings[key] = offset
-            offset += len(raw)
-
-        self.records.append(align_block(buf.getvalue()))
-
-    def __getitem__(self, string):
-        return self.strings[string]
+                strings.append(item.description)
+        CNCX_.__init__(self, strings)
 # }}}
 
 class TAGX(object): # {{{
diff --git a/src/calibre/ebooks/mobi/writer8/index.py b/src/calibre/ebooks/mobi/writer8/index.py
new file mode 100644
index 0000000000..a2b991a612
--- /dev/null
+++ b/src/calibre/ebooks/mobi/writer8/index.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+from future_builtins import map
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from collections import namedtuple
+from struct import pack
+
+from calibre.ebooks.mobi.utils import CNCX
+
+TagMeta = namedtuple('TagMeta',
+        'name number values_per_entry bitmask end_flag')
+EndTagTable = TagMeta('eof', 0, 0, 0, 1)
+
+class Index(object):
+
+    '''
+    Base class for the KF8 indices. Subclasses list the tags they use in
+    tag_types and fill in self.entries (and self.cncx when they need one).
+    '''
+
+    control_byte_count = 1
+    cncx = CNCX()
+    tag_types = (EndTagTable,)
+
+    @classmethod
+    def generate_tagx(cls):
+        '''
+        Return the TAGX block for this index as a bytestring: the TAGX
+        signature, the total block length, the control byte count and then
+        four bytes (number, values_per_entry, bitmask, end_flag) per tag.
+        '''
+        header = b'TAGX'
+        byts = bytearray()
+        for tag_meta in cls.tag_types:
+            byts.extend(tag_meta[1:])
+        # table length, control byte count
+        header += pack(b'>II', 12+len(byts), cls.control_byte_count)
+        return header + bytes(byts)
+
+class SkelIndex(Index):
+
+    ''' Index with one entry per item in the skeleton table. '''
+
+    # _make is needed because map() hands each row over as a single tuple
+    tag_types = tuple(map(TagMeta._make, (
+        ('chunk_count', 1, 1, 3, 0),
+        ('geometry', 6, 2, 12, 0),
+        EndTagTable
+    )))
+
+    def __init__(self, skel_table):
+        self.entries = [
+                (s.name, {
+                    # Don't ask me why these entries have to be repeated twice
+                    'chunk_count':(s.chunk_count, s.chunk_count),
+                    'geometry':(s.start_pos, s.length, s.start_pos, s.length),
+                    }) for s in skel_table
+        ]
+
+
+class ChunkIndex(Index):
+
+    '''
+    Index with one entry per item in the chunk table. The selectors of the
+    chunks are stored in the CNCX and referenced by offset.
+    '''
+
+    tag_types = tuple(map(TagMeta._make, (
+        ('cncx_offset', 2, 1, 1, 0),
+        ('file_number', 3, 1, 2, 0),
+        ('sequence_number', 4, 1, 4, 0),
+        ('geometry', 6, 2, 8, 0),
+        EndTagTable
+    )))
+
+    def __init__(self, chunk_table):
+        self.cncx = CNCX(c.selector for c in chunk_table)
+
+        self.entries = [
+                ('%010d'%c.insert_pos, {
+                    'cncx_offset':self.cncx[c.selector],
+                    'file_number':c.file_number,
+                    'sequence_number':c.sequence_number,
+                    'geometry':(c.start_pos, c.length),
+                    }) for c in chunk_table
+        ]
+
+
+