A nice framework for generating MOBI header records

This commit is contained in:
Kovid Goyal 2012-04-21 11:15:31 +05:30
parent 5c72ad513b
commit 9ab4ff1840
6 changed files with 206 additions and 11 deletions

View File

@ -169,6 +169,7 @@ class MOBIOutput(OutputFormatPlugin):
self.remove_html_cover() self.remove_html_cover()
resources = Resources(oeb, opts, self.is_periodical, resources = Resources(oeb, opts, self.is_periodical,
add_fonts=create_kf8) add_fonts=create_kf8)
self.check_for_periodical()
kf8 = self.create_kf8(resources) if create_kf8 else None kf8 = self.create_kf8(resources) if create_kf8 else None
@ -203,7 +204,6 @@ class MOBIOutput(OutputFormatPlugin):
resources.add_extra_images() resources.add_extra_images()
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts) mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.ebooks.mobi.writer2.main import MobiWriter from calibre.ebooks.mobi.writer2.main import MobiWriter
writer = MobiWriter(opts, resources, kf8, writer = MobiWriter(opts, resources, kf8,

View File

@ -17,7 +17,7 @@ from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
File = namedtuple('File', File = namedtuple('File',
'file_number name divtbl_count start_position length') 'file_number name divtbl_count start_position length')
Elem = namedtuple('Elem', Elem = namedtuple('Chunk',
'insert_pos toc_text file_number sequence_number start_pos ' 'insert_pos toc_text file_number sequence_number start_pos '
'length') 'length')
@ -110,7 +110,7 @@ class SECTIndex(Index):
for i, text in enumerate(self.table.iterkeys()): for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text] tag_map = self.table[text]
if set(tag_map.iterkeys()) != {2, 3, 4, 6}: if set(tag_map.iterkeys()) != {2, 3, 4, 6}:
raise ValueError('SECT Index has unknown tags: %s'% raise ValueError('Chunk Index has unknown tags: %s'%
(set(tag_map.iterkeys())-{2, 3, 4, 6})) (set(tag_map.iterkeys())-{2, 3, 4, 6}))
toc_text = self.cncx[tag_map[2][0]] toc_text = self.cncx[tag_map[2][0]]

View File

@ -198,7 +198,7 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'skel.record'), 'wb') as fo: with open(os.path.join(ddir, 'skel.record'), 'wb') as fo:
fo.write(str(f.skel_index).encode('utf-8')) fo.write(str(f.skel_index).encode('utf-8'))
with open(os.path.join(ddir, 'sect.record'), 'wb') as fo: with open(os.path.join(ddir, 'chunks.record'), 'wb') as fo:
fo.write(str(f.sect_index).encode('utf-8')) fo.write(str(f.sect_index).encode('utf-8'))
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo: with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:

View File

@ -583,7 +583,9 @@ class CNCX(object): # {{{
self.strings[key] = offset self.strings[key] = offset
offset += len(raw) offset += len(raw)
self.records.append(align_block(buf.getvalue())) val = buf.getvalue()
if val:
self.records.append(align_block(val))
def __getitem__(self, string): def __getitem__(self, string):
return self.strings[string] return self.strings[string]
@ -592,6 +594,9 @@ class CNCX(object): # {{{
return bool(self.records) return bool(self.records)
__nonzero__ = __bool__ __nonzero__ = __bool__
def __len__(self):
return len(self.records)
# }}} # }}}

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from io import BytesIO
from collections import OrderedDict
from struct import pack
from calibre.ebooks.mobi.utils import align_block
# 32-bit value with every bit set; exposed to header definitions as ``NULL``
NULL = 0xffffffff


def zeroes(x):
    '''Return a byte string made of ``x`` NUL (0x00) bytes.'''
    return b'\0' * x


def nulls(x):
    '''Return a byte string made of ``x`` 0xff bytes.'''
    return b'\xff' * x
class Header(OrderedDict):
    '''
    Declarative builder for a binary header record.

    Subclasses describe the record layout in :attr:`DEFINITION`, one field
    per line, in the order the fields are to be serialized:

        * ``name = expression`` -- the expression is evaluated with the names
          ``zeroes``, ``nulls``, ``NULL`` and ``DYN`` available. ``DYN``
          evaluates to ``None`` and marks a field that must be supplied when
          the header is rendered.
        * ``name`` (no ``=``) -- the field defaults to the integer 0.
        * blank lines and lines starting with ``#`` are ignored.

    The instance is an ordered mapping of field name to current value.
    Calling the instance renders the record as a byte string.

    Class attributes:

        * ``HEADER_NAME``: bytes written at the very start of the record.
        * ``ALIGN_BLOCK``: if true the rendered record is passed through
          ``align_block()`` before being returned.
        * ``POSITIONS``: maps ``pos_field -> field``. After rendering, the
          byte offset of ``field`` (measured from the start of the record,
          i.e. including ``HEADER_NAME``) is written as a big-endian 32-bit
          unsigned integer over the bytes of ``pos_field``.
    '''

    HEADER_NAME = b''

    DEFINITION = '''
    '''

    ALIGN_BLOCK = False
    POSITIONS = {}

    # Types serialized as 32-bit integers: (int, long) on Python 2, where
    # ``long`` exists, and just (int,) on Python 3.
    try:
        _INTEGER_TYPES = (int, long)
    except NameError:
        _INTEGER_TYPES = (int,)

    def __init__(self):
        '''
        Populate the mapping from :attr:`DEFINITION`.

        :raises ValueError: if the same field name is defined twice.
        '''
        OrderedDict.__init__(self)

        for line in self.DEFINITION.splitlines():
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # partition() yields (name, '=', value); value is '' when the
            # line has no '='
            name, val = [x.strip() for x in line.partition('=')[0::2]]
            if val:
                # NOTE: eval() is only safe because DEFINITION is a class
                # level constant written by the developer. Never build a
                # DEFINITION from external data.
                val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
                    'nulls':nulls})
            else:
                val = 0
            if name in self:
                raise ValueError('Duplicate field in definition: %r'%name)
            self[name] = val

    def __call__(self, **kwargs):
        '''
        Set the fields named in ``kwargs`` and render the header.

        Integer values are written as big-endian 32-bit unsigned integers,
        any other value is written to the record as is, so it must be a byte
        string.

        :param kwargs: field name -> value. The values are stored on the
                       instance and therefore persist across calls.
        :return: the rendered record as a byte string
        :raises KeyError: if a keyword is not a field of this header. No
                          field is modified in that case.
        :raises ValueError: if a dynamic field is still unset (``None``)
        '''
        # Validate every name before touching any field, so that a bad
        # keyword cannot leave the header partially updated.
        for name in kwargs:
            if name not in self:
                raise KeyError('Not a valid header field: %r'%name)
        for name, val in kwargs.items():
            self[name] = val

        positions = {}
        buf = BytesIO()
        buf.write(bytes(self.HEADER_NAME))
        for name, val in self.items():
            val = self.format_value(name, val)
            # Offset of this field from the start of the record
            positions[name] = buf.tell()
            if val is None:
                raise ValueError('Dynamic field %r not set'%name)
            if isinstance(val, self._INTEGER_TYPES):
                val = pack(b'>I', val)
            buf.write(val)

        # Back-patch the offset fields now that all positions are known
        for pos_field, field in self.POSITIONS.items():
            buf.seek(positions[pos_field])
            buf.write(pack(b'>I', positions[field]))

        ans = buf.getvalue()
        if self.ALIGN_BLOCK:
            ans = align_block(ans)
        return ans

    def format_value(self, name, val):
        '''
        Hook called for every field just before it is serialized. The
        default implementation returns ``val`` unchanged; subclasses may
        override it to transform values.
        '''
        return val

View File

@ -12,7 +12,8 @@ from collections import namedtuple
from struct import pack from struct import pack
from io import BytesIO from io import BytesIO
from calibre.ebooks.mobi.utils import CNCX, encint from calibre.ebooks.mobi.utils import CNCX, encint, align_block
from calibre.ebooks.mobi.writer8.header import Header
TagMeta = namedtuple('TagMeta', TagMeta = namedtuple('TagMeta',
'name number values_per_entry bitmask end_flag') 'name number values_per_entry bitmask end_flag')
@ -23,13 +24,79 @@ EndTagTable = TagMeta('eof', 0, 0, 0, 1)
mask_to_bit_shifts = { 1:0, 2:1, 3:0, 4:2, 8:3, 12:2, 16:4, 32:5, 48:4, 64:6, mask_to_bit_shifts = { 1:0, 2:1, 3:0, 4:2, 8:3, 12:2, 16:4, 32:5, 48:4, 64:6,
128:7, 192: 6 } 128:7, 192: 6 }
class IndexHeader(Header): # {{{
class Index(object): HEADER_NAME = b'INDX'
ALIGN_BLOCK = True
HEADER_LENGTH = 192
DEFINITION = '''
# 4 - 8: Header Length
header_length = {header_length}
# 8 - 16: Unknown
unknown1 = zeroes(8)
# 16 - 20: Index type: 0 - normal 2 - inflection
type = 2
# 20 - 24: IDXT offset (filled in later)
idxt_offset
# 24 - 28: Number of index records
num_of_records = 1
# 28 - 32: Index encoding (65001 = utf-8)
encoding = 65001
# 32 - 36: Unknown
unknown2 = NULL
# 36 - 40: Number of Index entries
num_of_entries = DYN
# 40 - 44: ORDT offset
ordt_offset
# 44 - 48: LIGT offset
ligt_offset
# 48 - 52: Number of ORDT/LIGT? entries
num_of_ordt_entries
# 52 - 56: Number of CNCX records
num_of_cncx = DYN
# 56 - 180: Unknown
unknown3 = zeroes(124)
# 180 - 184: TAGX offset
tagx_offset = {header_length}
# 184 - 192: Unknown
unknown4 = zeroes(8)
# TAGX
tagx = DYN
# Last Index entry
last_index = DYN
# IDXT
idxt = DYN
'''.format(header_length=HEADER_LENGTH)
POSITIONS = {'idxt_offset':'idxt'}
# }}}
class Index(object): # {{{
control_byte_count = 1 control_byte_count = 1
cncx = CNCX() cncx = CNCX()
tag_types = (EndTagTable,) tag_types = (EndTagTable,)
HEADER_LENGTH = IndexHeader.HEADER_LENGTH
@classmethod @classmethod
def generate_tagx(cls): def generate_tagx(cls):
header = b'TAGX' header = b'TAGX'
@ -60,17 +127,18 @@ class Index(object):
control_bytes.append(cbs) control_bytes.append(cbs)
return control_bytes return control_bytes
def build_records(self): def __call__(self):
self.control_bytes = self.calculate_control_bytes_for_each_entry( self.control_bytes = self.calculate_control_bytes_for_each_entry(
self.entries) self.entries)
self.rendered_entries = [] rendered_entries = []
offset = 0 offset = 0
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
IndexEntry = namedtuple('IndexEntry', 'offset length raw') IndexEntry = namedtuple('IndexEntry', 'offset length raw')
for i, x in enumerate(self.entries): for i, x in enumerate(self.entries):
control_bytes = self.control_bytes[i] control_bytes = self.control_bytes[i]
leading_text, tags = x leading_text, tags = x
buf = BytesIO() buf.truncate(0)
raw = bytearray(leading_text) raw = bytearray(leading_text)
raw.insert(0, len(leading_text)) raw.insert(0, len(leading_text))
buf.write(bytes(raw)) buf.write(bytes(raw))
@ -81,8 +149,53 @@ class Index(object):
for val in values: for val in values:
buf.write(encint(val)) buf.write(encint(val))
raw = buf.getvalue() raw = buf.getvalue()
self.rendered_entries.append(IndexEntry(offset, len(raw), raw)) rendered_entries.append(IndexEntry(offset, len(raw), raw))
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
offset += len(raw) offset += len(raw)
index.write(raw)
index_block = align_block(index.getvalue())
idxt_block = align_block(b'IDXT' + idxt.getvalue())
body = index_block + idxt_block
if len(body) + self.HEADER_LENGTH >= 0x10000:
raise ValueError('Index has too many entries, calibre does not'
' support generating multiple index records at this'
' time.')
header = b'INDX'
buf.truncate(0)
buf.write(pack(b'>I', self.HEADER_LENGTH))
buf.write(b'\0'*4) # Unknown
buf.write(pack(b'>I', 1)) # Header type? Or index record number?
buf.write(b'\0'*4) # Unknown
# IDXT block offset
buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))
# Number of index entries
buf.write(pack(b'>I', len(rendered_entries)))
buf.write(b'\xff'*8) # Unknown
buf.write(b'\0'*156) # Unknown
header += buf.getvalue()
index_record = header + body
tagx = self.generate_tagx()
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
b'\0')
header = {
'num_of_entries': len(rendered_entries),
'num_of_cncx': len(self.cncx),
'tagx':tagx,
'idxt':idxt
}
header = IndexHeader()(**header)
self.records = [header, index_record]
self.records.extend(self.cncx.records)
return self.records
# }}}
class SkelIndex(Index): class SkelIndex(Index):