A nice framework for generating MOBI header records

This commit is contained in:
Kovid Goyal 2012-04-21 11:15:31 +05:30
parent 5c72ad513b
commit 9ab4ff1840
6 changed files with 206 additions and 11 deletions

View File

@ -169,6 +169,7 @@ class MOBIOutput(OutputFormatPlugin):
self.remove_html_cover()
resources = Resources(oeb, opts, self.is_periodical,
add_fonts=create_kf8)
self.check_for_periodical()
kf8 = self.create_kf8(resources) if create_kf8 else None
@ -203,7 +204,6 @@ class MOBIOutput(OutputFormatPlugin):
resources.add_extra_images()
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.ebooks.mobi.writer2.main import MobiWriter
writer = MobiWriter(opts, resources, kf8,

View File

@ -17,7 +17,7 @@ from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
File = namedtuple('File',
'file_number name divtbl_count start_position length')
Elem = namedtuple('Elem',
Elem = namedtuple('Chunk',
'insert_pos toc_text file_number sequence_number start_pos '
'length')
@ -110,7 +110,7 @@ class SECTIndex(Index):
for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text]
if set(tag_map.iterkeys()) != {2, 3, 4, 6}:
raise ValueError('SECT Index has unknown tags: %s'%
raise ValueError('Chunk Index has unknown tags: %s'%
(set(tag_map.iterkeys())-{2, 3, 4, 6}))
toc_text = self.cncx[tag_map[2][0]]

View File

@ -198,7 +198,7 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'skel.record'), 'wb') as fo:
fo.write(str(f.skel_index).encode('utf-8'))
with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
with open(os.path.join(ddir, 'chunks.record'), 'wb') as fo:
fo.write(str(f.sect_index).encode('utf-8'))
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:

View File

@ -583,7 +583,9 @@ class CNCX(object): # {{{
self.strings[key] = offset
offset += len(raw)
self.records.append(align_block(buf.getvalue()))
val = buf.getvalue()
if val:
self.records.append(align_block(val))
def __getitem__(self, string):
return self.strings[string]
@ -592,6 +594,9 @@ class CNCX(object): # {{{
return bool(self.records)
__nonzero__ = __bool__
def __len__(self):
return len(self.records)
# }}}

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from io import BytesIO
from collections import OrderedDict
from struct import pack
from calibre.ebooks.mobi.utils import align_block
# All bits set in a 32-bit field. Available to DEFINITION strings as ``NULL``.
NULL = 0xffffffff


def zeroes(x):
    '''Return a byte string consisting of ``x`` zero (0x00) bytes.'''
    return b'\0'*x


def nulls(x):
    '''Return a byte string consisting of ``x`` 0xff bytes.'''
    return b'\xff'*x


class Header(OrderedDict):

    '''
    Declarative description of a binary header record.

    Subclasses describe the record layout in :attr:`DEFINITION`, one field
    per line, in the order the fields are to be written::

        # lines starting with # and blank lines are ignored
        name = <python expression>
        name

    A bare ``name`` gets the value ``0``. The expression is evaluated with
    ``eval()`` and may use ``zeroes(n)``, ``nulls(n)``, ``NULL`` and ``DYN``.
    ``DYN`` evaluates to ``None`` and marks a field that must be supplied
    when the header is rendered. Because ``eval()`` is used, DEFINITION must
    only ever contain trusted, developer-written text.

    The instance is an ordered mapping of field name to value. Calling it
    renders the header to a byte string, see :meth:`__call__`.
    '''

    # Bytes written at the very start of the rendered header
    HEADER_NAME = b''

    # Field definitions, see the class docstring for the syntax
    DEFINITION = '''
    '''

    # If True the rendered header is passed through align_block()
    ALIGN_BLOCK = False

    # Maps the name of a field to the name of another field whose byte
    # position in the rendered header is to be stored in the first field
    POSITIONS = {}

    def __init__(self):
        OrderedDict.__init__(self)

        # Names made available to the expressions in DEFINITION
        namespace = {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, 'nulls':nulls}

        for line in self.DEFINITION.splitlines():
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            name, _, val = line.partition('=')
            name, val = name.strip(), val.strip()
            # A field without an explicit value defaults to zero
            val = eval(val, namespace) if val else 0
            if name in self:
                raise ValueError('Duplicate field in definition: %r'%name)
            self[name] = val

    def __call__(self, **kwargs):
        '''
        Render this header as a byte string.

        :param kwargs: Values for fields, keyed by field name. They are
            stored on this object (and so persist across calls) before the
            header is rendered.
        :return: HEADER_NAME followed by every field in definition order.
            Integer values are written as 4 byte big-endian unsigned
            integers, all other values are written as is. Fields named in
            POSITIONS are overwritten with the position of their target
            field. The result is passed through align_block() when
            ALIGN_BLOCK is True.
        :raises KeyError: If a keyword does not name a defined field.
        :raises ValueError: If a field is still ``None`` (a ``DYN`` field
            that was never set) after format_value() has been applied.
        '''
        # Integral matches int, and long on Python 2, so the same check works
        # on both major versions of Python
        from numbers import Integral

        for name, val in kwargs.items():
            if name not in self:
                raise KeyError('Not a valid header field: %r'%name)
            self[name] = val

        positions = {}
        buf = BytesIO()
        buf.write(bytes(self.HEADER_NAME))
        for name, val in self.items():
            val = self.format_value(name, val)
            # Positions are measured from the start of HEADER_NAME
            positions[name] = buf.tell()
            if val is None:
                raise ValueError('Dynamic field %r not set'%name)
            if isinstance(val, Integral):
                val = pack(b'>I', val)
            buf.write(val)

        # Second pass: now that every position is known, patch the fields
        # that hold the position of another field
        for pos_field, field in self.POSITIONS.items():
            buf.seek(positions[pos_field])
            buf.write(pack(b'>I', positions[field]))

        ans = buf.getvalue()
        if self.ALIGN_BLOCK:
            ans = align_block(ans)
        return ans

    def format_value(self, name, val):
        '''
        Hook called for every field just before it is written. Subclasses
        can override it to convert values, this implementation returns
        ``val`` unchanged.
        '''
        return val

View File

@ -12,7 +12,8 @@ from collections import namedtuple
from struct import pack
from io import BytesIO
from calibre.ebooks.mobi.utils import CNCX, encint
from calibre.ebooks.mobi.utils import CNCX, encint, align_block
from calibre.ebooks.mobi.writer8.header import Header
TagMeta = namedtuple('TagMeta',
'name number values_per_entry bitmask end_flag')
@ -23,13 +24,79 @@ EndTagTable = TagMeta('eof', 0, 0, 0, 1)
mask_to_bit_shifts = { 1:0, 2:1, 3:0, 4:2, 8:3, 12:2, 16:4, 32:5, 48:4, 64:6,
128:7, 192: 6 }
class IndexHeader(Header): # {{{

    '''
    Layout of the header of an INDX record, expressed in the declarative
    syntax of :class:`Header`. The byte ranges in the DEFINITION comments are
    measured from the start of the record, bytes 0 - 4 being HEADER_NAME.

    The fields marked DYN (num_of_entries, num_of_cncx, tagx, last_index and
    idxt) have no default and must be passed in when the header is rendered.
    '''

    HEADER_NAME = b'INDX'
    ALIGN_BLOCK = True
    # Size in bytes of the fixed part of the header, i.e. everything before
    # the tagx field (see the byte ranges in DEFINITION)
    HEADER_LENGTH = 192

    DEFINITION = '''
    # 4 - 8: Header Length
    header_length = {header_length}

    # 8 - 16: Unknown
    unknown1 = zeroes(8)

    # 16 - 20: Index type: 0 - normal 2 - inflection
    type = 2

    # 20 - 24: IDXT offset (filled in later)
    idxt_offset

    # 24 - 28: Number of index records
    num_of_records = 1

    # 28 - 32: Index encoding (65001 = utf-8)
    encoding = 65001

    # 32 - 36: Unknown
    unknown2 = NULL

    # 36 - 40: Number of Index entries
    num_of_entries = DYN

    # 40 - 44: ORDT offset
    ordt_offset

    # 44 - 48: LIGT offset
    ligt_offset

    # 48 - 52: Number of ORDT/LIGT? entries
    num_of_ordt_entries

    # 52 - 56: Number of CNCX records
    num_of_cncx = DYN

    # 56 - 180: Unknown
    unknown3 = zeroes(124)

    # 180 - 184: TAGX offset
    tagx_offset = {header_length}

    # 184 - 192: Unknown
    unknown4 = zeroes(8)

    # TAGX
    tagx = DYN

    # Last Index entry
    last_index = DYN

    # IDXT
    idxt = DYN
    '''.format(header_length=HEADER_LENGTH)

    # idxt_offset is declared without a value above; when the header is
    # rendered it is overwritten with the position of the idxt field
    POSITIONS = {'idxt_offset':'idxt'}
# }}}
class Index(object): # {{{
control_byte_count = 1
cncx = CNCX()
tag_types = (EndTagTable,)
HEADER_LENGTH = IndexHeader.HEADER_LENGTH
@classmethod
def generate_tagx(cls):
header = b'TAGX'
@ -60,17 +127,18 @@ class Index(object):
control_bytes.append(cbs)
return control_bytes
def build_records(self):
def __call__(self):
self.control_bytes = self.calculate_control_bytes_for_each_entry(
self.entries)
self.rendered_entries = []
rendered_entries = []
offset = 0
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
IndexEntry = namedtuple('IndexEntry', 'offset length raw')
for i, x in enumerate(self.entries):
control_bytes = self.control_bytes[i]
leading_text, tags = x
buf = BytesIO()
buf.truncate(0)
raw = bytearray(leading_text)
raw.insert(0, len(leading_text))
buf.write(bytes(raw))
@ -81,8 +149,53 @@ class Index(object):
for val in values:
buf.write(encint(val))
raw = buf.getvalue()
self.rendered_entries.append(IndexEntry(offset, len(raw), raw))
rendered_entries.append(IndexEntry(offset, len(raw), raw))
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
offset += len(raw)
index.write(raw)
index_block = align_block(index.getvalue())
idxt_block = align_block(b'IDXT' + idxt.getvalue())
body = index_block + idxt_block
if len(body) + self.HEADER_LENGTH >= 0x10000:
raise ValueError('Index has too many entries, calibre does not'
' support generating multiple index records at this'
' time.')
header = b'INDX'
buf.truncate(0)
buf.write(pack(b'>I', self.HEADER_LENGTH))
buf.write(b'\0'*4) # Unknown
buf.write(pack(b'>I', 1)) # Header type? Or index record number?
buf.write(b'\0'*4) # Unknown
# IDXT block offset
buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))
# Number of index entries
buf.write(pack(b'>I', len(rendered_entries)))
buf.write(b'\xff'*8) # Unknown
buf.write(b'\0'*156) # Unknown
header += buf.getvalue()
index_record = header + body
tagx = self.generate_tagx()
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
b'\0')
header = {
'num_of_entries': len(rendered_entries),
'num_of_cncx': len(self.cncx),
'tagx':tagx,
'idxt':idxt
}
header = IndexHeader()(**header)
self.records = [header, index_record]
self.records.extend(self.cncx.records)
return self.records
# }}}
class SkelIndex(Index):