mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
A nice framework for generating MOBI header records
This commit is contained in:
parent
5c72ad513b
commit
9ab4ff1840
@ -169,6 +169,7 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
self.remove_html_cover()
|
||||
resources = Resources(oeb, opts, self.is_periodical,
|
||||
add_fonts=create_kf8)
|
||||
self.check_for_periodical()
|
||||
|
||||
kf8 = self.create_kf8(resources) if create_kf8 else None
|
||||
|
||||
@ -203,7 +204,6 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
resources.add_extra_images()
|
||||
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
||||
mobimlizer(oeb, opts)
|
||||
self.check_for_periodical()
|
||||
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
|
||||
from calibre.ebooks.mobi.writer2.main import MobiWriter
|
||||
writer = MobiWriter(opts, resources, kf8,
|
||||
|
@ -17,7 +17,7 @@ from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
|
||||
File = namedtuple('File',
|
||||
'file_number name divtbl_count start_position length')
|
||||
|
||||
Elem = namedtuple('Elem',
|
||||
Elem = namedtuple('Chunk',
|
||||
'insert_pos toc_text file_number sequence_number start_pos '
|
||||
'length')
|
||||
|
||||
@ -110,7 +110,7 @@ class SECTIndex(Index):
|
||||
for i, text in enumerate(self.table.iterkeys()):
|
||||
tag_map = self.table[text]
|
||||
if set(tag_map.iterkeys()) != {2, 3, 4, 6}:
|
||||
raise ValueError('SECT Index has unknown tags: %s'%
|
||||
raise ValueError('Chunk Index has unknown tags: %s'%
|
||||
(set(tag_map.iterkeys())-{2, 3, 4, 6}))
|
||||
|
||||
toc_text = self.cncx[tag_map[2][0]]
|
||||
|
@ -198,7 +198,7 @@ def inspect_mobi(mobi_file, ddir):
|
||||
with open(os.path.join(ddir, 'skel.record'), 'wb') as fo:
|
||||
fo.write(str(f.skel_index).encode('utf-8'))
|
||||
|
||||
with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
|
||||
with open(os.path.join(ddir, 'chunks.record'), 'wb') as fo:
|
||||
fo.write(str(f.sect_index).encode('utf-8'))
|
||||
|
||||
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
|
||||
|
@ -583,7 +583,9 @@ class CNCX(object): # {{{
|
||||
self.strings[key] = offset
|
||||
offset += len(raw)
|
||||
|
||||
self.records.append(align_block(buf.getvalue()))
|
||||
val = buf.getvalue()
|
||||
if val:
|
||||
self.records.append(align_block(val))
|
||||
|
||||
def __getitem__(self, string):
|
||||
return self.strings[string]
|
||||
@ -592,6 +594,9 @@ class CNCX(object): # {{{
|
||||
return bool(self.records)
|
||||
__nonzero__ = __bool__
|
||||
|
||||
def __len__(self):
|
||||
return len(self.records)
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
77
src/calibre/ebooks/mobi/writer8/header.py
Normal file
77
src/calibre/ebooks/mobi/writer8/header.py
Normal file
@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from io import BytesIO
|
||||
from collections import OrderedDict
|
||||
from struct import pack
|
||||
|
||||
from calibre.ebooks.mobi.utils import align_block
|
||||
|
||||
NULL = 0xffffffff
|
||||
zeroes = lambda x: b'\0'*x
|
||||
nulls = lambda x: b'\xff'*x
|
||||
|
||||
class Header(OrderedDict):
    '''
    An ordered mapping of header field names to values that can render
    itself as a byte string.

    Subclasses describe their layout through class attributes:

    HEADER_NAME
        Bytes written at the very start of the rendered header.
    DEFINITION
        One field per line, in output order, written as ``name = expr`` or
        just ``name`` (which defaults to 0). Blank lines and lines starting
        with ``#`` are ignored. ``expr`` may use ``zeroes(n)``, ``nulls(n)``,
        ``NULL`` and ``DYN``; ``DYN`` evaluates to None and marks a field
        that must be given a value before the header is rendered.
    ALIGN_BLOCK
        When true, the rendered bytes are passed through ``align_block``.
    POSITIONS
        Maps a field name to the name of another field. After rendering,
        the first field is overwritten with the byte offset at which the
        second one was written.
    '''

    HEADER_NAME = b''

    DEFINITION = '''
    '''

    ALIGN_BLOCK = False
    POSITIONS = {}

    def __init__(self):
        '''
        Populate the mapping from ``DEFINITION``.

        Raises ValueError if the same field name is defined twice.
        '''
        OrderedDict.__init__(self)

        # Names that DEFINITION expressions are allowed to reference.
        namespace = {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, 'nulls':nulls}

        for raw_line in self.DEFINITION.splitlines():
            entry = raw_line.strip()
            if not entry or entry.startswith('#'):
                continue
            lhs, _, rhs = entry.partition('=')
            name, expr = lhs.strip(), rhs.strip()
            # NOTE: eval() is applied to text written by the subclass author
            # in DEFINITION; it must never be fed externally supplied data.
            val = eval(expr, namespace) if expr else 0
            if name in self:
                raise ValueError('Duplicate field in definition: %r'%name)
            self[name] = val

    def __call__(self, **kwargs):
        '''
        Store the field values given as keyword arguments and return the
        rendered header as bytes.

        Integer values are packed as big-endian unsigned 32-bit numbers,
        everything else is written as is. The values passed in remain
        stored on this object after the call.

        Raises KeyError for a keyword that is not a defined field and
        ValueError if a field is still None when it is written.
        '''
        for field, value in kwargs.iteritems():
            if field not in self:
                raise KeyError('Not a valid header field: %r'%field)
            self[field] = value

        out = BytesIO()
        out.write(bytes(self.HEADER_NAME))

        # Byte offset (from the start of the output, HEADER_NAME included)
        # at which each field begins.
        offsets = {}
        for field, value in self.iteritems():
            data = self.format_value(field, value)
            offsets[field] = out.tell()
            if data is None:
                raise ValueError('Dynamic field %r not set'%field)
            if isinstance(data, (int, long)):
                data = pack(b'>I', data)
            out.write(data)

        # Back-patch the pointer fields now that every offset is known.
        for pointer, target in self.POSITIONS.iteritems():
            out.seek(offsets[pointer])
            out.write(pack(b'>I', offsets[target]))

        rendered = out.getvalue()
        return align_block(rendered) if self.ALIGN_BLOCK else rendered

    def format_value(self, name, val):
        '''
        Hook called for every field just before it is written. The base
        implementation returns ``val`` unchanged.
        '''
        return val
|
||||
|
||||
|
@ -12,7 +12,8 @@ from collections import namedtuple
|
||||
from struct import pack
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.ebooks.mobi.utils import CNCX, encint
|
||||
from calibre.ebooks.mobi.utils import CNCX, encint, align_block
|
||||
from calibre.ebooks.mobi.writer8.header import Header
|
||||
|
||||
TagMeta = namedtuple('TagMeta',
|
||||
'name number values_per_entry bitmask end_flag')
|
||||
@ -23,13 +24,79 @@ EndTagTable = TagMeta('eof', 0, 0, 0, 1)
|
||||
mask_to_bit_shifts = { 1:0, 2:1, 3:0, 4:2, 8:3, 12:2, 16:4, 32:5, 48:4, 64:6,
|
||||
128:7, 192: 6 }
|
||||
|
||||
class IndexHeader(Header): # {{{
    '''
    Field layout of an INDX header record.

    The fixed part is HEADER_LENGTH bytes long including the leading
    ``INDX`` magic; the byte ranges are given in the comments inside
    DEFINITION. It is followed by the ``tagx``, ``last_index`` and ``idxt``
    fields. All fields set to DYN must be supplied when the header is
    rendered.
    '''

    HEADER_NAME = b'INDX'
    HEADER_LENGTH = 192
    ALIGN_BLOCK = True

    # idxt_offset is overwritten by Header.__call__ with the byte offset at
    # which the idxt field was written.
    POSITIONS = {'idxt_offset':'idxt'}

    # {header_length} is substituted below, so this text must not contain
    # any other braces.
    DEFINITION = '''
        # 4 - 8: Header Length
        header_length = {header_length}

        # 8 - 16: Unknown
        unknown1 = zeroes(8)

        # 16 - 20: Index type: 0 - normal 2 - inflection
        type = 2

        # 20 - 24: IDXT offset (filled in later)
        idxt_offset

        # 24 - 28: Number of index records
        num_of_records = 1

        # 28 - 32: Index encoding (65001 = utf-8)
        encoding = 65001

        # 32 - 36: Unknown
        unknown2 = NULL

        # 36 - 40: Number of Index entries
        num_of_entries = DYN

        # 40 - 44: ORDT offset
        ordt_offset

        # 44 - 48: LIGT offset
        ligt_offset

        # 48 - 52: Number of ORDT/LIGT? entries
        num_of_ordt_entries

        # 52 - 56: Number of CNCX records
        num_of_cncx = DYN

        # 56 - 180: Unknown
        unknown3 = zeroes(124)

        # 180 - 184: TAGX offset
        tagx_offset = {header_length}

        # 184 - 192: Unknown
        unknown4 = zeroes(8)

        # TAGX
        tagx = DYN

        # Last Index entry
        last_index = DYN

        # IDXT
        idxt = DYN
    '''.format(header_length=HEADER_LENGTH)
|
||||
# }}}
|
||||
|
||||
class Index(object): # {{{
|
||||
|
||||
control_byte_count = 1
|
||||
cncx = CNCX()
|
||||
tag_types = (EndTagTable,)
|
||||
|
||||
HEADER_LENGTH = IndexHeader.HEADER_LENGTH
|
||||
|
||||
@classmethod
|
||||
def generate_tagx(cls):
|
||||
header = b'TAGX'
|
||||
@ -60,17 +127,18 @@ class Index(object):
|
||||
control_bytes.append(cbs)
|
||||
return control_bytes
|
||||
|
||||
def build_records(self):
|
||||
def __call__(self):
|
||||
self.control_bytes = self.calculate_control_bytes_for_each_entry(
|
||||
self.entries)
|
||||
|
||||
self.rendered_entries = []
|
||||
rendered_entries = []
|
||||
offset = 0
|
||||
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
|
||||
IndexEntry = namedtuple('IndexEntry', 'offset length raw')
|
||||
for i, x in enumerate(self.entries):
|
||||
control_bytes = self.control_bytes[i]
|
||||
leading_text, tags = x
|
||||
buf = BytesIO()
|
||||
buf.truncate(0)
|
||||
raw = bytearray(leading_text)
|
||||
raw.insert(0, len(leading_text))
|
||||
buf.write(bytes(raw))
|
||||
@ -81,8 +149,53 @@ class Index(object):
|
||||
for val in values:
|
||||
buf.write(encint(val))
|
||||
raw = buf.getvalue()
|
||||
self.rendered_entries.append(IndexEntry(offset, len(raw), raw))
|
||||
rendered_entries.append(IndexEntry(offset, len(raw), raw))
|
||||
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
|
||||
offset += len(raw)
|
||||
index.write(raw)
|
||||
|
||||
index_block = align_block(index.getvalue())
|
||||
idxt_block = align_block(b'IDXT' + idxt.getvalue())
|
||||
body = index_block + idxt_block
|
||||
if len(body) + self.HEADER_LENGTH >= 0x10000:
|
||||
raise ValueError('Index has too many entries, calibre does not'
|
||||
' support generating multiple index records at this'
|
||||
' time.')
|
||||
|
||||
header = b'INDX'
|
||||
buf.truncate(0)
|
||||
buf.write(pack(b'>I', self.HEADER_LENGTH))
|
||||
buf.write(b'\0'*4) # Unknown
|
||||
buf.write(pack(b'>I', 1)) # Header type? Or index record number?
|
||||
buf.write(b'\0'*4) # Unknown
|
||||
|
||||
# IDXT block offset
|
||||
buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))
|
||||
|
||||
# Number of index entries
|
||||
buf.write(pack(b'>I', len(rendered_entries)))
|
||||
|
||||
buf.write(b'\xff'*8) # Unknown
|
||||
|
||||
buf.write(b'\0'*156) # Unknown
|
||||
|
||||
header += buf.getvalue()
|
||||
index_record = header + body
|
||||
|
||||
tagx = self.generate_tagx()
|
||||
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
|
||||
b'\0')
|
||||
header = {
|
||||
'num_of_entries': len(rendered_entries),
|
||||
'num_of_cncx': len(self.cncx),
|
||||
'tagx':tagx,
|
||||
'idxt':idxt
|
||||
}
|
||||
header = IndexHeader()(**header)
|
||||
self.records = [header, index_record]
|
||||
self.records.extend(self.cncx.records)
|
||||
return self.records
|
||||
# }}}
|
||||
|
||||
class SkelIndex(Index):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user