KF8 Output: Can now generate standalone KF8 files. There are still bugs that need to be fixed; the produced files are not usable.

This commit is contained in:
Kovid Goyal 2012-04-22 19:30:30 +05:30
parent fe1e290820
commit 57b01c645d
7 changed files with 519 additions and 154 deletions

View File

@ -164,7 +164,8 @@ class MOBIOutput(OutputFormatPlugin):
from calibre.ebooks.mobi.writer2.resources import Resources from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb self.log, self.opts, self.oeb = log, opts, oeb
create_kf8 = tweaks.get('create_kf8', False) mobi_type = tweaks.get('test_mobi_output_type', 'old')
create_kf8 = mobi_type in ('new', 'both')
self.remove_html_cover() self.remove_html_cover()
resources = Resources(oeb, opts, self.is_periodical, resources = Resources(oeb, opts, self.is_periodical,
@ -172,13 +173,17 @@ class MOBIOutput(OutputFormatPlugin):
self.check_for_periodical() self.check_for_periodical()
kf8 = self.create_kf8(resources) if create_kf8 else None kf8 = self.create_kf8(resources) if create_kf8 else None
if mobi_type == 'new':
kf8.write(output_path)
self.extract_mobi(output_path, opts)
return
self.log('Creating MOBI 6 output') self.log('Creating MOBI 6 output')
self.write_mobi(input_plugin, output_path, kf8, resources) self.write_mobi(input_plugin, output_path, kf8, resources)
def create_kf8(self, resources): def create_kf8(self, resources):
from calibre.ebooks.mobi.writer8.main import KF8Writer from calibre.ebooks.mobi.writer8.main import create_kf8_book
return KF8Writer(self.oeb, self.opts, resources) return create_kf8_book(self.oeb, self.opts, resources)
def write_mobi(self, input_plugin, output_path, kf8, resources): def write_mobi(self, input_plugin, output_path, kf8, resources):
from calibre.ebooks.mobi.mobiml import MobiMLizer from calibre.ebooks.mobi.mobiml import MobiMLizer
@ -209,7 +214,9 @@ class MOBIOutput(OutputFormatPlugin):
writer = MobiWriter(opts, resources, kf8, writer = MobiWriter(opts, resources, kf8,
write_page_breaks_after_item=write_page_breaks_after_item) write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path) writer(oeb, output_path)
self.extract_mobi(output_path, opts)
def extract_mobi(self, output_path, opts):
if opts.extract_to is not None: if opts.extract_to is not None:
from calibre.ebooks.mobi.debug.main import inspect_mobi from calibre.ebooks.mobi.debug.main import inspect_mobi
ddir = opts.extract_to ddir = opts.extract_to

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, random, time import random, time
from cStringIO import StringIO from cStringIO import StringIO
from struct import pack from struct import pack
@ -21,32 +21,10 @@ from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
align_block, detect_periodical, RECORD_SIZE, create_text_record) align_block, detect_periodical, RECORD_SIZE, create_text_record)
from calibre.ebooks.mobi.writer2.indexer import Indexer from calibre.ebooks.mobi.writer2.indexer import Indexer
# Maps a metadata term name to the numeric EXTH record type that is written
# for it. Terms that are not listed here are skipped when the EXTH header is
# built.
EXTH_CODES = {
'creator': 100,
'publisher': 101,
'description': 103,
'identifier': 104,
'subject': 105,
'pubdate': 106,
'review': 107,
'contributor': 108,
'rights': 109,
'type': 111,
'source': 112,
'versionnumber': 114,
'startreading': 116,
'coveroffset': 201,
'thumboffset': 202,
'hasfakecover': 203,
'lastupdatetime': 502,
'title': 503,
}
# Disabled as I dont care about uncrossable breaks # Disabled as I dont care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False WRITE_UNCROSSABLE_BREAKS = False
class MobiWriter(object): class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True): def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
self.opts = opts self.opts = opts
@ -210,7 +188,15 @@ class MobiWriter(object):
# header as well # header as well
bt = 0x103 if self.indexer.is_flat_periodical else 0x101 bt = 0x103 if self.indexer.is_flat_periodical else 0x101
exth = self.build_exth(bt) from calibre.ebooks.mobi.writer8.exth import build_exth
exth = build_exth(metadata,
prefer_author_sort=self.opts.prefer_author_sort,
is_periodical=self.is_periodical,
share_not_sync=self.opts.share_not_sync,
cover_offset=self.cover_offset,
thumbnail_offset=self.thumbnail_offset,
start_offset=self.serializer.start_offset, mobi_doctype=bt
)
first_image_record = None first_image_record = None
if self.resources: if self.resources:
used_images = self.serializer.used_images used_images = self.serializer.used_images
@ -379,127 +365,6 @@ class MobiWriter(object):
self.records[0] = align_block(record0) self.records[0] = align_block(record0)
# }}} # }}}
def build_exth(self, mobi_doctype): # EXTH Header {{{
    '''
    Build the EXTH header for this book from ``self.oeb.metadata``.

    :param mobi_doctype: MOBI book type. Only consulted for periodicals,
        where 0x101 and 0x103 select the ``NWPR`` and ``MAGZ`` cdetype
        records respectively.
    :return: The EXTH block as a bytestring: ``EXTH``, the declared length
        (records plus the 12 byte header, padding not included), the record
        count, the records themselves and 1 to 4 bytes of NUL padding.
    :raises ValueError: If the metadata has neither a date nor a timestamp.
    '''
    oeb = self.oeb
    exth = StringIO()
    nrecs = 0

    for term in oeb.metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = oeb.metadata[term]
        if term == 'creator':
            # All creators are merged into a single record
            if self.prefer_author_sort:
                creators = [normalize(unicode(c.file_as or c)) for c in
                        items][:1]
            else:
                creators = [normalize(unicode(c)) for c in items]
            items = ['; '.join(creators)]
        for item in items:
            data = normalize(unicode(item))
            if term != 'description':
                data = self.COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif (item.scheme or '').lower() != 'isbn':
                    # Only ISBN identifiers are stored. An identifier
                    # without a scheme is skipped instead of crashing on
                    # None.lower()
                    continue
            data = data.encode('utf-8')
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1
        if term == 'rights' :
            # NOTE(review): the loop above has already written one record
            # per rights item, so this writes the first one a second time.
            # Kept as is; confirm whether the duplicate is intended.
            try:
                rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
            except Exception:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in oeb.metadata['identifier']:
        # The scheme attribute is optional, treat a missing one as empty
        scheme = x.get(OPF('scheme'), None) or ''
        if (scheme.lower() == 'uuid' or
                unicode(x).startswith('urn:uuid:')):
            uuid = unicode(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode):
        uuid = uuid.encode('utf-8')
    if not self.opts.share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write cdetype
    if not self.is_periodical:
        if not self.opts.share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    # datestr has to start out as None, otherwise the check below can only
    # ever fail with a NameError instead of the intended ValueError
    datestr = None
    if oeb.metadata['date']:
        datestr = str(oeb.metadata['date'][0])
    elif oeb.metadata['timestamp']:
        datestr = str(oeb.metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if self.is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if self.is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204:201, 205:2, 206:0, 207:101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204:201, 205:1, 206:2, 207:33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1

    if self.cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
            self.cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if self.thumbnail_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
            self.thumbnail_offset))
        nrecs += 1

    if self.serializer.start_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
            self.serializer.start_offset))
        nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
# }}}
def write_header(self): # PalmDB header {{{ def write_header(self): # PalmDB header {{{
''' '''
Write the PalmDB header Write the PalmDB header

View File

@ -0,0 +1,176 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from struct import pack
from io import BytesIO
from calibre.ebooks.mobi.utils import utf8_text
# Maps a metadata term name to the numeric EXTH record type that is written
# for it. Terms that are not listed here are skipped by build_exth(); the
# non-metadata entries (offsets and counts) are looked up by name when the
# corresponding keyword argument of build_exth() is not None.
EXTH_CODES = {
'creator': 100,
'publisher': 101,
'description': 103,
'identifier': 104,
'subject': 105,
'pubdate': 106,
'review': 107,
'contributor': 108,
'rights': 109,
'type': 111,
'source': 112,
'versionnumber': 114,
'startreading': 116,
'num_of_resources': 125,
'kf8_unknown_count': 131,
'coveroffset': 201,
'thumboffset': 202,
'hasfakecover': 203,
'lastupdatetime': 502,
'title': 503,
}
# Matches a run of whitespace; build_exth() replaces each run with a single
# space in every field except the description
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
        share_not_sync=True, cover_offset=None, thumbnail_offset=None,
        start_offset=None, mobi_doctype=2, num_of_resources=None,
        kf8_unknown_count=0, be_kindlegen2=False):
    '''
    Build an EXTH header from book metadata.

    :param metadata: The book metadata. It is iterated for its term names,
        indexed by term name and read via ``metadata.rights``.
    :param prefer_author_sort: If True only the first creator is written,
        using its ``file_as`` value when it has one.
    :param is_periodical: Selects the periodical flavour of the cdetype,
        last update time and generator records.
    :param share_not_sync: If True the ASIN (113) record and the ``EBOK``
        cdetype (501) record are left out.
    :param cover_offset: Written as the cover offset record (together with
        a has-fake-cover record of 0) unless None.
    :param thumbnail_offset: Written as the thumbnail offset record unless
        None.
    :param start_offset: Written as the start reading record unless None.
    :param mobi_doctype: MOBI book type. Only consulted for periodicals,
        where 0x101 and 0x103 select the ``NWPR`` and ``MAGZ`` cdetype.
    :param num_of_resources: Written as record 125 unless None.
    :param kf8_unknown_count: Written as record 131 unless None.
    :param be_kindlegen2: If True the generator records (204-207) use the
        be_kindlegen2 set of values instead of the periodical or
        kindlegen 1.2 ones.
    :return: The EXTH block as a bytestring: ``EXTH``, the declared length
        (records plus the 12 byte header, padding not included), the record
        count, the records themselves and 1 to 4 bytes of NUL padding.
    :raises ValueError: If the metadata has neither a date nor a timestamp.
    '''
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            # All creators are merged into a single record
            if prefer_author_sort:
                creators = [unicode(c.file_as or c) for c in
                        items][:1]
            else:
                creators = [unicode(c) for c in items]
            items = ['; '.join(creators)]
        for item in items:
            data = unicode(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif (item.scheme or '').lower() != 'isbn':
                    # Only ISBN identifiers are stored. An identifier
                    # without a scheme is skipped instead of crashing on
                    # None.lower()
                    continue
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1
        if term == 'rights' :
            # NOTE(review): the loop above has already written one record
            # per rights item, so this writes the first one a second time.
            # Kept as is; confirm whether the duplicate is intended.
            try:
                rights = utf8_text(unicode(metadata.rights[0]))
            except Exception:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        # The scheme attribute is optional, treat a missing one as empty
        scheme = x.get(OPF('scheme'), None) or ''
        if (scheme.lower() == 'uuid' or
                unicode(x).startswith('urn:uuid:')):
            uuid = unicode(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode):
        uuid = uuid.encode('utf-8')
    # The ASIN is only written when syncing is wanted, the same condition
    # that guards the EBOK cdetype below
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    # datestr has to start out as None, otherwise the check below can only
    # ever fail with a NameError instead of the intended ValueError
    datestr = None
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        vals = {204:201, 205:2, 206:2, 207:35621}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204:201, 205:2, 206:0, 207:101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204:201, 205:1, 206:2, 207:33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
            cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
            thumbnail_offset))
        nrecs += 1

    if start_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
            start_offset))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
            num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
            kf8_unknown_count))
        nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)

View File

@ -7,6 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import random
from io import BytesIO from io import BytesIO
from collections import OrderedDict from collections import OrderedDict
from struct import pack from struct import pack
@ -16,6 +17,7 @@ from calibre.ebooks.mobi.utils import align_block
NULL = 0xffffffff NULL = 0xffffffff
zeroes = lambda x: b'\0'*x zeroes = lambda x: b'\0'*x
nulls = lambda x: b'\xff'*x nulls = lambda x: b'\xff'*x
short = lambda x: pack(b'>H', x)
class Header(OrderedDict): class Header(OrderedDict):
@ -25,7 +27,9 @@ class Header(OrderedDict):
''' '''
ALIGN_BLOCK = False ALIGN_BLOCK = False
POSITIONS = {} POSITIONS = {} # Mapping of position field to field whose position should
# be stored in the position field
SHORT_FIELDS = set()
def __init__(self): def __init__(self):
OrderedDict.__init__(self) OrderedDict.__init__(self)
@ -36,13 +40,17 @@ class Header(OrderedDict):
name, val = [x.strip() for x in line.partition('=')[0::2]] name, val = [x.strip() for x in line.partition('=')[0::2]]
if val: if val:
val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
'nulls':nulls}) 'nulls':nulls, 'short':short, 'random':random})
else: else:
val = 0 val = 0
if name in self: if name in self:
raise ValueError('Duplicate field in definition: %r'%name) raise ValueError('Duplicate field in definition: %r'%name)
self[name] = val self[name] = val
@property
def dynamic_fields(self):
    '''
    The names of all fields whose value is currently None, that is, the
    fields declared as DYN, which have to be supplied when the header is
    called.
    '''
    return tuple([name for name, value in self.iteritems() if value is None])
def __call__(self, **kwargs): def __call__(self, **kwargs):
positions = {} positions = {}
for name, val in kwargs.iteritems(): for name, val in kwargs.iteritems():
@ -58,7 +66,8 @@ class Header(OrderedDict):
if val is None: if val is None:
raise ValueError('Dynamic field %r not set'%name) raise ValueError('Dynamic field %r not set'%name)
if isinstance(val, (int, long)): if isinstance(val, (int, long)):
val = pack(b'>I', val) fmt = 'H' if name in self.SHORT_FIELDS else 'I'
val = pack(b'>'+fmt, val)
buf.write(val) buf.write(val)
for pos_field, field in self.POSITIONS.iteritems(): for pos_field, field in self.POSITIONS.iteritems():

View File

@ -182,7 +182,7 @@ class Index(object): # {{{
if len(body) + self.HEADER_LENGTH >= 0x10000: if len(body) + self.HEADER_LENGTH >= 0x10000:
raise too_large raise too_large
header = b'INDX' header = b'INDX'
buf.truncate(0) buf.seek(0), buf.truncate(0)
buf.write(pack(b'>I', self.HEADER_LENGTH)) buf.write(pack(b'>I', self.HEADER_LENGTH))
buf.write(b'\0'*4) # Unknown buf.write(b'\0'*4) # Unknown
buf.write(pack(b'>I', 1)) # Header type? Or index record number? buf.write(pack(b'>I', 1)) # Header type? Or index record number?

View File

@ -26,6 +26,7 @@ from calibre.ebooks.oeb.parse_utils import barename
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex, from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
ChunkIndex, GuideIndex) ChunkIndex, GuideIndex)
from calibre.ebooks.mobi.writer8.mobi import KF8Book
XML_DOCS = OEB_DOCS | {SVG_MIME} XML_DOCS = OEB_DOCS | {SVG_MIME}
@ -42,7 +43,7 @@ class KF8Writer(object):
self.used_images = set() self.used_images = set()
self.resources = resources self.resources = resources
self.flows = [None] # First flow item is reserved for the text self.flows = [None] # First flow item is reserved for the text
self.records = [] self.records = [None] # Placeholder for zeroth record
self.log('\tGenerating KF8 markup...') self.log('\tGenerating KF8 markup...')
self.dup_data() self.dup_data()
@ -266,9 +267,10 @@ class KF8Writer(object):
start = 0 if i == 0 else self.fdst_table[-1].end start = 0 if i == 0 else self.fdst_table[-1].end
self.fdst_table.append(FDST(start, start + len(flow))) self.fdst_table.append(FDST(start, start + len(flow)))
entries.extend(self.fdst_table[-1]) entries.extend(self.fdst_table[-1])
rec = (b'FDST' + pack(b'>LL', len(self.fdst_table), 12) + rec = (b'FDST' + pack(b'>LL', 12, len(self.fdst_table)) +
pack(b'>%dL'%len(entries), *entries)) pack(b'>%dL'%len(entries), *entries))
self.fdst_records = [rec] self.fdst_records = [rec]
self.fdst_count = len(self.fdst_table)
def create_indices(self): def create_indices(self):
self.skel_records = SkelIndex(self.skel_table)() self.skel_records = SkelIndex(self.skel_table)()
@ -347,3 +349,7 @@ class KF8Writer(object):
if self.guide_table: if self.guide_table:
self.guide_records = GuideIndex(self.guide_table)() self.guide_records = GuideIndex(self.guide_table)()
def create_kf8_book(oeb, opts, resources):
    '''
    Run a KF8Writer over the book and return the result wrapped in a
    KF8Book.
    '''
    return KF8Book(KF8Writer(oeb, opts, resources))

View File

@ -0,0 +1,302 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from struct import pack
from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
from calibre.ebooks.mobi.writer8.header import Header
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.writer8.exth import build_exth
from calibre.utils.filenames import ascii_filename
NULL_INDEX = 0xffffffff
class MOBIHeader(Header): # {{{

    '''
    Represents the first record in a MOBI file, contains all the metadata about
    the file.

    The layout is declared in DEFINITION, one ``name = value`` field per
    line. Fields declared as DYN have no default and must be supplied when
    the header is generated.
    '''

    FILE_VERSION = 8

    DEFINITION = '''
# 0: Compression
compression = DYN
# 2: Unused
unused1 = zeroes(2)
# 4: Text length
text_length = DYN
# 8: Last text record
last_text_record = DYN
# 10: Text record size
record_size = {record_size}
# 12: Unused
unused2
# 16: Ident
ident = b'MOBI'
# 20: Header length
header_length = 248
# 24: Book Type (0x2 - Book, 0x101 - News hierarchical, 0x102 - News
# (flat), 0x103 - News magazine same as 0x101)
book_type = DYN
# 28: Text encoding (utf-8 = 65001)
encoding = 65001
# 32: UID
uid = random.randint(0, 0xffffffff)
# 36: File version
file_version = {file_version}
# 40: Meta orth record (Chunk table index in KF8)
meta_orth_record = DYN
# 44: Meta infl index
meta_infl_index = NULL
# 48: Extra indices
extra_index0 = NULL
extra_index1 = NULL
extra_index2 = NULL
extra_index3 = NULL
extra_index4 = NULL
extra_index5 = NULL
extra_index6 = NULL
extra_index7 = NULL
# 80: First non text record
first_non_text_record = DYN
# 84: Title offset
title_offset
# 88: Title Length
title_length = DYN
# 92: Language code
language_code = DYN
# 96: Dictionary in and out languages
in_lang
out_lang
# 104: Min version
min_version = {file_version}
# 108: First resource record
first_resource_record = DYN
# 112: Huff/CDIC compression
huff_first_record
huff_count
# 120: DATP records
datp_first_record
datp_count
# 128: EXTH flags
exth_flags = DYN
# 132: Unknown
unknown = zeroes(32)
# 164: DRM
drm_offset = NULL
drm_count = NULL
drm_size
drm_flags
# 180: Unknown
unknown2 = zeroes(12)
# 192: FDST
fdst_record = DYN
fdst_count = DYN
# 200: FCI
fcis_record = NULL
fcis_count
# 208: FLIS
flis_record = NULL
flis_count
# 216: Unknown
unknown3 = zeroes(8)
# 224: SRCS
srcs_record = NULL
srcs_count
# 232: Unknown
unknown4 = nulls(8)
# 240: Extra data flags
# 0b1 - extra multibyte bytes after text records
# 0b10 - TBS indexing data (only used in MOBI 6)
# 0b100 - uncrossable breaks only used in MOBI 6
extra_data_flags = 1
# 244: KF8 Indices
ncx_index = DYN
chunk_index = DYN
skel_index = DYN
datp_index = NULL
guide_index = DYN
# 264: EXTH
exth = DYN
# Full title
full_title = DYN
# Padding to allow amazon's DTP service to add data
padding = zeroes(8192)
'''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)

    # These integer fields are packed as two bytes instead of four
    SHORT_FIELDS = {'compression', 'last_text_record', 'record_size'}
    # The Header base class declares its alignment switch as ALIGN_BLOCK
    # (default False). The original spelling here, ALIGN, is not a name the
    # visible Header code declares, so the request to align this record was
    # presumably being ignored.
    ALIGN_BLOCK = True
    ALIGN = True # Old spelling, kept so that anything reading it still works
    # title_offset is filled in with the position of full_title
    POSITIONS = {'title_offset':'full_title'}

    def format_value(self, name, val):
        '''
        Turn the truth value given for the compression field into the
        PALMDOC or UNCOMPRESSED constant, then delegate to the base class.
        '''
        # NOTE(review): assumes Header defines format_value() and calls it
        # for every field; that method is not visible here -- confirm.
        if name == 'compression':
            val = PALMDOC if val else UNCOMPRESSED
        return super(MOBIHeader, self).format_value(name, val)

# }}}
# The fields that need to be set in the MOBI header (those declared as DYN
# above) are supplied by KF8Book.record0
class KF8Book(object):

    '''
    A standalone KF8 book: collects the records produced by a KF8 writer,
    works out the values for the MOBI header and writes everything out as a
    PalmDB file.
    '''

    def __init__(self, writer):
        self.build_records(writer)

    def build_records(self, writer):
        '''
        Append the index, resource, FDST and EOF records to the text records
        of ``writer`` and remember the record numbers and other values that
        the MOBI header needs.

        :param writer: The KF8 writer. Its records, index records, resources,
            options and OEB metadata are read.
        '''
        metadata = writer.oeb.metadata
        # The text records
        for x in ('last_text_record_idx', 'first_non_text_record_idx'):
            # Stored under the name without the _idx suffix, which is the
            # name of the corresponding header field
            setattr(self, x.rpartition('_')[0], getattr(writer, x))
        # This is the writer's own list, not a copy, so everything appended
        # below is appended to writer.records as well
        self.records = writer.records
        self.text_length = writer.text_length

        # KF8 Indices
        self.chunk_index = self.meta_orth_record = len(self.records)
        self.records.extend(writer.chunk_records)
        self.skel_index = len(self.records)
        self.records.extend(writer.skel_records)
        self.guide_index = NULL_INDEX
        if writer.guide_records:
            self.guide_index = len(self.records)
            self.records.extend(writer.guide_records)
        self.ncx_index = NULL_INDEX
        if writer.ncx_records:
            self.ncx_index = len(self.records)
            self.records.extend(writer.ncx_records)

        # Resources
        resources = writer.resources
        for x in ('cover_offset', 'thumbnail_offset', 'masthead_offset'):
            setattr(self, x, getattr(resources, x))

        # The index has to be taken before the resource records are
        # appended, and must stay NULL_INDEX when there are none
        self.first_resource_record = NULL_INDEX
        if resources.records:
            self.first_resource_record = len(self.records)
            self.records.extend(resources.records)
        self.num_of_resources = len(resources.records)

        # FDST
        self.fdst_count = writer.fdst_count
        self.fdst_record = len(self.records)
        self.records.extend(writer.fdst_records)

        # EOF
        self.records.append(b'\xe9\x8e\r\n') # EOF record

        # Miscellaneous header fields
        self.compression = writer.compress
        self.book_type = 0x101 if writer.opts.mobi_periodical else 2
        self.full_title = utf8_text(unicode(metadata.title[0]))
        self.title_length = len(self.full_title)

        self.language_code = iana2mobi(str(metadata.language[0]))
        self.exth_flags = 0b1010000
        if writer.opts.mobi_periodical:
            self.exth_flags |= 0b1000

        self.opts = writer.opts
        self.start_offset = writer.start_offset
        self.metadata = metadata

    @property
    def record0(self):
        ''' We generate the EXTH header and record0 dynamically, to allow other
        code to customize various values after build_records() has been
        called'''
        opts = self.opts
        # The kf8_unknown_count record is only written for books that have
        # resources
        kuc = 0 if self.num_of_resources > 0 else None
        self.exth = build_exth(self.metadata,
                prefer_author_sort=opts.prefer_author_sort,
                is_periodical=opts.mobi_periodical,
                share_not_sync=opts.share_not_sync,
                cover_offset=self.cover_offset,
                thumbnail_offset=self.thumbnail_offset,
                num_of_resources=self.num_of_resources,
                kf8_unknown_count=kuc, be_kindlegen2=True,
                start_offset=self.start_offset, mobi_doctype=self.book_type)

        # One keyword argument per DYN field of the MOBI header
        kwargs = {field:getattr(self, field) for field in
                ('compression', 'text_length', 'last_text_record',
                'book_type', 'meta_orth_record', 'first_non_text_record',
                'title_length', 'language_code', 'first_resource_record',
                'exth_flags', 'fdst_record', 'fdst_count', 'ncx_index',
                'chunk_index', 'skel_index', 'guide_index', 'exth',
                'full_title')}
        return MOBIHeader()(**kwargs)

    def write(self, outpath):
        '''
        Write the book to ``outpath``: the PalmDB header, the record table
        and then the records, with a freshly generated record 0 in place of
        the placeholder at index 0.
        '''
        records = [self.record0] + self.records[1:]

        with open(outpath, 'wb') as f:

            # Write PalmDB Header

            # The name field is 32 bytes: at most 31 characters of title,
            # padded with NULs
            title = ascii_filename(self.full_title.decode('utf-8')).replace(
                    ' ', '_')[:31]
            title += (b'\0' * (32 - len(title)))
            now = int(time.time())
            nrecords = len(records)
            f.write(title)
            f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
            f.write(b'BOOKMOBI')
            f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
            # The first record starts after the record table (8 bytes per
            # record) and the two byte gap that follows it
            offset = f.tell() + (8 * nrecords) + 2
            for i, record in enumerate(records):
                f.write(pack(b'>I', offset))
                # One zero byte followed by the low three bytes of 2*i; in
                # the PalmDB record entry layout these are the attributes
                # byte and the unique id
                f.write(b'\0' + pack(b'>I', 2*i)[1:])
                offset += len(record)
            f.write(b'\0\0')

            for rec in records:
                f.write(rec)