KF8 Output: Implement joint mobi files with both MOBI 6 and KF8 versions in the same file

This commit is contained in:
Kovid Goyal 2012-04-24 18:00:19 +05:30
parent 2f6a705e74
commit ce4655ddd5
6 changed files with 114 additions and 34 deletions

View File

@ -174,7 +174,8 @@ class MOBIOutput(OutputFormatPlugin):
add_fonts=create_kf8)
self.check_for_periodical()
kf8 = self.create_kf8(resources) if create_kf8 else None
kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
) if create_kf8 else None
if mobi_type == 'new':
kf8.write(output_path)
self.extract_mobi(output_path, opts)
@ -183,9 +184,10 @@ class MOBIOutput(OutputFormatPlugin):
self.log('Creating MOBI 6 output')
self.write_mobi(input_plugin, output_path, kf8, resources)
def create_kf8(self, resources):
def create_kf8(self, resources, for_joint=False):
from calibre.ebooks.mobi.writer8.main import create_kf8_book
return create_kf8_book(self.oeb, self.opts, resources)
return create_kf8_book(self.oeb, self.opts, resources,
for_joint=for_joint)
def write_mobi(self, input_plugin, output_path, kf8, resources):
from calibre.ebooks.mobi.mobiml import MobiMLizer

View File

@ -139,6 +139,8 @@ class MOBIFile(object):
self.files.append(File(skel, skeleton, ftext, first_aid, sections))
def dump_flows(self, ddir):
if self.fdst is None:
raise ValueError('This MOBI file has no FDST record')
for i, x in enumerate(self.fdst.sections):
start, end = x
raw = self.raw_text[start:end]

View File

@ -23,6 +23,7 @@ from calibre.ebooks.mobi.writer2.indexer import Indexer
# Disabled as I don't care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False
NULL_INDEX = 0xffffffff
class MobiWriter(object):
@ -30,6 +31,7 @@ class MobiWriter(object):
self.opts = opts
self.resources = resources
self.kf8 = kf8
self.for_joint = kf8 is not None
self.write_page_breaks_after_item = write_page_breaks_after_item
self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
self.prefer_author_sort = opts.prefer_author_sort
@ -61,7 +63,7 @@ class MobiWriter(object):
self.stream = stream
self.records = [None]
self.generate_content()
self.generate_record0()
self.generate_joint_record0() if self.for_joint else self.generate_record0()
self.write_header()
self.write_content()
@ -200,8 +202,6 @@ class MobiWriter(object):
first_image_record = None
if self.resources:
used_images = self.serializer.used_images
if self.kf8 is not None:
used_images |= self.kf8.used_images
first_image_record = len(self.records)
self.resources.serialize(self.records, used_images)
last_content_record = len(self.records) - 1
@ -365,6 +365,68 @@ class MobiWriter(object):
self.records[0] = align_block(record0)
# }}}
def generate_joint_record0(self): # {{{
    '''
    Build record 0 (the MOBI 6 header) of a joint MOBI 6 + KF8 file.

    Appends, in order, the resource records, a BOUNDARY marker record and
    the KF8 records (KF8 header first) to self.records. It then starts from
    the KF8 header fields, overrides the ones that must differ for MOBI 6,
    builds a MOBI 6 EXTH block and stores the resulting header in
    self.records[0].
    '''
    from calibre.ebooks.mobi.writer8.mobi import (MOBIHeader,
            HEADER_FIELDS)
    from calibre.ebooks.mobi.writer8.exth import build_exth

    # Insert resource records
    first_image_record = None
    old = len(self.records)
    if self.resources:
        # Images used by either the MOBI 6 or the KF8 markup are kept
        used_images = self.serializer.used_images | self.kf8.used_images
        first_image_record = len(self.records)
        self.resources.serialize(self.records, used_images)
    resource_record_count = len(self.records) - old

    # Insert KF8 records
    self.records.append(b'BOUNDARY')
    kf8_header_index = len(self.records)
    # Must be set before self.kf8.record0 is read below: start_offset is
    # passed to build_exth() when the KF8 record0 is generated.
    self.kf8.start_offset = (self.serializer.start_offset,
            self.kf8.start_offset)
    self.records.append(self.kf8.record0)
    self.records.extend(self.kf8.records[1:])

    # With no resource records, point the first resource record index just
    # past the last record instead of leaving it as None. (Previously this
    # was a bare expression whose value was discarded.)
    if first_image_record is None:
        first_image_record = len(self.records)

    header_fields = {k:getattr(self.kf8, k) for k in HEADER_FIELDS}

    # Now change the header fields that need to be different in the MOBI 6
    # header
    header_fields['first_resource_record'] = first_image_record
    header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
    header_fields['fdst_record'] = NULL_INDEX
    header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
    header_fields['extra_data_flags'] = 0b11
    # Header field name -> attribute of this writer holding its value
    for k, v in (
            ('last_text_record', 'last_text_record_idx'),
            ('first_non_text_record', 'first_non_text_record_idx'),
            ('ncx_index', 'primary_index_record_idx'),
            ):
        header_fields[k] = getattr(self, v)
    for x in ('skel', 'chunk', 'guide'):
        header_fields[x+'_index'] = NULL_INDEX

    # Create the MOBI 6 EXTH
    opts = self.opts
    kuc = 0 if resource_record_count > 0 else None

    header_fields['exth'] = build_exth(self.oeb.metadata,
            prefer_author_sort=opts.prefer_author_sort,
            is_periodical=opts.mobi_periodical,
            share_not_sync=opts.share_not_sync,
            cover_offset=self.cover_offset,
            thumbnail_offset=self.thumbnail_offset,
            num_of_resources=resource_record_count,
            kf8_unknown_count=kuc, be_kindlegen2=True,
            kf8_header_index=kf8_header_index,
            start_offset=self.serializer.start_offset,
            mobi_doctype=2)

    self.records[0] = MOBIHeader(file_version=6)(**header_fields)
# }}}
def write_header(self): # PalmDB header {{{
'''
Write the PalmDB header

View File

@ -27,6 +27,7 @@ EXTH_CODES = {
'source': 112,
'versionnumber': 114,
'startreading': 116,
'kf8_header_index': 121,
'num_of_resources': 125,
'kf8_unknown_count': 131,
'coveroffset': 201,
@ -41,7 +42,7 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
share_not_sync=True, cover_offset=None, thumbnail_offset=None,
start_offset=None, mobi_doctype=2, num_of_resources=None,
kf8_unknown_count=0, be_kindlegen2=False):
kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None):
exth = BytesIO()
nrecs = 0
@ -158,10 +159,16 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
except TypeError:
start_offset = [start_offset]
for so in start_offset:
if so is not None:
exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
so))
nrecs += 1
if kf8_header_index is not None:
exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
kf8_header_index))
nrecs += 1
if num_of_resources is not None:
exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
num_of_resources))

View File

@ -380,7 +380,7 @@ class KF8Writer(object):
self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle
self.guide_records = GuideIndex(self.guide_table)()
def create_kf8_book(oeb, opts, resources):
def create_kf8_book(oeb, opts, resources, for_joint=False):
writer = KF8Writer(oeb, opts, resources)
return KF8Book(writer)
return KF8Book(writer, for_joint=for_joint)

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
import time, random
from struct import pack
from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
@ -25,8 +25,6 @@ class MOBIHeader(Header): # {{{
the file.
'''
FILE_VERSION = 8
DEFINITION = '''
# 0: Compression
compression = DYN
@ -63,7 +61,7 @@ class MOBIHeader(Header): # {{{
encoding = 65001
# 32: UID
uid = random.randint(0, 0xffffffff)
uid = DYN
# 36: File version
file_version = {file_version}
@ -154,7 +152,7 @@ class MOBIHeader(Header): # {{{
# 0b1 - extra multibyte bytes after text records
# 0b10 - TBS indexing data (only used in MOBI 6)
# 0b100 - uncrossable breaks only used in MOBI 6
extra_data_flags = 1
extra_data_flags = DYN
# 244: KF8 Indices
ncx_index = DYN
@ -171,13 +169,18 @@ class MOBIHeader(Header): # {{{
# Padding to allow amazon's DTP service to add data
padding = zeroes(8192)
'''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)
'''
SHORT_FIELDS = {'compression', 'last_text_record', 'record_size',
'encryption_type', 'unused2'}
ALIGN = True
POSITIONS = {'title_offset':'full_title'}
def __init__(self, file_version=8):
    '''
    Fill in the DEFINITION template for this instance.

    The class-level DEFINITION string is formatted with ``file_version``
    and ``record_size`` and the result is stored on the instance (shadowing
    the class attribute) before the base class initializer is called.
    '''
    template_values = dict(file_version=file_version,
            record_size=RECORD_SIZE)
    self.DEFINITION = self.DEFINITION.format(**template_values)
    super(MOBIHeader, self).__init__()
def format_value(self, name, val):
if name == 'compression':
val = PALMDOC if val else UNCOMPRESSED
@ -185,14 +188,20 @@ class MOBIHeader(Header): # {{{
# }}}
# Names of the fields that must be supplied as keyword arguments when a
# MOBIHeader is built. Code that builds a header reads each name as an
# attribute and passes the results on as keyword arguments.
HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type',
'first_non_text_record', 'title_length', 'language_code',
'first_resource_record', 'exth_flags', 'fdst_record',
'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
'guide_index', 'exth', 'full_title', 'extra_data_flags',
'uid'}
class KF8Book(object):
def __init__(self, writer):
self.build_records(writer)
def __init__(self, writer, for_joint=False):
self.build_records(writer, for_joint)
self.used_images = writer.used_images
def build_records(self, writer):
def build_records(self, writer, for_joint):
metadata = writer.oeb.metadata
# The text records
for x in ('last_text_record_idx', 'first_non_text_record_idx'):
@ -222,8 +231,10 @@ class KF8Book(object):
self.first_resource_record = NULL_INDEX
if resources.records:
self.first_resource_record = len(self.records)
self.records.extend(resources.records)
self.num_of_resources = len(resources.records)
before = len(self.records)
if not for_joint:
resources.serialize(self.records, writer.used_images)
self.num_of_resources = len(self.records) - before
# FDST
self.fdst_count = writer.fdst_count
@ -233,12 +244,13 @@ class KF8Book(object):
# EOF
self.records.append(b'\xe9\x8e\r\n') # EOF record
# Miscellaneous header fields
self.compression = writer.compress
self.book_type = 0x101 if writer.opts.mobi_periodical else 2
self.full_title = utf8_text(unicode(metadata.title[0]))
self.title_length = len(self.full_title)
self.extra_data_flags = 0b1
self.uid = random.randint(0, 0xffffffff)
self.language_code = iana2mobi(str(metadata.language[0]))
self.exth_flags = 0b1010000
@ -248,14 +260,14 @@ class KF8Book(object):
self.opts = writer.opts
self.start_offset = writer.start_offset
self.metadata = metadata
self.kuc = 0 if len(resources.records) > 0 else None
@property
def record0(self):
''' We generate the EXTH header and record0 dynamically, to allow other
code to customize various values after build_record() has been
code to customize various values after build_records() has been
called'''
opts = self.opts
kuc = 0 if self.num_of_resources > 0 else None
self.exth = build_exth(self.metadata,
prefer_author_sort=opts.prefer_author_sort,
is_periodical=opts.mobi_periodical,
@ -263,15 +275,10 @@ class KF8Book(object):
cover_offset=self.cover_offset,
thumbnail_offset=self.thumbnail_offset,
num_of_resources=self.num_of_resources,
kf8_unknown_count=kuc, be_kindlegen2=True,
kf8_unknown_count=self.kuc, be_kindlegen2=True,
start_offset=self.start_offset, mobi_doctype=self.book_type)
kwargs = {field:getattr(self, field) for field in
('compression', 'text_length', 'last_text_record', 'book_type',
'first_non_text_record', 'title_length', 'language_code',
'first_resource_record', 'exth_flags', 'fdst_record',
'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
'guide_index', 'exth', 'full_title')}
kwargs = {field:getattr(self, field) for field in HEADER_FIELDS}
return MOBIHeader()(**kwargs)
def write(self, outpath):