diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py
index 98a837e1a3..b73d6341f9 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@@ -174,7 +174,8 @@ class MOBIOutput(OutputFormatPlugin):
                 add_fonts=create_kf8)
         self.check_for_periodical()
 
-        kf8 = self.create_kf8(resources) if create_kf8 else None
+        kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
+                ) if create_kf8 else None
         if mobi_type == 'new':
             kf8.write(output_path)
             self.extract_mobi(output_path, opts)
@@ -183,9 +184,10 @@ class MOBIOutput(OutputFormatPlugin):
         self.log('Creating MOBI 6 output')
         self.write_mobi(input_plugin, output_path, kf8, resources)
 
-    def create_kf8(self, resources):
+    def create_kf8(self, resources, for_joint=False):
         from calibre.ebooks.mobi.writer8.main import create_kf8_book
-        return create_kf8_book(self.oeb, self.opts, resources)
+        return create_kf8_book(self.oeb, self.opts, resources,
+                for_joint=for_joint)
 
     def write_mobi(self, input_plugin, output_path, kf8, resources):
         from calibre.ebooks.mobi.mobiml import MobiMLizer
diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index a91213f889..21ed11fc51 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -139,6 +139,8 @@ class MOBIFile(object):
             self.files.append(File(skel, skeleton, ftext, first_aid, sections))
 
     def dump_flows(self, ddir):
+        if self.fdst is None:
+            raise ValueError('This MOBI file has no FDST record')
         for i, x in enumerate(self.fdst.sections):
             start, end = x
             raw = self.raw_text[start:end]
diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index a8fc37ff45..f064fd2625 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -23,6 +23,7 @@ from calibre.ebooks.mobi.writer2.indexer import Indexer
 
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False
+NULL_INDEX = 0xffffffff
 
 class MobiWriter(object):
 
@@ -30,6 +31,7 @@ class MobiWriter(object):
         self.opts = opts
         self.resources = resources
         self.kf8 = kf8
+        self.for_joint = kf8 is not None
         self.write_page_breaks_after_item = write_page_breaks_after_item
         self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
         self.prefer_author_sort = opts.prefer_author_sort
@@ -61,7 +63,7 @@ class MobiWriter(object):
         self.stream = stream
         self.records = [None]
         self.generate_content()
-        self.generate_record0()
+        self.generate_joint_record0() if self.for_joint else self.generate_record0()
         self.write_header()
         self.write_content()
 
@@ -200,8 +202,6 @@ class MobiWriter(object):
         first_image_record = None
         if self.resources:
             used_images = self.serializer.used_images
-            if self.kf8 is not None:
-                used_images |= self.kf8.used_images
             first_image_record = len(self.records)
             self.resources.serialize(self.records, used_images)
         last_content_record = len(self.records) - 1
@@ -365,6 +365,75 @@ class MobiWriter(object):
         self.records[0] = align_block(record0)
     # }}}
 
+    def generate_joint_record0(self): # {{{
+        '''
+        Generate record 0 for a joint MOBI 6/KF8 file. Appends the resource
+        records, a BOUNDARY record and the KF8 records to self.records, then
+        stores a MOBI 6 (file_version=6) header in self.records[0].
+        '''
+        from calibre.ebooks.mobi.writer8.mobi import (MOBIHeader,
+                HEADER_FIELDS)
+        from calibre.ebooks.mobi.writer8.exth import build_exth
+
+        # Insert resource records
+        first_image_record = None
+        old = len(self.records)
+        if self.resources:
+            used_images = self.serializer.used_images | self.kf8.used_images
+            first_image_record = len(self.records)
+            self.resources.serialize(self.records, used_images)
+        resource_record_count = len(self.records) - old
+
+        # Insert KF8 records
+        self.records.append(b'BOUNDARY')
+        kf8_header_index = len(self.records)
+        self.kf8.start_offset = (self.serializer.start_offset,
+                self.kf8.start_offset)
+        self.records.append(self.kf8.record0)
+        self.records.extend(self.kf8.records[1:])
+
+        # With no resource records, fall back to the index just past the
+        # KF8 records instead of leaving first_resource_record as None
+        first_image_record = first_image_record or len(self.records)
+
+        header_fields = {k:getattr(self.kf8, k) for k in HEADER_FIELDS}
+
+        # Now change the header fields that need to be different in the MOBI 6
+        # header
+        header_fields['first_resource_record'] = first_image_record
+        header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
+        header_fields['fdst_record'] = NULL_INDEX
+        header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
+        header_fields['extra_data_flags'] = 0b11
+
+        for k, v in {'last_text_record':'last_text_record_idx',
+                'first_non_text_record':'first_non_text_record_idx',
+                'ncx_index':'primary_index_record_idx',
+                }.iteritems():
+            header_fields[k] = getattr(self, v)
+
+        for x in ('skel', 'chunk', 'guide'):
+            header_fields[x+'_index'] = NULL_INDEX
+
+        # Create the MOBI 6 EXTH
+        opts = self.opts
+        kuc = 0 if resource_record_count > 0 else None
+
+        header_fields['exth'] = build_exth(self.oeb.metadata,
+                prefer_author_sort=opts.prefer_author_sort,
+                is_periodical=opts.mobi_periodical,
+                share_not_sync=opts.share_not_sync,
+                cover_offset=self.cover_offset,
+                thumbnail_offset=self.thumbnail_offset,
+                num_of_resources=resource_record_count,
+                kf8_unknown_count=kuc, be_kindlegen2=True,
+                kf8_header_index=kf8_header_index,
+                start_offset=self.serializer.start_offset,
+                mobi_doctype=2)
+        self.records[0] = MOBIHeader(file_version=6)(**header_fields)
+
+    # }}}
+
     def write_header(self): # PalmDB header {{{
         '''
         Write the PalmDB header
diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py
index b469c01d85..4c0c769668 100644
--- a/src/calibre/ebooks/mobi/writer8/exth.py
+++ b/src/calibre/ebooks/mobi/writer8/exth.py
@@ -27,6 +27,7 @@ EXTH_CODES = {
     'source': 112,
     'versionnumber': 114,
     'startreading': 116,
+    'kf8_header_index': 121,
     'num_of_resources': 125,
     'kf8_unknown_count': 131,
     'coveroffset': 201,
@@ -41,7 +42,7 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
 def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         share_not_sync=True, cover_offset=None, thumbnail_offset=None,
         start_offset=None, mobi_doctype=2, num_of_resources=None,
-        kf8_unknown_count=0, be_kindlegen2=False):
+        kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None):
     exth = BytesIO()
     nrecs = 0
 
@@ -158,9 +159,15 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         except TypeError:
             start_offset = [start_offset]
         for so in start_offset:
-            exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
-                so))
-            nrecs += 1
+            if so is not None:
+                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
+                    so))
+                nrecs += 1
+
+    if kf8_header_index is not None:
+        exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
+            kf8_header_index))
+        nrecs += 1
 
     if num_of_resources is not None:
         exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index 19d7e390a9..e35ab5e437 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -380,7 +380,7 @@ class KF8Writer(object):
             self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle
             self.guide_records = GuideIndex(self.guide_table)()
 
-def create_kf8_book(oeb, opts, resources):
+def create_kf8_book(oeb, opts, resources, for_joint=False):
     writer = KF8Writer(oeb, opts, resources)
-    return KF8Book(writer)
+    return KF8Book(writer, for_joint=for_joint)
 
diff --git a/src/calibre/ebooks/mobi/writer8/mobi.py b/src/calibre/ebooks/mobi/writer8/mobi.py
index 1bb83ad4b6..ff096f350b 100644
--- a/src/calibre/ebooks/mobi/writer8/mobi.py
+++ b/src/calibre/ebooks/mobi/writer8/mobi.py
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-import time
+import time, random
 from struct import pack
 
 from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
@@ -25,8 +25,6 @@ class MOBIHeader(Header): # {{{
     the file.
     '''
 
-    FILE_VERSION = 8
-
     DEFINITION = '''
     # 0: Compression
     compression = DYN
@@ -63,7 +61,7 @@ class MOBIHeader(Header): # {{{
     encoding = 65001
 
     # 32: UID
-    uid = random.randint(0, 0xffffffff)
+    uid = DYN
 
     # 36: File version
     file_version = {file_version}
@@ -154,7 +152,7 @@ class MOBIHeader(Header): # {{{
     # 0b1 - extra multibyte bytes after text records
     # 0b10 - TBS indexing data (only used in MOBI 6)
     # 0b100 - uncrossable breaks only used in MOBI 6
-    extra_data_flags = 1
+    extra_data_flags = DYN
 
     # 244: KF8 Indices
     ncx_index = DYN
@@ -171,13 +169,18 @@ class MOBIHeader(Header): # {{{
 
     # Padding to allow amazon's DTP service to add data
     padding = zeroes(8192)
-    '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)
+    '''
 
     SHORT_FIELDS = {'compression', 'last_text_record', 'record_size',
             'encryption_type', 'unused2'}
     ALIGN = True
     POSITIONS = {'title_offset':'full_title'}
 
+    def __init__(self, file_version=8):
+        self.DEFINITION = self.DEFINITION.format(file_version=file_version,
+                record_size=RECORD_SIZE)
+        super(MOBIHeader, self).__init__()
+
     def format_value(self, name, val):
         if name == 'compression':
             val = PALMDOC if val else UNCOMPRESSED
@@ -185,14 +188,20 @@ class MOBIHeader(Header): # {{{
 
 # }}}
 
-# Fields that need to be set in the MOBI Header are
+HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type',
+        'first_non_text_record', 'title_length', 'language_code',
+        'first_resource_record', 'exth_flags', 'fdst_record',
+        'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
+        'guide_index', 'exth', 'full_title', 'extra_data_flags',
+        'uid'}
 
 class KF8Book(object):
 
-    def __init__(self, writer):
-        self.build_records(writer)
+    def __init__(self, writer, for_joint=False):
+        self.build_records(writer, for_joint)
+        self.used_images = writer.used_images
 
-    def build_records(self, writer):
+    def build_records(self, writer, for_joint):
         metadata = writer.oeb.metadata
         # The text records
         for x in ('last_text_record_idx', 'first_non_text_record_idx'):
@@ -222,8 +231,10 @@ class KF8Book(object):
         self.first_resource_record = NULL_INDEX
         if resources.records:
             self.first_resource_record = len(self.records)
-            self.records.extend(resources.records)
-        self.num_of_resources = len(resources.records)
+        before = len(self.records)
+        if not for_joint:
+            resources.serialize(self.records, writer.used_images)
+        self.num_of_resources = len(self.records) - before
 
         # FDST
         self.fdst_count = writer.fdst_count
@@ -233,12 +244,13 @@ class KF8Book(object):
         # EOF
         self.records.append(b'\xe9\x8e\r\n') # EOF record
 
-
         # Miscellaneous header fields
         self.compression = writer.compress
         self.book_type = 0x101 if writer.opts.mobi_periodical else 2
         self.full_title = utf8_text(unicode(metadata.title[0]))
         self.title_length = len(self.full_title)
+        self.extra_data_flags = 0b1
+        self.uid = random.randint(0, 0xffffffff)
 
         self.language_code = iana2mobi(str(metadata.language[0]))
         self.exth_flags = 0b1010000
@@ -248,14 +260,14 @@ class KF8Book(object):
         self.opts = writer.opts
         self.start_offset = writer.start_offset
         self.metadata = metadata
+        self.kuc = 0 if len(resources.records) > 0 else None
 
     @property
     def record0(self):
         ''' We generate the EXTH header and record0 dynamically, to allow other
-        code to customize various values after build_record() has been
+        code to customize various values after build_records() has been
         called'''
         opts = self.opts
-        kuc = 0 if self.num_of_resources > 0 else None
         self.exth = build_exth(self.metadata,
                 prefer_author_sort=opts.prefer_author_sort,
                 is_periodical=opts.mobi_periodical,
@@ -263,15 +275,10 @@ class KF8Book(object):
                 cover_offset=self.cover_offset,
                 thumbnail_offset=self.thumbnail_offset,
                 num_of_resources=self.num_of_resources,
-                kf8_unknown_count=kuc, be_kindlegen2=True,
+                kf8_unknown_count=self.kuc, be_kindlegen2=True,
                 start_offset=self.start_offset, mobi_doctype=self.book_type)
 
-        kwargs = {field:getattr(self, field) for field in
-                ('compression', 'text_length', 'last_text_record', 'book_type',
-                    'first_non_text_record', 'title_length', 'language_code',
-                    'first_resource_record', 'exth_flags', 'fdst_record',
-                    'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
-                    'guide_index', 'exth', 'full_title')}
+        kwargs = {field:getattr(self, field) for field in HEADER_FIELDS}
         return MOBIHeader()(**kwargs)
 
     def write(self, outpath):