From 57b01c645d85dac262b6015c2e5801d3a7d1d660 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 22 Apr 2012 19:30:30 +0530 Subject: [PATCH] KF8 Output: Can now generate standalone KF8 files. There are still bugs that need to be fixed, the produced files are not usable. --- .../ebooks/conversion/plugins/mobi_output.py | 13 +- src/calibre/ebooks/mobi/writer2/main.py | 155 +-------- src/calibre/ebooks/mobi/writer8/exth.py | 176 ++++++++++ src/calibre/ebooks/mobi/writer8/header.py | 15 +- src/calibre/ebooks/mobi/writer8/index.py | 2 +- src/calibre/ebooks/mobi/writer8/main.py | 10 +- src/calibre/ebooks/mobi/writer8/mobi.py | 302 ++++++++++++++++++ 7 files changed, 519 insertions(+), 154 deletions(-) create mode 100644 src/calibre/ebooks/mobi/writer8/exth.py create mode 100644 src/calibre/ebooks/mobi/writer8/mobi.py diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index 971d11df3b..4210f7223e 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -164,7 +164,8 @@ class MOBIOutput(OutputFormatPlugin): from calibre.ebooks.mobi.writer2.resources import Resources self.log, self.opts, self.oeb = log, opts, oeb - create_kf8 = tweaks.get('create_kf8', False) + mobi_type = tweaks.get('test_mobi_output_type', 'old') + create_kf8 = mobi_type in ('new', 'both') self.remove_html_cover() resources = Resources(oeb, opts, self.is_periodical, @@ -172,13 +173,17 @@ class MOBIOutput(OutputFormatPlugin): self.check_for_periodical() kf8 = self.create_kf8(resources) if create_kf8 else None + if mobi_type == 'new': + kf8.write(output_path) + self.extract_mobi(output_path, opts) + return self.log('Creating MOBI 6 output') self.write_mobi(input_plugin, output_path, kf8, resources) def create_kf8(self, resources): - from calibre.ebooks.mobi.writer8.main import KF8Writer - return KF8Writer(self.oeb, self.opts, resources) + from calibre.ebooks.mobi.writer8.main import create_kf8_book + return create_kf8_book(self.oeb, self.opts, resources) def write_mobi(self, input_plugin, output_path, kf8, resources): from calibre.ebooks.mobi.mobiml import MobiMLizer @@ -209,7 +214,9 @@ class MOBIOutput(OutputFormatPlugin): writer = MobiWriter(opts, resources, kf8, write_page_breaks_after_item=write_page_breaks_after_item) writer(oeb, output_path) + self.extract_mobi(output_path, opts) + def extract_mobi(self, output_path, opts): if opts.extract_to is not None: from calibre.ebooks.mobi.debug.main import inspect_mobi ddir = opts.extract_to diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index c930609489..a8fc37ff45 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, random, time +import random, time from cStringIO import StringIO from struct import pack @@ -21,32 +21,10 @@ from calibre.ebooks.mobi.utils import (encint, encode_trailing_data, align_block, detect_periodical, RECORD_SIZE, create_text_record) from calibre.ebooks.mobi.writer2.indexer import Indexer -EXTH_CODES = { - 'creator': 100, - 'publisher': 101, - 'description': 103, - 'identifier': 104, - 'subject': 105, - 'pubdate': 106, - 'review': 107, - 'contributor': 108, - 'rights': 109, - 'type': 111, - 'source': 112, - 'versionnumber': 114, - 'startreading': 116, - 'coveroffset': 201, - 'thumboffset': 202, - 'hasfakecover': 203, - 'lastupdatetime': 502, - 'title': 503, - } - # Disabled as I dont care about uncrossable breaks WRITE_UNCROSSABLE_BREAKS = False class MobiWriter(object): - COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True): self.opts = opts @@ -210,7 +188,15 @@ class MobiWriter(object): # header as well bt = 0x103 if self.indexer.is_flat_periodical else 0x101 - exth = self.build_exth(bt) + from calibre.ebooks.mobi.writer8.exth import build_exth + exth = build_exth(metadata, + prefer_author_sort=self.opts.prefer_author_sort, + is_periodical=self.is_periodical, + share_not_sync=self.opts.share_not_sync, + cover_offset=self.cover_offset, + thumbnail_offset=self.thumbnail_offset, + start_offset=self.serializer.start_offset, mobi_doctype=bt + ) first_image_record = None if self.resources: used_images = self.serializer.used_images @@ -379,127 +365,6 @@ class MobiWriter(object): self.records[0] = align_block(record0) # }}} - def build_exth(self, mobi_doctype): # EXTH Header {{{ - oeb = self.oeb - exth = StringIO() - nrecs = 0 - for term in oeb.metadata: - if term not in EXTH_CODES: continue - code = EXTH_CODES[term] - items = oeb.metadata[term] - if term == 'creator': - if self.prefer_author_sort: - creators = [normalize(unicode(c.file_as or c)) for c in - items][:1] - else: - creators = [normalize(unicode(c)) for c in items] - items = ['; '.join(creators)] - for item in items: - data = normalize(unicode(item)) - if term != 'description': - data = self.COLLAPSE_RE.sub(' ', data) - if term == 'identifier': - if data.lower().startswith('urn:isbn:'): - data = data[9:] - elif item.scheme.lower() == 'isbn': - pass - else: - continue - data = data.encode('utf-8') - exth.write(pack(b'>II', code, len(data) + 8)) - exth.write(data) - nrecs += 1 - if term == 'rights' : - try: - rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8') - except: - rights = b'Unknown' - exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8)) - exth.write(rights) - nrecs += 1 - - # Write UUID as ASIN - uuid = None - from calibre.ebooks.oeb.base import OPF - for x in oeb.metadata['identifier']: - if (x.get(OPF('scheme'), None).lower() == 'uuid' or - unicode(x).startswith('urn:uuid:')): - uuid = unicode(x).split(':')[-1] - break - if uuid is None: - from uuid import uuid4 - uuid = str(uuid4()) - - if isinstance(uuid, unicode): - uuid = uuid.encode('utf-8') - if not self.opts.share_not_sync: - exth.write(pack(b'>II', 113, len(uuid) + 8)) - exth.write(uuid) - nrecs += 1 - - # Write cdetype - if not self.is_periodical: - if not self.opts.share_not_sync: - exth.write(pack(b'>II', 501, 12)) - exth.write(b'EBOK') - nrecs += 1 - else: - ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None) - if ids: - exth.write(pack(b'>II', 501, 12)) - exth.write(ids) - nrecs += 1 - - # Add a publication date entry - if oeb.metadata['date']: - datestr = str(oeb.metadata['date'][0]) - elif oeb.metadata['timestamp']: - datestr = str(oeb.metadata['timestamp'][0]) - - if datestr is None: - raise ValueError("missing date or timestamp") - - datestr = bytes(datestr) - exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8)) - exth.write(datestr) - nrecs += 1 - if self.is_periodical: - exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8)) - exth.write(datestr) - nrecs += 1 - - if self.is_periodical: - # Pretend to be amazon's super secret periodical generator - vals = {204:201, 205:2, 206:0, 207:101} - else: - # Pretend to be kindlegen 1.2 - vals = {204:201, 205:1, 206:2, 207:33307} - for code, val in vals.iteritems(): - exth.write(pack(b'>III', code, 12, val)) - nrecs += 1 - - if self.cover_offset is not None: - exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12, - self.cover_offset)) - exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0)) - nrecs += 2 - if self.thumbnail_offset is not None: - exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12, - self.thumbnail_offset)) - nrecs += 1 - - if self.serializer.start_offset is not None: - exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, - self.serializer.start_offset)) - nrecs += 1 - - exth = exth.getvalue() - trail = len(exth) % 4 - pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte - exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad] - return b''.join(exth) - # }}} - def write_header(self): # PalmDB header {{{ ''' Write the PalmDB header diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py new file mode 100644 index 0000000000..867e2c3112 --- /dev/null +++ b/src/calibre/ebooks/mobi/writer8/exth.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import re +from struct import pack +from io import BytesIO + +from calibre.ebooks.mobi.utils import utf8_text + +EXTH_CODES = { + 'creator': 100, + 'publisher': 101, + 'description': 103, + 'identifier': 104, + 'subject': 105, + 'pubdate': 106, + 'review': 107, + 'contributor': 108, + 'rights': 109, + 'type': 111, + 'source': 112, + 'versionnumber': 114, + 'startreading': 116, + 'num_of_resources': 125, + 'kf8_unknown_count': 131, + 'coveroffset': 201, + 'thumboffset': 202, + 'hasfakecover': 203, + 'lastupdatetime': 502, + 'title': 503, +} + +COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') + +def build_exth(metadata, prefer_author_sort=False, is_periodical=False, + share_not_sync=True, cover_offset=None, thumbnail_offset=None, + start_offset=None, mobi_doctype=2, num_of_resources=None, + kf8_unknown_count=0, be_kindlegen2=False): + exth = BytesIO() + nrecs = 0 + + for term in metadata: + if term not in EXTH_CODES: continue + code = EXTH_CODES[term] + items = metadata[term] + if term == 'creator': + if prefer_author_sort: + creators = [unicode(c.file_as or c) for c in + items][:1] + else: + creators = [unicode(c) for c in items] + items = ['; '.join(creators)] + for item in items: + data = unicode(item) + if term != 'description': + data = COLLAPSE_RE.sub(' ', data) + if term == 'identifier': + if data.lower().startswith('urn:isbn:'): + data = data[9:] + elif item.scheme.lower() == 'isbn': + pass + else: + continue + data = utf8_text(data) + exth.write(pack(b'>II', code, len(data) + 8)) + exth.write(data) + nrecs += 1 + if term == 'rights' : + try: + rights = utf8_text(unicode(metadata.rights[0])) + except: + rights = b'Unknown' + exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8)) + exth.write(rights) + nrecs += 1 + + # Write UUID as ASIN + uuid = None + from calibre.ebooks.oeb.base import OPF + for x in metadata['identifier']: + if (x.get(OPF('scheme'), None).lower() == 'uuid' or + unicode(x).startswith('urn:uuid:')): + uuid = unicode(x).split(':')[-1] + break + if uuid is None: + from uuid import uuid4 + uuid = str(uuid4()) + + if isinstance(uuid, unicode): + uuid = uuid.encode('utf-8') + if share_not_sync: + exth.write(pack(b'>II', 113, len(uuid) + 8)) + exth.write(uuid) + nrecs += 1 + + # Write cdetype + if not is_periodical: + if not share_not_sync: + exth.write(pack(b'>II', 501, 12)) + exth.write(b'EBOK') + nrecs += 1 + else: + ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None) + if ids: + exth.write(pack(b'>II', 501, 12)) + exth.write(ids) + nrecs += 1 + + # Add a publication date entry + if metadata['date']: + datestr = str(metadata['date'][0]) + elif metadata['timestamp']: + datestr = str(metadata['timestamp'][0]) + + if datestr is None: + raise ValueError("missing date or timestamp") + + datestr = bytes(datestr) + exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8)) + exth.write(datestr) + nrecs += 1 + if is_periodical: + exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8)) + exth.write(datestr) + nrecs += 1 + + if be_kindlegen2: + vals = {204:201, 205:2, 206:2, 207:35621} + elif is_periodical: + # Pretend to be amazon's super secret periodical generator + vals = {204:201, 205:2, 206:0, 207:101} + else: + # Pretend to be kindlegen 1.2 + vals = {204:201, 205:1, 206:2, 207:33307} + for code, val in vals.iteritems(): + exth.write(pack(b'>III', code, 12, val)) + nrecs += 1 + + if cover_offset is not None: + exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12, + cover_offset)) + exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0)) + nrecs += 2 + if thumbnail_offset is not None: + exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12, + thumbnail_offset)) + nrecs += 1 + + if start_offset is not None: + exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, + start_offset)) + nrecs += 1 + + if num_of_resources is not None: + exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12, + num_of_resources)) + nrecs += 1 + + if kf8_unknown_count is not None: + exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12, + kf8_unknown_count)) + nrecs += 1 + + exth = exth.getvalue() + trail = len(exth) % 4 + pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte + exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad] + return b''.join(exth) + + diff --git a/src/calibre/ebooks/mobi/writer8/header.py b/src/calibre/ebooks/mobi/writer8/header.py index 31571d0f5f..94ae722f59 100644 --- a/src/calibre/ebooks/mobi/writer8/header.py +++ b/src/calibre/ebooks/mobi/writer8/header.py @@ -7,6 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import random from io import BytesIO from collections import OrderedDict from struct import pack @@ -16,6 +17,7 @@ from calibre.ebooks.mobi.utils import align_block NULL = 0xffffffff zeroes = lambda x: b'\0'*x nulls = lambda x: b'\xff'*x +short = lambda x: pack(b'>H', x) class Header(OrderedDict): @@ -25,7 +27,9 @@ class Header(OrderedDict): ''' ALIGN_BLOCK = False - POSITIONS = {} + POSITIONS = {} # Mapping of position field to field whose position should + # be stored in the position field + SHORT_FIELDS = set() def __init__(self): OrderedDict.__init__(self) @@ -36,13 +40,17 @@ class Header(OrderedDict): name, val = [x.strip() for x in line.partition('=')[0::2]] if val: val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, - 'nulls':nulls}) + 'nulls':nulls, 'short':short, 'random':random}) else: val = 0 if name in self: raise ValueError('Duplicate field in definition: %r'%name) self[name] = val + @property + def dynamic_fields(self): + return tuple(k for k, v in self.iteritems() if v is None) + def __call__(self, **kwargs): positions = {} for name, val in kwargs.iteritems(): @@ -58,7 +66,8 @@ class Header(OrderedDict): if val is None: raise ValueError('Dynamic field %r not set'%name) if isinstance(val, (int, long)): - val = pack(b'>I', val) + fmt = 'H' if name in self.SHORT_FIELDS else 'I' + val = pack(b'>'+fmt, val) buf.write(val) for pos_field, field in self.POSITIONS.iteritems(): diff --git a/src/calibre/ebooks/mobi/writer8/index.py b/src/calibre/ebooks/mobi/writer8/index.py index 1cf9f02d4b..a3d5c6763f 100644 --- a/src/calibre/ebooks/mobi/writer8/index.py +++ b/src/calibre/ebooks/mobi/writer8/index.py @@ -182,7 +182,7 @@ class Index(object): # {{{ if len(body) + self.HEADER_LENGTH >= 0x10000: raise too_large header = b'INDX' - buf.truncate(0) + buf.seek(0), buf.truncate(0) buf.write(pack(b'>I', self.HEADER_LENGTH)) buf.write(b'\0'*4) # Unknown buf.write(pack(b'>I', 1)) # Header type? Or index record number? diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index e061da7df6..2b0eebb13d 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -26,6 +26,7 @@ from calibre.ebooks.oeb.parse_utils import barename from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex, ChunkIndex, GuideIndex) +from calibre.ebooks.mobi.writer8.mobi import KF8Book XML_DOCS = OEB_DOCS | {SVG_MIME} @@ -42,7 +43,7 @@ class KF8Writer(object): self.used_images = set() self.resources = resources self.flows = [None] # First flow item is reserved for the text - self.records = [] + self.records = [None] # Placeholder for zeroth record self.log('\tGenerating KF8 markup...') self.dup_data() @@ -266,9 +267,10 @@ class KF8Writer(object): start = 0 if i == 0 else self.fdst_table[-1].end self.fdst_table.append(FDST(start, start + len(flow))) entries.extend(self.fdst_table[-1]) - rec = (b'FDST' + pack(b'>LL', len(self.fdst_table), 12) + + rec = (b'FDST' + pack(b'>LL', 12, len(self.fdst_table)) + pack(b'>%dL'%len(entries), *entries)) self.fdst_records = [rec] + self.fdst_count = len(self.fdst_table) def create_indices(self): self.skel_records = SkelIndex(self.skel_table)() @@ -347,3 +349,7 @@ class KF8Writer(object): if self.guide_table: self.guide_records = GuideIndex(self.guide_table)() +def create_kf8_book(oeb, opts, resources): + writer = KF8Writer(oeb, opts, resources) + return KF8Book(writer) + diff --git a/src/calibre/ebooks/mobi/writer8/mobi.py b/src/calibre/ebooks/mobi/writer8/mobi.py new file mode 100644 index 0000000000..aa432c487a --- /dev/null +++ b/src/calibre/ebooks/mobi/writer8/mobi.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import time +from struct import pack + +from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text +from calibre.ebooks.mobi.writer8.header import Header +from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED) +from calibre.ebooks.mobi.langcodes import iana2mobi +from calibre.ebooks.mobi.writer8.exth import build_exth +from calibre.utils.filenames import ascii_filename + +NULL_INDEX = 0xffffffff + +class MOBIHeader(Header): # {{{ + ''' + Represents the first record in a MOBI file, contains all the metadata about + the file. + ''' + + FILE_VERSION = 8 + + DEFINITION = ''' + # 0: Compression + compression = DYN + + # 2: Unused + unused1 = zeroes(2) + + # 4: Text length + text_length = DYN + + # 8: Last text record + last_text_record = DYN + + # 10: Text record size + record_size = {record_size} + + # 12: Unused + unused2 + + # 16: Ident + ident = b'MOBI' + + # 20: Header length + header_length = 248 + + # 24: Book Type (0x2 - Book, 0x101 - News hierarchical, 0x102 - News + # (flat), 0x103 - News magazine same as 0x101) + book_type = DYN + + # 28: Text encoding (utf-8 = 65001) + encoding = 65001 + + # 32: UID + uid = random.randint(0, 0xffffffff) + + # 36: File version + file_version = {file_version} + + # 40: Meta orth record (Chunk table index in KF8) + meta_orth_record = DYN + + # 44: Meta infl index + meta_infl_index = NULL + + # 48: Extra indices + extra_index0 = NULL + extra_index1 = NULL + extra_index2 = NULL + extra_index3 = NULL + extra_index4 = NULL + extra_index5 = NULL + extra_index6 = NULL + extra_index7 = NULL + + # 80: First non text record + first_non_text_record = DYN + + # 84: Title offset + title_offset + + # 88: Title Length + title_length = DYN + + # 92: Language code + language_code = DYN + + # 96: Dictionary in and out languages + in_lang + out_lang + + # 104: Min version + min_version = {file_version} + + # 108: First resource record + first_resource_record = DYN + + # 112: Huff/CDIC compression + huff_first_record + huff_count + + # 120: DATP records + datp_first_record + datp_count + + # 128: EXTH flags + exth_flags = DYN + + # 132: Unknown + unknown = zeroes(32) + + # 164: DRM + drm_offset = NULL + drm_count = NULL + drm_size + drm_flags + + # 180: Unknown + unknown2 = zeroes(12) + + # 192: FDST + fdst_record = DYN + fdst_count = DYN + + # 200: FCI + fcis_record = NULL + fcis_count + + # 208: FLIS + flis_record = NULL + flis_count + + # 216: Unknown + unknown3 = zeroes(8) + + # 224: SRCS + srcs_record = NULL + srcs_count + + # 232: Unknown + unknown4 = nulls(8) + + # 240: Extra data flags + # 0b1 - extra multibyte bytes after text records + # 0b10 - TBS indexing data (only used in MOBI 6) + # 0b100 - uncrossable breaks only used in MOBI 6 + extra_data_flags = 1 + + # 244: KF8 Indices + ncx_index = DYN + chunk_index = DYN + skel_index = DYN + datp_index = NULL + guide_index = DYN + + # 264: EXTH + exth = DYN + + # Full title + full_title = DYN + + # Padding to allow amazon's DTP service to add data + padding = zeroes(8192) + '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION) + + SHORT_FIELDS = {'compression', 'last_text_record', 'record_size'} + ALIGN = True + POSITIONS = {'title_offset':'full_title'} + + def format_value(self, name, val): + if name == 'compression': + val = PALMDOC if val else UNCOMPRESSED + return super(MOBIHeader, self).format_value(name, val) + +# }}} + +# Fields that need to be set in the MOBI Header are + +class KF8Book(object): + + def __init__(self, writer): + self.build_records(writer) + + def build_records(self, writer): + metadata = writer.oeb.metadata + # The text records + for x in ('last_text_record_idx', 'first_non_text_record_idx'): + setattr(self, x.rpartition('_')[0], getattr(writer, x)) + self.records = writer.records + self.text_length = writer.text_length + + # KF8 Indices + self.chunk_index = self.meta_orth_record = len(self.records) + self.records.extend(writer.chunk_records) + self.skel_index = len(self.records) + self.records.extend(writer.skel_records) + self.guide_index = NULL_INDEX + if writer.guide_records: + self.guide_index = len(self.records) + self.records.extend(writer.guide_records) + self.ncx_index = NULL_INDEX + if writer.ncx_records: + self.ncx_index = len(self.records) + self.records.extend(writer.ncx_records) + + # Resources + resources = writer.resources + for x in ('cover_offset', 'thumbnail_offset', 'masthead_offset'): + setattr(self, x, getattr(resources, x)) + + self.first_resource_record = NULL_INDEX + if resources.records: + self.first_resource_record = len(self.records) + self.records.extend(resources.records) + + self.first_resource_record = len(self.records) + self.num_of_resources = len(resources.records) + + # FDST + self.fdst_count = writer.fdst_count + self.fdst_record = len(self.records) + self.records.extend(writer.fdst_records) + + # EOF + self.records.append(b'\xe9\x8e\r\n') # EOF record + + + # Miscellaneous header fields + self.compression = writer.compress + self.book_type = 0x101 if writer.opts.mobi_periodical else 2 + self.full_title = utf8_text(unicode(metadata.title[0])) + self.title_length = len(self.full_title) + + self.language_code = iana2mobi(str(metadata.language[0])) + self.exth_flags = 0b1010000 + if writer.opts.mobi_periodical: + self.exth_flags |= 0b1000 + + self.opts = writer.opts + self.start_offset = writer.start_offset + self.metadata = metadata + + @property + def record0(self): + ''' We generate the EXTH header and record0 dynamically, to allow other + code to customize various values after build_record() has been + called''' + opts = self.opts + kuc = 0 if self.num_of_resources > 0 else None + self.exth = build_exth(self.metadata, + prefer_author_sort=opts.prefer_author_sort, + is_periodical=opts.mobi_periodical, + share_not_sync=opts.share_not_sync, + cover_offset=self.cover_offset, + thumbnail_offset=self.thumbnail_offset, + num_of_resources=self.num_of_resources, + kf8_unknown_count=kuc, be_kindlegen2=True, + start_offset=self.start_offset, mobi_doctype=self.book_type) + + kwargs = {field:getattr(self, field) for field in + ('compression', 'text_length', 'last_text_record', + 'book_type', 'meta_orth_record', 'first_non_text_record', + 'title_length', 'language_code', 'first_resource_record', + 'exth_flags', 'fdst_record', 'fdst_count', 'ncx_index', + 'chunk_index', 'skel_index', 'guide_index', 'exth', + 'full_title')} + return MOBIHeader()(**kwargs) + + def write(self, outpath): + records = [self.record0] + self.records[1:] + + with open(outpath, 'wb') as f: + + # Write PalmDB Header + + title = ascii_filename(self.full_title.decode('utf-8')).replace( + ' ', '_')[:31] + title += (b'\0' * (32 - len(title))) + now = int(time.time()) + nrecords = len(records) + f.write(title) + f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0)) + f.write(b'BOOKMOBI') + f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords)) + offset = f.tell() + (8 * nrecords) + 2 + for i, record in enumerate(records): + f.write(pack(b'>I', offset)) + f.write(b'\0' + pack(b'>I', 2*i)[1:]) + offset += len(record) + f.write(b'\0\0') + + for rec in records: + f.write(rec) +