From 57b01c645d85dac262b6015c2e5801d3a7d1d660 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 22 Apr 2012 19:30:30 +0530
Subject: [PATCH] KF8 Output: Can now generate standalone KF8 files. There are
 still bugs that need to be fixed; the produced files are not usable.

---
 .../ebooks/conversion/plugins/mobi_output.py  |  13 +-
 src/calibre/ebooks/mobi/writer2/main.py       | 155 +--------
 src/calibre/ebooks/mobi/writer8/exth.py       | 176 ++++++++++
 src/calibre/ebooks/mobi/writer8/header.py     |  15 +-
 src/calibre/ebooks/mobi/writer8/index.py      |   2 +-
 src/calibre/ebooks/mobi/writer8/main.py       |  10 +-
 src/calibre/ebooks/mobi/writer8/mobi.py       | 302 ++++++++++++++++++
 7 files changed, 519 insertions(+), 154 deletions(-)
 create mode 100644 src/calibre/ebooks/mobi/writer8/exth.py
 create mode 100644 src/calibre/ebooks/mobi/writer8/mobi.py

diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py
index 971d11df3b..4210f7223e 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@@ -164,7 +164,8 @@ class MOBIOutput(OutputFormatPlugin):
         from calibre.ebooks.mobi.writer2.resources import Resources
         self.log, self.opts, self.oeb = log, opts, oeb
 
-        create_kf8 = tweaks.get('create_kf8', False)
+        mobi_type = tweaks.get('test_mobi_output_type', 'old')
+        create_kf8 = mobi_type in ('new', 'both')
 
         self.remove_html_cover()
         resources = Resources(oeb, opts, self.is_periodical,
@@ -172,13 +173,17 @@ class MOBIOutput(OutputFormatPlugin):
         self.check_for_periodical()
 
         kf8 = self.create_kf8(resources) if create_kf8 else None
+        if mobi_type == 'new':
+            kf8.write(output_path)
+            self.extract_mobi(output_path, opts)
+            return
 
         self.log('Creating MOBI 6 output')
         self.write_mobi(input_plugin, output_path, kf8, resources)
 
     def create_kf8(self, resources):
-        from calibre.ebooks.mobi.writer8.main import KF8Writer
-        return KF8Writer(self.oeb, self.opts, resources)
+        from calibre.ebooks.mobi.writer8.main import create_kf8_book
+        return create_kf8_book(self.oeb, self.opts, resources)
 
     def write_mobi(self, input_plugin, output_path, kf8, resources):
         from calibre.ebooks.mobi.mobiml import MobiMLizer
@@ -209,7 +214,9 @@ class MOBIOutput(OutputFormatPlugin):
         writer = MobiWriter(opts, resources, kf8,
                         write_page_breaks_after_item=write_page_breaks_after_item)
         writer(oeb, output_path)
+        self.extract_mobi(output_path, opts)
 
+    def extract_mobi(self, output_path, opts):
         if opts.extract_to is not None:
             from calibre.ebooks.mobi.debug.main import inspect_mobi
             ddir = opts.extract_to
diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index c930609489..a8fc37ff45 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import re, random, time
+import random, time
 from cStringIO import StringIO
 from struct import pack
 
@@ -21,32 +21,10 @@ from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
         align_block, detect_periodical, RECORD_SIZE, create_text_record)
 from calibre.ebooks.mobi.writer2.indexer import Indexer
 
-EXTH_CODES = {
-    'creator': 100,
-    'publisher': 101,
-    'description': 103,
-    'identifier': 104,
-    'subject': 105,
-    'pubdate': 106,
-    'review': 107,
-    'contributor': 108,
-    'rights': 109,
-    'type': 111,
-    'source': 112,
-    'versionnumber': 114,
-    'startreading': 116,
-    'coveroffset': 201,
-    'thumboffset': 202,
-    'hasfakecover': 203,
-    'lastupdatetime': 502,
-    'title': 503,
-    }
-
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False
 
 class MobiWriter(object):
-    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
 
     def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
         self.opts = opts
@@ -210,7 +188,15 @@ class MobiWriter(object):
                 # header as well
                 bt = 0x103 if self.indexer.is_flat_periodical else 0x101
 
-        exth = self.build_exth(bt)
+        from calibre.ebooks.mobi.writer8.exth import build_exth
+        exth = build_exth(metadata,
+                prefer_author_sort=self.opts.prefer_author_sort,
+                is_periodical=self.is_periodical,
+                share_not_sync=self.opts.share_not_sync,
+                cover_offset=self.cover_offset,
+                thumbnail_offset=self.thumbnail_offset,
+                start_offset=self.serializer.start_offset, mobi_doctype=bt
+                )
         first_image_record = None
         if self.resources:
             used_images = self.serializer.used_images
@@ -379,127 +365,6 @@ class MobiWriter(object):
         self.records[0] = align_block(record0)
     # }}}
 
-    def build_exth(self, mobi_doctype): # EXTH Header {{{
-        oeb = self.oeb
-        exth = StringIO()
-        nrecs = 0
-        for term in oeb.metadata:
-            if term not in EXTH_CODES: continue
-            code = EXTH_CODES[term]
-            items = oeb.metadata[term]
-            if term == 'creator':
-                if self.prefer_author_sort:
-                    creators = [normalize(unicode(c.file_as or c)) for c in
-                            items][:1]
-                else:
-                    creators = [normalize(unicode(c)) for c in items]
-                items = ['; '.join(creators)]
-            for item in items:
-                data = normalize(unicode(item))
-                if term != 'description':
-                    data = self.COLLAPSE_RE.sub(' ', data)
-                if term == 'identifier':
-                    if data.lower().startswith('urn:isbn:'):
-                        data = data[9:]
-                    elif item.scheme.lower() == 'isbn':
-                        pass
-                    else:
-                        continue
-                data = data.encode('utf-8')
-                exth.write(pack(b'>II', code, len(data) + 8))
-                exth.write(data)
-                nrecs += 1
-            if term == 'rights' :
-                try:
-                    rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
-                except:
-                    rights = b'Unknown'
-                exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
-                exth.write(rights)
-                nrecs += 1
-
-        # Write UUID as ASIN
-        uuid = None
-        from calibre.ebooks.oeb.base import OPF
-        for x in oeb.metadata['identifier']:
-            if (x.get(OPF('scheme'), None).lower() == 'uuid' or
-                    unicode(x).startswith('urn:uuid:')):
-                uuid = unicode(x).split(':')[-1]
-                break
-        if uuid is None:
-            from uuid import uuid4
-            uuid = str(uuid4())
-
-        if isinstance(uuid, unicode):
-            uuid = uuid.encode('utf-8')
-        if not self.opts.share_not_sync:
-            exth.write(pack(b'>II', 113, len(uuid) + 8))
-            exth.write(uuid)
-            nrecs += 1
-
-        # Write cdetype
-        if not self.is_periodical:
-            if not self.opts.share_not_sync:
-                exth.write(pack(b'>II', 501, 12))
-                exth.write(b'EBOK')
-                nrecs += 1
-        else:
-            ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
-            if ids:
-                exth.write(pack(b'>II', 501, 12))
-                exth.write(ids)
-                nrecs += 1
-
-        # Add a publication date entry
-        if oeb.metadata['date']:
-            datestr = str(oeb.metadata['date'][0])
-        elif oeb.metadata['timestamp']:
-            datestr = str(oeb.metadata['timestamp'][0])
-
-        if datestr is None:
-            raise ValueError("missing date or timestamp")
-
-        datestr = bytes(datestr)
-        exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
-        exth.write(datestr)
-        nrecs += 1
-        if self.is_periodical:
-            exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
-            exth.write(datestr)
-            nrecs += 1
-
-        if self.is_periodical:
-            # Pretend to be amazon's super secret periodical generator
-            vals = {204:201, 205:2, 206:0, 207:101}
-        else:
-            # Pretend to be kindlegen 1.2
-            vals = {204:201, 205:1, 206:2, 207:33307}
-        for code, val in vals.iteritems():
-            exth.write(pack(b'>III', code, 12, val))
-            nrecs += 1
-
-        if self.cover_offset is not None:
-            exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
-                self.cover_offset))
-            exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
-            nrecs += 2
-        if self.thumbnail_offset is not None:
-            exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
-                self.thumbnail_offset))
-            nrecs += 1
-
-        if self.serializer.start_offset is not None:
-            exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
-                self.serializer.start_offset))
-            nrecs += 1
-
-        exth = exth.getvalue()
-        trail = len(exth) % 4
-        pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
-        exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
-        return b''.join(exth)
-    # }}}
-
     def write_header(self): # PalmDB header {{{
         '''
         Write the PalmDB header
diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py
new file mode 100644
index 0000000000..867e2c3112
--- /dev/null
+++ b/src/calibre/ebooks/mobi/writer8/exth.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+from struct import pack
+from io import BytesIO
+
+from calibre.ebooks.mobi.utils import utf8_text
+
+EXTH_CODES = {  # Maps metadata term / record names to EXTH record type codes
+    'creator': 100,
+    'publisher': 101,
+    'description': 103,
+    'identifier': 104,
+    'subject': 105,
+    'pubdate': 106,
+    'review': 107,
+    'contributor': 108,
+    'rights': 109,
+    'type': 111,
+    'source': 112,
+    'versionnumber': 114,
+    'startreading': 116,
+    'num_of_resources': 125,
+    'kf8_unknown_count': 131,
+    'coveroffset': 201,
+    'thumboffset': 202,
+    'hasfakecover': 203,
+    'lastupdatetime': 502,
+    'title': 503,
+}
+# Matches runs of whitespace; build_exth() replaces each run with one space
+COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
+
+def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
+        share_not_sync=True, cover_offset=None, thumbnail_offset=None,
+        start_offset=None, mobi_doctype=2, num_of_resources=None,
+        kf8_unknown_count=0, be_kindlegen2=False):
+    '''
+    Build the EXTH header block and return it as a bytestring. The offset and
+    count keyword arguments each become an integer record unless None.
+
+    :param metadata: Book metadata; iterated for its terms and indexed by
+        term. Only terms in EXTH_CODES are written, identifiers only if ISBN.
+    :param prefer_author_sort: Write only the first creator, preferring its
+        file_as value.
+    :param is_periodical: Write a lastupdatetime record and take the cdetype
+        from mobi_doctype (0x101 or 0x103) instead of using EBOK.
+    :param share_not_sync: When False, also write the UUID as the ASIN
+        (record 113) and, for non-periodicals, the EBOK cdetype (501).
+    :param be_kindlegen2: Write 204-207 as, presumably, kindlegen 2 does.
+    :raises ValueError: If metadata has neither a date nor a timestamp.
+    '''
+    exth = BytesIO()
+    nrecs = 0
+
+    for term in metadata:
+        if term not in EXTH_CODES: continue
+        code = EXTH_CODES[term]
+        items = metadata[term]
+        if term == 'creator':
+            if prefer_author_sort:
+                creators = [unicode(c.file_as or c) for c in items][:1]
+            else:
+                creators = [unicode(c) for c in items]
+            items = ['; '.join(creators)]
+        for item in items:
+            data = unicode(item)
+            if term != 'description':
+                data = COLLAPSE_RE.sub(' ', data)
+            if term == 'identifier':
+                if data.lower().startswith('urn:isbn:'):
+                    data = data[9:]
+                elif item.scheme.lower() != 'isbn':
+                    continue
+            data = utf8_text(data)
+            exth.write(pack(b'>II', code, len(data) + 8))
+            exth.write(data)
+            nrecs += 1
+        if term == 'rights' :
+            try:
+                rights = utf8_text(unicode(metadata.rights[0]))
+            except Exception:
+                rights = b'Unknown'
+            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
+            exth.write(rights)
+            nrecs += 1
+
+    # Write UUID as ASIN
+    uuid = None
+    from calibre.ebooks.oeb.base import OPF
+    for x in metadata['identifier']:
+        if ((x.get(OPF('scheme'), None) or '').lower() == 'uuid' or
+                unicode(x).startswith('urn:uuid:')):
+            uuid = unicode(x).split(':')[-1]
+            break
+    if uuid is None:
+        from uuid import uuid4
+        uuid = str(uuid4())
+
+    if isinstance(uuid, unicode):
+        uuid = uuid.encode('utf-8')
+    # The ASIN is written only when share_not_sync is off
+    if not share_not_sync:
+        exth.write(pack(b'>II', 113, len(uuid) + 8))
+        exth.write(uuid)
+        nrecs += 1
+
+    # Write cdetype
+    if not is_periodical:
+        if not share_not_sync:
+            exth.write(pack(b'>II', 501, 12))
+            exth.write(b'EBOK')
+            nrecs += 1
+    else:
+        ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
+        if ids:
+            exth.write(pack(b'>II', 501, 12))
+            exth.write(ids)
+            nrecs += 1
+
+    # Add a publication date entry
+    datestr = None # Stays None when there is neither a date nor a timestamp
+    if metadata['date']:
+        datestr = str(metadata['date'][0])
+    elif metadata['timestamp']:
+        datestr = str(metadata['timestamp'][0])
+    if datestr is None:
+        raise ValueError("missing date or timestamp")
+
+    datestr = bytes(datestr)
+    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
+    exth.write(datestr)
+    nrecs += 1
+    if is_periodical:
+        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
+        exth.write(datestr)
+        nrecs += 1
+
+    if be_kindlegen2:
+        vals = {204:201, 205:2, 206:2, 207:35621}
+    elif is_periodical:
+        # Pretend to be amazon's super secret periodical generator
+        vals = {204:201, 205:2, 206:0, 207:101}
+    else:
+        # Pretend to be kindlegen 1.2
+        vals = {204:201, 205:1, 206:2, 207:33307}
+    for code, val in vals.iteritems():
+        exth.write(pack(b'>III', code, 12, val))
+        nrecs += 1
+
+    # Optional integer records; hasfakecover (always 0) follows coveroffset
+    for name, val in (
+            ('coveroffset', cover_offset),
+            ('hasfakecover', None if cover_offset is None else 0),
+            ('thumboffset', thumbnail_offset),
+            ('startreading', start_offset),
+            ('num_of_resources', num_of_resources),
+            ('kf8_unknown_count', kf8_unknown_count)):
+        if val is not None:
+            exth.write(pack(b'>III', EXTH_CODES[name], 12, val))
+            nrecs += 1
+
+    exth = exth.getvalue()
+    trail = len(exth) % 4
+    pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
+    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
+    return b''.join(exth)
+
+
diff --git a/src/calibre/ebooks/mobi/writer8/header.py b/src/calibre/ebooks/mobi/writer8/header.py
index 31571d0f5f..94ae722f59 100644
--- a/src/calibre/ebooks/mobi/writer8/header.py
+++ b/src/calibre/ebooks/mobi/writer8/header.py
@@ -7,6 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import random
 from io import BytesIO
 from collections import OrderedDict
 from struct import pack
@@ -16,6 +17,7 @@ from calibre.ebooks.mobi.utils import align_block
 NULL = 0xffffffff
 zeroes = lambda x: b'\0'*x
 nulls = lambda x: b'\xff'*x
+short = lambda x: pack(b'>H', x)
 
 class Header(OrderedDict):
 
@@ -25,7 +27,9 @@ class Header(OrderedDict):
     '''
 
     ALIGN_BLOCK = False
-    POSITIONS = {}
+    POSITIONS = {}  # Mapping of position field to field whose position should
+                    # be stored in the position field
+    SHORT_FIELDS = set()
 
     def __init__(self):
         OrderedDict.__init__(self)
@@ -36,13 +40,17 @@ class Header(OrderedDict):
             name, val = [x.strip() for x in line.partition('=')[0::2]]
             if val:
                 val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
-                    'nulls':nulls})
+                    'nulls':nulls, 'short':short, 'random':random})
             else:
                 val = 0
             if name in self:
                 raise ValueError('Duplicate field in definition: %r'%name)
             self[name] = val
 
+    @property
+    def dynamic_fields(self):
+        return tuple(k for k, v in self.iteritems() if v is None)
+
     def __call__(self, **kwargs):
         positions = {}
         for name, val in kwargs.iteritems():
@@ -58,7 +66,8 @@ class Header(OrderedDict):
             if val is None:
                 raise ValueError('Dynamic field %r not set'%name)
             if isinstance(val, (int, long)):
-                val = pack(b'>I', val)
+                fmt = 'H' if name in self.SHORT_FIELDS else 'I'
+                val = pack(b'>'+fmt, val)
             buf.write(val)
 
         for pos_field, field in self.POSITIONS.iteritems():
diff --git a/src/calibre/ebooks/mobi/writer8/index.py b/src/calibre/ebooks/mobi/writer8/index.py
index 1cf9f02d4b..a3d5c6763f 100644
--- a/src/calibre/ebooks/mobi/writer8/index.py
+++ b/src/calibre/ebooks/mobi/writer8/index.py
@@ -182,7 +182,7 @@ class Index(object): # {{{
         if len(body) + self.HEADER_LENGTH >= 0x10000:
             raise too_large
         header = b'INDX'
-        buf.truncate(0)
+        buf.seek(0), buf.truncate(0)
         buf.write(pack(b'>I', self.HEADER_LENGTH))
         buf.write(b'\0'*4) # Unknown
         buf.write(pack(b'>I', 1)) # Header type? Or index record number?
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index e061da7df6..2b0eebb13d 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -26,6 +26,7 @@ from calibre.ebooks.oeb.parse_utils import barename
 from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
 from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
         ChunkIndex, GuideIndex)
+from calibre.ebooks.mobi.writer8.mobi import KF8Book
 
 XML_DOCS = OEB_DOCS | {SVG_MIME}
 
@@ -42,7 +43,7 @@ class KF8Writer(object):
         self.used_images = set()
         self.resources = resources
         self.flows = [None] # First flow item is reserved for the text
-        self.records = []
+        self.records = [None] # Placeholder for zeroth record
 
         self.log('\tGenerating KF8 markup...')
         self.dup_data()
@@ -266,9 +267,10 @@ class KF8Writer(object):
             start = 0 if i == 0 else self.fdst_table[-1].end
             self.fdst_table.append(FDST(start, start + len(flow)))
             entries.extend(self.fdst_table[-1])
-        rec = (b'FDST' + pack(b'>LL', len(self.fdst_table), 12) +
+        rec = (b'FDST' + pack(b'>LL', 12, len(self.fdst_table)) +
                 pack(b'>%dL'%len(entries), *entries))
         self.fdst_records = [rec]
+        self.fdst_count = len(self.fdst_table)
 
     def create_indices(self):
         self.skel_records = SkelIndex(self.skel_table)()
@@ -347,3 +349,7 @@ class KF8Writer(object):
         if self.guide_table:
             self.guide_records = GuideIndex(self.guide_table)()
 
+def create_kf8_book(oeb, opts, resources):
+    ''' Run KF8Writer over the book and wrap the result in a KF8Book '''
+    return KF8Book(KF8Writer(oeb, opts, resources))
+
diff --git a/src/calibre/ebooks/mobi/writer8/mobi.py b/src/calibre/ebooks/mobi/writer8/mobi.py
new file mode 100644
index 0000000000..aa432c487a
--- /dev/null
+++ b/src/calibre/ebooks/mobi/writer8/mobi.py
@@ -0,0 +1,302 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import time
+from struct import pack
+
+from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
+from calibre.ebooks.mobi.writer8.header import Header
+from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
+from calibre.ebooks.mobi.langcodes import iana2mobi
+from calibre.ebooks.mobi.writer8.exth import build_exth
+from calibre.utils.filenames import ascii_filename
+
+NULL_INDEX = 0xffffffff
+
+class MOBIHeader(Header): # {{{
+    '''
+    Represents the first record in a MOBI file, contains all the metadata about
+    the file.
+    '''
+
+    FILE_VERSION = 8 # Fills both the file_version and min_version fields
+
+    DEFINITION = '''
+    # 0: Compression
+    compression = DYN
+
+    # 2: Unused
+    unused1 = zeroes(2)
+
+    # 4: Text length
+    text_length = DYN
+
+    # 8: Last text record
+    last_text_record = DYN
+
+    # 10: Text record size
+    record_size = {record_size}
+
+    # 12: Unused
+    unused2
+
+    # 16: Ident
+    ident = b'MOBI'
+
+    # 20: Header length
+    header_length = 248
+
+    # 24: Book Type (0x2 - Book, 0x101 - News hierarchical, 0x102 - News
+    # (flat), 0x103 - News magazine same as 0x101)
+    book_type = DYN
+
+    # 28: Text encoding (utf-8 = 65001)
+    encoding = 65001
+
+    # 32: UID
+    uid = random.randint(0, 0xffffffff)
+
+    # 36: File version
+    file_version = {file_version}
+
+    # 40: Meta orth record (Chunk table index in KF8)
+    meta_orth_record = DYN
+
+    # 44: Meta infl index
+    meta_infl_index = NULL
+
+    # 48: Extra indices
+    extra_index0 = NULL
+    extra_index1 = NULL
+    extra_index2 = NULL
+    extra_index3 = NULL
+    extra_index4 = NULL
+    extra_index5 = NULL
+    extra_index6 = NULL
+    extra_index7 = NULL
+
+    # 80: First non text record
+    first_non_text_record = DYN
+
+    # 84: Title offset
+    title_offset
+
+    # 88: Title Length
+    title_length = DYN
+
+    # 92: Language code
+    language_code = DYN
+
+    # 96: Dictionary in and out languages
+    in_lang
+    out_lang
+
+    # 104: Min version
+    min_version = {file_version}
+
+    # 108: First resource record
+    first_resource_record = DYN
+
+    # 112: Huff/CDIC compression
+    huff_first_record
+    huff_count
+
+    # 120: DATP records
+    datp_first_record
+    datp_count
+
+    # 128: EXTH flags
+    exth_flags = DYN
+
+    # 132: Unknown
+    unknown = zeroes(32)
+
+    # 164: DRM
+    drm_offset = NULL
+    drm_count = NULL
+    drm_size
+    drm_flags
+
+    # 180: Unknown
+    unknown2 = zeroes(12)
+
+    # 192: FDST
+    fdst_record = DYN
+    fdst_count = DYN
+
+    # 200: FCI
+    fcis_record = NULL
+    fcis_count
+
+    # 208: FLIS
+    flis_record = NULL
+    flis_count
+
+    # 216: Unknown
+    unknown3 = zeroes(8)
+
+    # 224: SRCS
+    srcs_record = NULL
+    srcs_count
+
+    # 232: Unknown
+    unknown4 = nulls(8)
+
+    # 240: Extra data flags
+    # 0b1 - extra multibyte bytes after text records
+    # 0b10 - TBS indexing data (only used in MOBI 6)
+    # 0b100 - uncrossable breaks only used in MOBI 6
+    extra_data_flags = 1
+
+    # 244: KF8 Indices
+    ncx_index = DYN
+    chunk_index = DYN
+    skel_index = DYN
+    datp_index = NULL
+    guide_index = DYN
+
+    # 264: EXTH
+    exth = DYN
+
+    # Full title
+    full_title = DYN
+
+    # Padding to allow amazon's DTP service to add data
+    padding = zeroes(8192)
+    '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)
+    # Integer fields that Header packs as 16 bit shorts instead of 32 bit ints
+    SHORT_FIELDS = {'compression', 'last_text_record', 'record_size'}
+    ALIGN_BLOCK = True # Header's alignment switch is ALIGN_BLOCK, not ALIGN
+    POSITIONS = {'title_offset':'full_title'} # position field -> target field
+
+    def format_value(self, name, val):
+        # NOTE(review): assumes Header defines format_value() -- confirm
+        if name == 'compression': val = PALMDOC if val else UNCOMPRESSED
+        return super(MOBIHeader, self).format_value(name, val)
+
+# }}}
+
+# KF8Book below supplies a value for every field MOBIHeader marks as DYN
+
+class KF8Book(object):
+    ''' The records of a standalone KF8 file, built from a KF8Writer '''
+
+    def __init__(self, writer):
+        self.build_records(writer)
+
+    def build_records(self, writer):
+        ''' Append the index, resource, FDST and EOF records to
+        writer.records and work out the dynamic MOBI header fields '''
+        metadata = writer.oeb.metadata
+        # The text records
+        for x in ('last_text_record_idx', 'first_non_text_record_idx'):
+            setattr(self, x.rpartition('_')[0], getattr(writer, x))
+        self.records = writer.records
+        self.text_length = writer.text_length
+
+        # KF8 Indices
+        self.chunk_index = self.meta_orth_record = len(self.records)
+        self.records.extend(writer.chunk_records)
+        self.skel_index = len(self.records)
+        self.records.extend(writer.skel_records)
+        self.guide_index = NULL_INDEX
+        if writer.guide_records:
+            self.guide_index = len(self.records)
+            self.records.extend(writer.guide_records)
+        self.ncx_index = NULL_INDEX
+        if writer.ncx_records:
+            self.ncx_index = len(self.records)
+            self.records.extend(writer.ncx_records)
+
+        # Resources
+        resources = writer.resources
+        for x in ('cover_offset', 'thumbnail_offset', 'masthead_offset'):
+            setattr(self, x, getattr(resources, x))
+
+        # Index of the first resource record (taken before they are appended)
+        self.first_resource_record = NULL_INDEX
+        if resources.records:
+            self.first_resource_record = len(self.records)
+            self.records.extend(resources.records)
+        self.num_of_resources = len(resources.records)
+
+        # FDST
+        self.fdst_count = writer.fdst_count
+        self.fdst_record = len(self.records)
+        self.records.extend(writer.fdst_records)
+
+        # EOF
+        self.records.append(b'\xe9\x8e\r\n') # EOF record
+
+        # Miscellaneous header fields
+        self.compression = writer.compress
+        self.book_type = 0x101 if writer.opts.mobi_periodical else 2
+        self.full_title = utf8_text(unicode(metadata.title[0]))
+        self.title_length = len(self.full_title)
+
+        self.language_code = iana2mobi(str(metadata.language[0]))
+        self.exth_flags = 0b1010000
+        if writer.opts.mobi_periodical:
+            self.exth_flags |= 0b1000
+
+        self.opts = writer.opts
+        self.start_offset = writer.start_offset
+        self.metadata = metadata
+
+    @property
+    def record0(self):
+        ''' We generate the EXTH header and record0 dynamically, to allow other
+        code to customize various values after build_records() has been
+        called'''
+        opts = self.opts
+        kuc = 0 if self.num_of_resources > 0 else None
+        self.exth = build_exth(self.metadata,
+                prefer_author_sort=opts.prefer_author_sort,
+                is_periodical=opts.mobi_periodical,
+                share_not_sync=opts.share_not_sync,
+                cover_offset=self.cover_offset,
+                thumbnail_offset=self.thumbnail_offset,
+                num_of_resources=self.num_of_resources,
+                kf8_unknown_count=kuc, be_kindlegen2=True,
+                start_offset=self.start_offset, mobi_doctype=self.book_type)
+
+        kwargs = {field:getattr(self, field) for field in
+                ('compression', 'text_length', 'last_text_record',
+                'book_type', 'meta_orth_record', 'first_non_text_record',
+                'title_length', 'language_code', 'first_resource_record',
+                'exth_flags', 'fdst_record', 'fdst_count', 'ncx_index',
+                'chunk_index', 'skel_index', 'guide_index', 'exth',
+                'full_title')}
+        return MOBIHeader()(**kwargs)
+
+    def write(self, outpath):
+        ''' Write the PalmDB header followed by all records to outpath '''
+        records = [self.record0] + self.records[1:] # [0] is a placeholder
+
+        with open(outpath, 'wb') as f:
+            # Write PalmDB Header
+            title = ascii_filename(self.full_title.decode('utf-8')).replace(
+                    ' ', '_')[:31]
+            title += (b'\0' * (32 - len(title)))
+            now = int(time.time())
+            nrecords = len(records)
+            f.write(title)
+            f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
+            f.write(b'BOOKMOBI')
+            f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
+            offset = f.tell() + (8 * nrecords) + 2
+            for i, record in enumerate(records):
+                f.write(pack(b'>I', offset))
+                f.write(b'\0' + pack(b'>I', 2*i)[1:])
+                offset += len(record)
+            f.write(b'\0\0')
+
+            for rec in records:
+                f.write(rec)
+