From 6c42c0ea6cef09a945cd887e0af898d7bafc1733 Mon Sep 17 00:00:00 2001
From: NiLuJe <ninuje@gmail.com>
Date: Fri, 14 Sep 2012 17:55:15 +0200
Subject: [PATCH 1/4] Produce an output more similar to Mobi Unpack when
 dumping a MOBI header

---
 src/calibre/ebooks/mobi/debug/headers.py | 120 ++++++++++++++---------
 1 file changed, 72 insertions(+), 48 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py
index 5b80a46f1b..7e2fc6f3cb 100644
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@@ -116,61 +116,85 @@ class Record(object): # {{{
 # EXTH {{{
 class EXTHRecord(object):
 
-    def __init__(self, type_, data):
+    def __init__(self, type_, data, length):
         self.type = type_
         self.data = data
+        self.length = length
         self.name = {
-                1 : 'DRM Server id',
-                2 : 'DRM Commerce id',
-                3 : 'DRM ebookbase book id',
-                100 : 'author',
-                101 : 'publisher',
-                102 : 'imprint',
-                103 : 'description',
-                104 : 'isbn',
-                105 : 'subject',
-                106 : 'publishingdate',
-                107 : 'review',
-                108 : 'contributor',
-                109 : 'rights',
-                110 : 'subjectcode',
-                111 : 'type',
-                112 : 'source',
-                113 : 'asin',
-                114 : 'versionnumber',
+                  1 : 'Drm Server Id',
+                  2 : 'Drm Commerce Id',
+                  3 : 'Drm Ebookbase Book Id',
+                100 : 'Creator',
+                101 : 'Publisher',
+                102 : 'Imprint',
+                103 : 'Description',
+                104 : 'ISBN',
+                105 : 'Subject',
+                106 : 'Published',
+                107 : 'Review',
+                108 : 'Contributor',
+                109 : 'Rights',
+                110 : 'SubjectCode',
+                111 : 'Type',
+                112 : 'Source',
+                113 : 'ASIN',
+                114 : 'versionNumber',
                 115 : 'sample',
-                116 : 'startreading',
-                117 : 'adult',
-                118 : 'retailprice',
-                119 : 'retailpricecurrency',
-                121 : 'KF8 header section index',
-                125 : 'KF8 resources (images/fonts) count',
-                129 : 'KF8 cover URI',
-                131 : 'KF8 unknown count',
-                201 : 'coveroffset',
-                202 : 'thumboffset',
-                203 : 'hasfakecover',
+                116 : 'StartOffset',
+                117 : 'Adult',
+                118 : 'Price',
+                119 : 'Currency',
+                121 : 'KF8_Boundary_Section',
+                122 : 'fixed-layout',
+                123 : 'book-type',
+                124 : 'orientation-lock',
+                125 : 'KF8_Count_of_Resources_Fonts_Images',
+                126 : 'original-resolution',
+                127 : 'zero-gutter',
+                128 : 'zero-margin',
+                129 : 'KF8_Masthead/Cover_Image',
+                131 : 'KF8_Unidentified_Count',
+                132 : 'RegionMagnification',
+                200 : 'DictShortName',
+                201 : 'CoverOffset',
+                202 : 'ThumbOffset',
+                203 : 'Fake Cover',
                 204 : 'Creator Software',
                 205 : 'Creator Major Version', # '>I'
                 206 : 'Creator Minor Version', # '>I'
                 207 : 'Creator Build Number', # '>I'
-                208 : 'watermark',
-                209 : 'tamper_proof_keys',
-                300 : 'fontsignature',
-                301 : 'clippinglimit', # percentage '>B'
-                402 : 'publisherlimit',
-                404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
-                501 : 'cdetype', # 4 chars (PDOC or EBOK)
-                502 : 'lastupdatetime',
-                503 : 'updatedtitle',
-                524 : 'language',
+                208 : 'Watermark',
+                209 : 'Tamper Proof Keys [hex]',
+                300 : 'Font Signature [hex]',
+                301 : 'Clipping Limit [3xx]', # percentage '>B'
+                401 : 'Clipping Limit', # percentage '>B'
+                402 : 'Publisher Limit',
+                404 : 'Text to Speech Disabled', # '>B' 1 - TTS disabled 0 - TTS enabled
+                501 : 'CDE Type', # 4 chars (PDOC, EBOK, MAGZ, ...)
+                502 : 'last_update_time',
+                503 : 'Updated Title',
+                504 : 'ASIN [5xx]',
+                524 : 'Language',
+                525 : 'TextDirection',
+                528 : 'Unknown_Logical_Value',
+                535 : 'Kindlegen Build-Rev Number',
         }.get(self.type, repr(self.type))
 
-        if (self.name in {'coveroffset', 'thumboffset', 'hasfakecover',
-                'Creator Major Version', 'Creator Minor Version',
-                'Creator Build Number', 'Creator Software', 'startreading'} or
+        if (self.name in {'sample', 'StartOffset', 'CoverOffset', 'ThumbOffset', 'Fake Cover',
+                'Creator Software', 'Creator Major Version', 'Creator Minor Version',
+                'Creator Build Number', 'Clipping Limit [3xx]', 'Clipping Limit',
+                'Publisher Limit', 'Text to Speech Disabled'} or
                 self.type in {121, 125, 131}):
-            self.data, = struct.unpack(b'>I', self.data)
+            if self.length == 9:
+                self.data, = struct.unpack(b'>B', self.data)
+            elif self.length == 10:
+                self.data, = struct.unpack(b'>H', self.data)
+            elif self.length == 12:
+                self.data, = struct.unpack(b'>L', self.data)
+            else:
+                self.data = int(self.data.encode('hex') or '0', 16) # Any other width, read as big-endian ('>I' needs exactly 4 bytes and would raise here)
+        elif self.type in {209, 300}:
+            self.data = bytes(self.data.encode('hex'))
 
     def __str__(self):
         return '%s (%d): %r'%(self.name, self.type, self.data)
@@ -181,8 +205,8 @@ class EXTHHeader(object):
         self.raw = raw
         if not self.raw.startswith(b'EXTH'):
             raise ValueError('EXTH header does not start with EXTH')
-        self.length, = struct.unpack(b'>I', self.raw[4:8])
-        self.count,  = struct.unpack(b'>I', self.raw[8:12])
+        self.length, = struct.unpack(b'>L', self.raw[4:8])
+        self.count,  = struct.unpack(b'>L', self.raw[8:12])
 
         pos = 12
         self.records = []
@@ -199,9 +223,9 @@ class EXTHHeader(object):
         return getattr(ans, 'data', default)
 
     def read_record(self, pos):
-        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
+        type_, length = struct.unpack(b'>LL', self.raw[pos:pos+8])
         data = self.raw[(pos+8):(pos+length)]
-        self.records.append(EXTHRecord(type_, data))
+        self.records.append(EXTHRecord(type_, data, length))
         return pos + length
 
     @property

From 8297eebb61a5b3987d4d4452dc22377b3ab89690 Mon Sep 17 00:00:00 2001
From: NiLuJe <ninuje@gmail.com>
Date: Fri, 14 Sep 2012 17:56:15 +0200
Subject: [PATCH 2/4] Don't horribly die when dumping a mobi whose HTML content
 is not encoded in UTF-8

---
 src/calibre/ebooks/mobi/debug/mobi6.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/debug/mobi6.py b/src/calibre/ebooks/mobi/debug/mobi6.py
index fb5674653c..938629e391 100644
--- a/src/calibre/ebooks/mobi/debug/mobi6.py
+++ b/src/calibre/ebooks/mobi/debug/mobi6.py
@@ -802,7 +802,7 @@ def inspect_mobi(mobi_file, ddir):
             alltext += rec.raw
         of.seek(0)
 
-    root = html.fromstring(alltext.decode('utf-8'))
+    root = html.fromstring(alltext.decode(f.mobi_header.encoding))
     with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
         of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
             include_meta_content_type=True))

From ca467cdb2ee2d7ed9c858e54dab6e64f321d514f Mon Sep 17 00:00:00 2001
From: NiLuJe <ninuje@gmail.com>
Date: Fri, 14 Sep 2012 17:58:09 +0200
Subject: [PATCH 3/4] Split each author into its own EXTH Creator field, even
 with the 'Use author_sort as author' setting. Also, on a slightly unrelated
 note, sprinkle azw3 awareness in a few places.

---
 src/calibre/ebooks/__init__.py          |  2 +-
 src/calibre/ebooks/metadata/archive.py  |  2 +-
 src/calibre/ebooks/metadata/meta.py     |  4 ++--
 src/calibre/ebooks/metadata/mobi.py     | 12 ++++++++----
 src/calibre/ebooks/metadata/rar.py      |  2 +-
 src/calibre/ebooks/metadata/zip.py      |  2 +-
 src/calibre/ebooks/mobi/writer8/exth.py | 10 +++++++++-
 src/calibre/gui2/actions/add.py         |  2 +-
 src/calibre/gui2/wizard/__init__.py     |  2 +-
 9 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index da820cffbb..ee880000f0 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -163,7 +163,7 @@ def render_html(path_to_html, width=590, height=750, as_xhtml=True):
 
 def check_ebook_format(stream, current_guess):
     ans = current_guess
-    if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1'):
+    if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'):
         stream.seek(0)
         if stream.read(3) == 'TPZ':
             ans = 'tpz'
diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py
index b9136e5a13..e28389d7f7 100644
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@@ -70,7 +70,7 @@ class ArchiveExtract(FileTypePlugin):
         fname = fnames[0]
         ext = os.path.splitext(fname)[1][1:]
         if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
-                'mp3', 'pdb', 'azw', 'azw1', 'fb2'):
+                'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'):
             return archive
 
         of = self.temporary_file('_archive_extract.'+ext)
diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py
index 61afe3c49c..83d109fcef 100644
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
                        'html', 'htm', 'xhtml', 'xhtm',
                        'rtf', 'fb2', 'pdf', 'prc', 'odt',
                        'epub', 'lit', 'lrx', 'lrf', 'mobi',
-                       'rb', 'imp', 'azw', 'snb'
+                       'rb', 'imp', 'azw', 'azw3', 'azw1', 'snb'
                       ]
 
 # The priorities for loading metadata from different file types
@@ -85,7 +85,7 @@ def _get_metadata(stream, stream_type, use_libprs_metadata,
     if stream_type: stream_type = stream_type.lower()
     if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
         stream_type = 'html'
-    if stream_type in ('mobi', 'prc', 'azw'):
+    if stream_type in ('mobi', 'prc', 'azw', 'azw1', 'azw3'):
         stream_type = 'mobi'
     if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'):
         stream_type = 'odt'
diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py
index 846015f491..e701946c01 100644
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -341,11 +341,14 @@ class MetadataUpdater(object):
             kindle_pdoc = None
             share_not_sync = False
         if mi.author_sort and pas:
-            authors = mi.author_sort
-            update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
+            # We want an EXTH field per author...
+            authors = mi.author_sort.split(' & ')
+            for author in authors:
+                update_exth_record((100, normalize(author).encode(self.codec, 'replace')))
         elif mi.authors:
-            authors = ';'.join(mi.authors)
-            update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
+            authors = mi.authors
+            for author in authors:
+                update_exth_record((100, normalize(author).encode(self.codec, 'replace')))
         if mi.publisher:
             update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
         if mi.comments:
@@ -360,6 +363,7 @@ class MetadataUpdater(object):
         if mi.isbn:
             update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
         if mi.tags:
+            # FIXME: Keep a single subject per EXTH field?
             subjects = '; '.join(mi.tags)
             update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
 
diff --git a/src/calibre/ebooks/metadata/rar.py b/src/calibre/ebooks/metadata/rar.py
index a9b5d45546..58ca283a1a 100644
--- a/src/calibre/ebooks/metadata/rar.py
+++ b/src/calibre/ebooks/metadata/rar.py
@@ -32,7 +32,7 @@ def get_metadata(stream):
         if stream_type:
             stream_type = stream_type[1:]
             if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
-                               'rb', 'imp', 'pdf', 'lrf', 'azw'):
+                               'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'):
                 with TemporaryDirectory() as tdir:
                     with CurrentDir(tdir):
                        stream = extract_member(path, match=None, name=f,
diff --git a/src/calibre/ebooks/metadata/zip.py b/src/calibre/ebooks/metadata/zip.py
index 887975b993..7369d2055c 100644
--- a/src/calibre/ebooks/metadata/zip.py
+++ b/src/calibre/ebooks/metadata/zip.py
@@ -23,7 +23,7 @@ def get_metadata(stream):
         if stream_type:
             stream_type = stream_type[1:]
             if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
-                               'rb', 'imp', 'pdf', 'lrf', 'azw'):
+                               'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'):
                 with TemporaryDirectory() as tdir:
                     with CurrentDir(tdir):
                         path = zf.extract(f)
diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py
index 508b77ce5b..fa0da0d2ee 100644
--- a/src/calibre/ebooks/mobi/writer8/exth.py
+++ b/src/calibre/ebooks/mobi/writer8/exth.py
@@ -54,8 +54,16 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         items = metadata[term]
         if term == 'creator':
             if prefer_author_sort:
-                creators = [unicode(c.file_as or c) for c in
+                # This is a bit hackish... We only take the first item in the creators list,
+                # because we only care about the file_as property, and every creator element carries *all* the authors in it,
+                # so we only need one, or we would end up with duplicates.
+                # That leaves us with a single item in our list, containing every author, in author sort syntax, separated by ' & '.
+                # That's not good enough, because we want each author in a separate entry in the list, so we split it on every ' & '.
+                # This way, we properly end up with multiple Creator fields in the EXTH header, one for each author, like KindleGen.
+                all_creators = [unicode(c.file_as or c) for c in
                         items][:1]
+                creators = (all_creators[0].split(' & ') # One entry per author
+                        if all_creators else []) # No creators at all: keep the name bound
             else:
                 creators = [unicode(c) for c in items]
             items = creators
diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py
index ef7ed7a594..4f3e9fc066 100644
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@@ -30,7 +30,7 @@ def get_filters():
             (_('LRF Books'), ['lrf']),
             (_('HTML Books'), ['htm', 'html', 'xhtm', 'xhtml']),
             (_('LIT Books'), ['lit']),
-            (_('MOBI Books'), ['mobi', 'prc', 'azw']),
+            (_('MOBI Books'), ['mobi', 'prc', 'azw', 'azw3']),
             (_('Topaz books'), ['tpz','azw1']),
             (_('Text books'), ['txt', 'text', 'rtf']),
             (_('PDF Books'), ['pdf', 'azw4']),
diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py
index d831307d9a..569c72ae55 100644
--- a/src/calibre/gui2/wizard/__init__.py
+++ b/src/calibre/gui2/wizard/__init__.py
@@ -446,7 +446,7 @@ class KindlePage(QWizardPage, KindleUI):
             if not accounts: accounts = {}
             for y in accounts.values():
                 y[2] = False
-            accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True]
+            accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1, AZW3', True, True]
             conf.set('accounts', accounts)
 
     def nextId(self):

From e669ffd8eccedb80f444e124ad97a3b08aa46c5a Mon Sep 17 00:00:00 2001
From: NiLuJe <ninuje@gmail.com>
Date: Fri, 14 Sep 2012 18:49:13 +0200
Subject: [PATCH 4/4] Nope, Amazon still doesn't allow sending AZW3 over email,
 my bad.

---
 src/calibre/gui2/wizard/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py
index 569c72ae55..d831307d9a 100644
--- a/src/calibre/gui2/wizard/__init__.py
+++ b/src/calibre/gui2/wizard/__init__.py
@@ -446,7 +446,7 @@ class KindlePage(QWizardPage, KindleUI):
             if not accounts: accounts = {}
             for y in accounts.values():
                 y[2] = False
-            accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1, AZW3', True, True]
+            accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True]
             conf.set('accounts', accounts)
 
     def nextId(self):