diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index da820cffbb..ee880000f0 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -163,7 +163,7 @@ def render_html(path_to_html, width=590, height=750, as_xhtml=True): def check_ebook_format(stream, current_guess): ans = current_guess - if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1'): + if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'): stream.seek(0) if stream.read(3) == 'TPZ': ans = 'tpz' diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py index b9136e5a13..e28389d7f7 100644 --- a/src/calibre/ebooks/metadata/archive.py +++ b/src/calibre/ebooks/metadata/archive.py @@ -70,7 +70,7 @@ class ArchiveExtract(FileTypePlugin): fname = fnames[0] ext = os.path.splitext(fname)[1][1:] if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', - 'mp3', 'pdb', 'azw', 'azw1', 'fb2'): + 'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'): return archive of = self.temporary_file('_archive_extract.'+ext) diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index 61afe3c49c..fd1335ec98 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [ 'html', 'htm', 'xhtml', 'xhtm', 'rtf', 'fb2', 'pdf', 'prc', 'odt', 'epub', 'lit', 'lrx', 'lrf', 'mobi', - 'rb', 'imp', 'azw', 'snb' + 'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb' ] # The priorities for loading metadata from different file types diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 846015f491..e701946c01 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -341,11 +341,14 @@ class MetadataUpdater(object): kindle_pdoc = None share_not_sync = False if mi.author_sort and pas: - authors = mi.author_sort - update_exth_record((100, normalize(authors).encode(self.codec, 'replace'))) + # We want an EXTH field per author... + authors = mi.author_sort.split(' & ') + for author in authors: + update_exth_record((100, normalize(author).encode(self.codec, 'replace'))) elif mi.authors: - authors = ';'.join(mi.authors) - update_exth_record((100, normalize(authors).encode(self.codec, 'replace'))) + authors = mi.authors + for author in authors: + update_exth_record((100, normalize(author).encode(self.codec, 'replace'))) if mi.publisher: update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace'))) if mi.comments: @@ -360,6 +363,7 @@ class MetadataUpdater(object): if mi.isbn: update_exth_record((104, mi.isbn.encode(self.codec, 'replace'))) if mi.tags: + # FIXME: Keep a single subject per EXTH field? subjects = '; '.join(mi.tags) update_exth_record((105, normalize(subjects).encode(self.codec, 'replace'))) diff --git a/src/calibre/ebooks/metadata/rar.py b/src/calibre/ebooks/metadata/rar.py index a9b5d45546..58ca283a1a 100644 --- a/src/calibre/ebooks/metadata/rar.py +++ b/src/calibre/ebooks/metadata/rar.py @@ -32,7 +32,7 @@ def get_metadata(stream): if stream_type: stream_type = stream_type[1:] if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub', - 'rb', 'imp', 'pdf', 'lrf', 'azw'): + 'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'): with TemporaryDirectory() as tdir: with CurrentDir(tdir): stream = extract_member(path, match=None, name=f, diff --git a/src/calibre/ebooks/metadata/zip.py b/src/calibre/ebooks/metadata/zip.py index 887975b993..7369d2055c 100644 --- a/src/calibre/ebooks/metadata/zip.py +++ b/src/calibre/ebooks/metadata/zip.py @@ -23,7 +23,7 @@ def get_metadata(stream): if stream_type: stream_type = stream_type[1:] if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub', - 'rb', 'imp', 'pdf', 'lrf', 'azw'): + 'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'): with TemporaryDirectory() as tdir: with CurrentDir(tdir): path = zf.extract(f) diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py index 5b80a46f1b..3b1a0ac2f8 100644 --- a/src/calibre/ebooks/mobi/debug/headers.py +++ b/src/calibre/ebooks/mobi/debug/headers.py @@ -116,61 +116,83 @@ class Record(object): # {{{ # EXTH {{{ class EXTHRecord(object): - def __init__(self, type_, data): + def __init__(self, type_, data, length): self.type = type_ self.data = data + self.length = length self.name = { - 1 : 'DRM Server id', - 2 : 'DRM Commerce id', - 3 : 'DRM ebookbase book id', - 100 : 'author', - 101 : 'publisher', - 102 : 'imprint', - 103 : 'description', - 104 : 'isbn', - 105 : 'subject', - 106 : 'publishingdate', - 107 : 'review', - 108 : 'contributor', - 109 : 'rights', - 110 : 'subjectcode', - 111 : 'type', - 112 : 'source', - 113 : 'asin', - 114 : 'versionnumber', + 1 : 'Drm Server Id', + 2 : 'Drm Commerce Id', + 3 : 'Drm Ebookbase Book Id', + 100 : 'Creator', + 101 : 'Publisher', + 102 : 'Imprint', + 103 : 'Description', + 104 : 'ISBN', + 105 : 'Subject', + 106 : 'Published', + 107 : 'Review', + 108 : 'Contributor', + 109 : 'Rights', + 110 : 'SubjectCode', + 111 : 'Type', + 112 : 'Source', + 113 : 'ASIN', + 114 : 'versionNumber', 115 : 'sample', - 116 : 'startreading', - 117 : 'adult', - 118 : 'retailprice', - 119 : 'retailpricecurrency', - 121 : 'KF8 header section index', - 125 : 'KF8 resources (images/fonts) count', - 129 : 'KF8 cover URI', - 131 : 'KF8 unknown count', - 201 : 'coveroffset', - 202 : 'thumboffset', - 203 : 'hasfakecover', + 116 : 'StartOffset', + 117 : 'Adult', + 118 : 'Price', + 119 : 'Currency', + 121 : 'KF8_Boundary_Section', + 122 : 'fixed-layout', + 123 : 'book-type', + 124 : 'orientation-lock', + 125 : 'KF8_Count_of_Resources_Fonts_Images', + 126 : 'original-resolution', + 127 : 'zero-gutter', + 128 : 'zero-margin', + 129 : 'KF8_Masthead/Cover_Image', + 131 : 'KF8_Unidentified_Count', + 132 : 'RegionMagnification', + 200 : 'DictShortName', + 201 : 'CoverOffset', + 202 : 'ThumbOffset', + 203 : 'Fake Cover', 204 : 'Creator Software', 205 : 'Creator Major Version', # '>I' 206 : 'Creator Minor Version', # '>I' 207 : 'Creator Build Number', # '>I' - 208 : 'watermark', - 209 : 'tamper_proof_keys', - 300 : 'fontsignature', - 301 : 'clippinglimit', # percentage '>B' - 402 : 'publisherlimit', - 404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled - 501 : 'cdetype', # 4 chars (PDOC or EBOK) - 502 : 'lastupdatetime', - 503 : 'updatedtitle', - 524 : 'language', + 208 : 'Watermark', + 209 : 'Tamper Proof Keys [hex]', + 300 : 'Font Signature [hex]', + 301 : 'Clipping Limit [3xx]', # percentage '>B' + 401 : 'Clipping Limit', # percentage '>B' + 402 : 'Publisher Limit', + 404 : 'Text to Speech Disabled', # '>B' 1 - TTS disabled 0 - TTS enabled + 501 : 'CDE Type', # 4 chars (PDOC, EBOK, MAGZ, ...) + 502 : 'last_update_time', + 503 : 'Updated Title', + 504 : 'ASIN [5xx]', + 524 : 'Language', + 525 : 'TextDirection', + 528 : 'Unknown_Logical_Value', + 535 : 'Kindlegen Build-Rev Number', }.get(self.type, repr(self.type)) - if (self.name in {'coveroffset', 'thumboffset', 'hasfakecover', - 'Creator Major Version', 'Creator Minor Version', - 'Creator Build Number', 'Creator Software', 'startreading'} or + if (self.name in {'sample', 'StartOffset', 'CoverOffset', 'ThumbOffset', 'Fake Cover', + 'Creator Software', 'Creator Major Version', 'Creator Minor Version', + 'Creator Build Number', 'Clipping Limit (3xx)', 'Clipping Limit', + 'Publisher Limit', 'Text to Speech Disabled'} or self.type in {121, 125, 131}): - self.data, = struct.unpack(b'>I', self.data) + if self.length == 9: + self.data, = struct.unpack(b'>B', self.data) + elif self.length == 10: + self.data, = struct.unpack(b'>H', self.data) + else: + self.data, = struct.unpack(b'>L', self.data) + elif self.type in {209, 300}: + self.data = bytes(self.data.encode('hex')) def __str__(self): return '%s (%d): %r'%(self.name, self.type, self.data) @@ -181,8 +203,8 @@ class EXTHHeader(object): self.raw = raw if not self.raw.startswith(b'EXTH'): raise ValueError('EXTH header does not start with EXTH') - self.length, = struct.unpack(b'>I', self.raw[4:8]) - self.count, = struct.unpack(b'>I', self.raw[8:12]) + self.length, = struct.unpack(b'>L', self.raw[4:8]) + self.count, = struct.unpack(b'>L', self.raw[8:12]) pos = 12 self.records = [] @@ -199,9 +221,9 @@ class EXTHHeader(object): return getattr(ans, 'data', default) def read_record(self, pos): - type_, length = struct.unpack(b'>II', self.raw[pos:pos+8]) + type_, length = struct.unpack(b'>LL', self.raw[pos:pos+8]) data = self.raw[(pos+8):(pos+length)] - self.records.append(EXTHRecord(type_, data)) + self.records.append(EXTHRecord(type_, data, length)) return pos + length @property diff --git a/src/calibre/ebooks/mobi/debug/mobi6.py b/src/calibre/ebooks/mobi/debug/mobi6.py index fb5674653c..938629e391 100644 --- a/src/calibre/ebooks/mobi/debug/mobi6.py +++ b/src/calibre/ebooks/mobi/debug/mobi6.py @@ -802,7 +802,7 @@ def inspect_mobi(mobi_file, ddir): alltext += rec.raw of.seek(0) - root = html.fromstring(alltext.decode('utf-8')) + root = html.fromstring(alltext.decode(f.mobi_header.encoding)) with open(os.path.join(ddir, 'pretty.html'), 'wb') as of: of.write(html.tostring(root, pretty_print=True, encoding='utf-8', include_meta_content_type=True)) diff --git a/src/calibre/ebooks/mobi/reader/headers.py b/src/calibre/ebooks/mobi/reader/headers.py index bfbffe546e..2578e8ec9a 100644 --- a/src/calibre/ebooks/mobi/reader/headers.py +++ b/src/calibre/ebooks/mobi/reader/headers.py @@ -88,7 +88,7 @@ class EXTHHeader(object): # {{{ self.mi.authors = [] au = content.decode(codec, 'ignore').strip() self.mi.authors.append(au) - if re.match(r'\S+?\s*,\s+\S+', au.strip()): + if self.mi.is_null('author_sort') and re.match(r'\S+?\s*,\s+\S+', au.strip()): self.mi.author_sort = au.strip() elif idx == 101: self.mi.publisher = content.decode(codec, 'ignore').strip() diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py index 508b77ce5b..09e2b14bc7 100644 --- a/src/calibre/ebooks/mobi/writer8/exth.py +++ b/src/calibre/ebooks/mobi/writer8/exth.py @@ -13,6 +13,7 @@ from io import BytesIO from calibre.ebooks.mobi.utils import utf8_text from calibre.utils.localization import lang_as_iso639_1 +from calibre.ebooks.metadata import authors_to_sort_string EXTH_CODES = { 'creator': 100, @@ -54,8 +55,8 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False, items = metadata[term] if term == 'creator': if prefer_author_sort: - creators = [unicode(c.file_as or c) for c in - items][:1] + creators = [authors_to_sort_string([unicode(c)]) for c in + items] else: creators = [unicode(c) for c in items] items = creators diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index ef7ed7a594..4f3e9fc066 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -30,7 +30,7 @@ def get_filters(): (_('LRF Books'), ['lrf']), (_('HTML Books'), ['htm', 'html', 'xhtm', 'xhtml']), (_('LIT Books'), ['lit']), - (_('MOBI Books'), ['mobi', 'prc', 'azw']), + (_('MOBI Books'), ['mobi', 'prc', 'azw', 'azw3']), (_('Topaz books'), ['tpz','azw1']), (_('Text books'), ['txt', 'text', 'rtf']), (_('PDF Books'), ['pdf', 'azw4']),