From 6e5547011a18d4a314ec18b45789861681717f17 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 15 Mar 2009 14:20:12 -0700
Subject: [PATCH] Implement support for reading metadata from Amazon Topaz books

---
Review notes: line breaks were restored from a whitespace-collapsed copy of
this patch, so the column alignment and trailing whitespace of the context
lines may differ from the tree (apply with --ignore-whitespace if needed).
The commit id and the blob ids on the index lines predate the review fixes.

 src/calibre/customize/builtins.py    | 13 +++++-
 src/calibre/ebooks/metadata/topaz.py | 62 ++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/ebooks/metadata/topaz.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 14d3c79062..a087e7f36d 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -132,13 +132,24 @@ class HTMLMetadataReader(MetadataReaderPlugin):
 class MOBIMetadataReader(MetadataReaderPlugin):
 
     name = 'Read MOBI metadata'
-    file_types = set(['mobi', 'prc', '.azw'])
+    file_types = set(['mobi', 'prc', 'azw'])
     description = _('Read metadata from %s files')%'MOBI'
 
     def get_metadata(self, stream, ftype):
         from calibre.ebooks.mobi.reader import get_metadata
         return get_metadata(stream)
+
+class TOPAZMetadataReader(MetadataReaderPlugin):
+
+    name = 'Read Topaz metadata'
+    file_types = set(['tpz', 'azw1'])
+    description = _('Read metadata from %s files')%'Topaz'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.topaz import get_metadata
+        return get_metadata(stream)
+
 
 class ODTMetadataReader(MetadataReaderPlugin):
 
     name = 'Read ODT metadata'
diff --git a/src/calibre/ebooks/metadata/topaz.py b/src/calibre/ebooks/metadata/topaz.py
new file mode 100644
index 0000000000..55eb9d6e69
--- /dev/null
+++ b/src/calibre/ebooks/metadata/topaz.py
@@ -0,0 +1,62 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+''' Read metadata from Amazon's topaz format '''
+
+def read_record(raw, name):
+    '''
+    Return the value of the length-prefixed record called `name` in `raw`.
+
+    The byte immediately after `name` is read as the length of the value
+    that follows it. Returns None when `name` is absent or when the length
+    byte lies outside `raw`.
+    '''
+    idx = raw.find(name)
+    if idx < 0:
+        return None
+    start = idx + len(name) + 1
+    if start > len(raw):
+        # name is the last thing in raw, so there is no length byte to read
+        return None
+    length = ord(raw[start-1])
+    return raw[start:start+length]
+
+def get_metadata(stream):
+    '''
+    Read the title and authors from the Topaz book in `stream`.
+
+    Only the first 8KB of `stream` are examined. Returns a MetaInformation
+    object; raises ValueError if the data is not a Topaz file or carries no
+    title.
+    '''
+    raw = stream.read(8*1024)
+    if not raw.startswith('TPZ'):
+        raise ValueError('Not a Topaz file')
+    first = raw.find('metadata')
+    if first < 0:
+        raise ValueError('Invalid Topaz file')
+    # The records are looked up after the second occurrence of 'metadata'
+    second = raw.find('metadata', first+10)
+    if second < 0:
+        raise ValueError('Invalid Topaz file')
+    raw = raw[second:second+1000]
+    authors = read_record(raw, 'Authors')
+    if authors:
+        authors = [a.strip() for a in
+                authors.decode('utf-8', 'replace').split(';') if a.strip()]
+    if not authors:
+        authors = [_('Unknown')]
+    title = read_record(raw, 'Title')
+    if title:
+        title = title.decode('utf-8', 'replace')
+    else:
+        raise ValueError('No metadata in file')
+    from calibre.ebooks.metadata import MetaInformation
+    return MetaInformation(title, authors)
+
+if __name__ == '__main__':
+    import sys
+    with open(sys.argv[1], 'rb') as f:
+        print get_metadata(f)