Implement support for reading metadata from Amazon Topaz books

This commit is contained in:
Kovid Goyal 2009-03-15 14:20:12 -07:00
parent f785c76883
commit 6e5547011a
2 changed files with 52 additions and 1 deletions

View File

@@ -132,13 +132,24 @@ class HTMLMetadataReader(MetadataReaderPlugin):
class MOBIMetadataReader(MetadataReaderPlugin):
    '''Metadata reader plugin registered for mobi, prc and azw files.'''

    name = 'Read MOBI metadata'
    # Extensions are listed without a leading dot ('azw', not '.azw')
    file_types = set(['mobi', 'prc', 'azw'])
    description = _('Read metadata from %s files')%'MOBI'

    def get_metadata(self, stream, ftype):
        '''
        Return the metadata read from ``stream``.

        :param stream: Open file-like object positioned on the book data.
        :param ftype: File type of the book; not used by this reader.
        :return: Whatever ``calibre.ebooks.mobi.reader.get_metadata`` returns.
        '''
        # Imported at call time rather than when this module is loaded
        from calibre.ebooks.mobi.reader import get_metadata
        return get_metadata(stream)
class TOPAZMetadataReader(MetadataReaderPlugin):
    '''Metadata reader plugin registered for tpz and azw1 files.'''

    name = 'Read Topaz metadata'
    file_types = set(['tpz', 'azw1'])
    # Names the format this plugin actually handles (Topaz, not MOBI)
    description = _('Read metadata from %s files')%'Topaz'

    def get_metadata(self, stream, ftype):
        '''
        Return the metadata read from ``stream``.

        :param stream: Open file-like object positioned on the book data.
        :param ftype: File type of the book; not used by this reader.
        :return: Whatever ``calibre.ebooks.metadata.topaz.get_metadata``
                 returns.
        '''
        # Imported at call time rather than when this module is loaded
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
class ODTMetadataReader(MetadataReaderPlugin):
name = 'Read ODT metadata'

View File

@@ -0,0 +1,40 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
''' Read metadata from Amazon's topaz format '''
def read_record(raw, name):
    '''
    Return the payload of the record called ``name`` inside ``raw``.

    The record is located by searching for the first occurrence of ``name``.
    The single byte that follows the name is taken as the payload length and
    the payload is the run of that many bytes after it.

    :param raw: Byte string to search.
    :param name: Record name to look for, of the same string type as ``raw``.
    :return: The payload (shorter than the declared length if ``raw`` ends
             early), or ``None`` when ``name`` is absent or nothing follows
             it.
    '''
    idx = raw.find(name)
    if idx < 0:
        return None
    start = idx + len(name)
    # Slice instead of indexing: this yields an empty string rather than
    # raising IndexError when the name is the last thing in raw, and it
    # always hands ord() a length-one string whatever the type of raw.
    length_byte = raw[start:start+1]
    if not length_byte:
        return None
    length = ord(length_byte)
    start += 1
    return raw[start:start+length]
def get_metadata(stream):
    '''
    Read the title and authors from the start of a Topaz book.

    Only the first 8 KiB of ``stream`` are read. The records are looked up
    in the 1000 bytes that begin at the second occurrence of the string
    ``metadata``.

    :param stream: File-like object whose ``read`` returns the book data.
    :return: A ``MetaInformation`` built from the title and the list of
             authors; the authors default to a single translated 'Unknown'
             entry when no Authors record is present.
    :raises ValueError: If the data does not start with ``TPZ``, if
                        ``metadata`` does not occur twice, or if no Title
                        record is found.
    '''
    marker = 'metadata'
    header = stream.read(8*1024)
    if not header.startswith('TPZ'):
        raise ValueError('Not a Topaz file')

    # The second search starts 10 characters past the first hit
    first_hit = header.find(marker)
    second_hit = -1 if first_hit < 0 else header.find(marker, first_hit+10)
    if second_hit < 0:
        raise ValueError('Invalid Topaz file')
    block = header[second_hit:second_hit+1000]

    # Authors are stored as one ';' separated string
    raw_authors = read_record(block, 'Authors')
    if not raw_authors:
        authors = [_('Unknown')]
    else:
        authors = raw_authors.decode('utf-8', 'replace').split(';')

    raw_title = read_record(block, 'Title')
    if not raw_title:
        raise ValueError('No metadata in file')
    title = raw_title.decode('utf-8', 'replace')

    # Imported only once the data has been parsed successfully
    from calibre.ebooks.metadata import MetaInformation
    return MetaInformation(title, authors)
if __name__ == '__main__':
    # Command line use: print the metadata of the book given as argument 1
    import sys
    # Close the book file deterministically instead of leaking the handle
    with open(sys.argv[1], 'rb') as stream:
        # A single parenthesised argument prints the same under Python 2
        print(get_metadata(stream))