Fix #2058 (Calibre reads Kindle metadata wrong)

This commit is contained in:
Kovid Goyal 2009-03-15 13:09:45 -07:00
parent 7294199798
commit fad3f7a7d0
5 changed files with 51 additions and 28 deletions

View File

@ -26,12 +26,16 @@ mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
mimetypes.add_type('application/x-font-opentype', '.otf')
mimetypes.add_type('application/x-font-truetype', '.ttf')
mimetypes.add_type('application/oebps-package+xml', '.opf')
mimetypes.add_type('application/ereader', '.pdb')
mimetypes.add_type('application/mobi', '.mobi')
mimetypes.add_type('application/mobi', '.prc')
mimetypes.add_type('application/mobi', '.azw')
guess_type = mimetypes.guess_type
import cssutils
cssutils.log.setLevel(logging.WARN)

View File

@ -4,9 +4,9 @@ __copyright__ = '2009, John Schember <john at nachtimwald.com>'
Device driver for Amazon's Kindle
'''
import os
import os, re
from calibre.devices.usbms.driver import USBMS
from calibre.devices.usbms.driver import USBMS, metadata_from_formats
class KINDLE(USBMS):
# Ordered list of supported formats
@ -29,6 +29,9 @@ class KINDLE(USBMS):
EBOOK_DIR_MAIN = "documents"
EBOOK_DIR_CARD = "documents"
SUPPORTS_SUB_DIRS = True
WIRELESS_FILE_NAME_PATTERN = re.compile(
r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
def delete_books(self, paths, end_session=True):
for path in paths:
@ -40,6 +43,16 @@ class KINDLE(USBMS):
# Delete the ebook auxiliary file
if os.path.exists(filepath + '.mbp'):
os.unlink(filepath + '.mbp')
@classmethod
def metadata_from_path(cls, path):
mi = metadata_from_formats([path])
if mi.title == _('Unknown') or ('-asin' in mi.title and '-type' in mi.title):
match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
if match is not None:
mi.title = match.group('title')
return mi
class KINDLE2(KINDLE):

View File

@ -1,19 +1,20 @@
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
'''
Global Mime mapping of ebook types.
'''
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
MIME_MAP = {
'azw' : 'application/azw',
'epub' : 'application/epub+zip',
'html' : 'text/html',
'lrf' : 'application/x-sony-bbeb',
'lrx' : 'application/x-sony-bbeb',
'mobi' : 'application/mobi',
'pdf' : 'application/pdf',
'prc' : 'application/prc',
'rtf' : 'application/rtf',
'txt' : 'text/plain',
}
from calibre import guess_type
def _mt(path):
mt = guess_type(path)[0]
if not mt:
mt = 'application/octet-stream'
return mt
def mime_type_ext(ext):
if not ext.startswith('.'):
ext = '.'+ext
return _mt('a'+ext)
def mime_type_path(path):
return _mt(path)

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata import authors_to_string
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
from calibre.devices.errors import FreeSpaceError, PathError
from calibre.devices.mime import MIME_MAP
from calibre.devices.mime import mime_type_ext
class File(object):
def __init__(self, path):
@ -216,14 +216,17 @@ class USBMS(Device):
if not os.path.isdir(path):
os.utime(path, None)
@classmethod
def metadata_from_path(cls, path):
return metadata_from_formats([path])
@classmethod
def book_from_path(cls, path):
fileext = path_to_ext(path)
mi = metadata_from_formats([path])
mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown'
mi = cls.metadata_from_path(path)
mime = mime_type_ext(fileext)
authors = authors_to_string(mi.authors)
return Book(path, mi.title, authors, mime)
book = Book(path, mi.title, authors, mime)
return book

View File

@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm',
'rtf', 'fb2', 'pdf', 'prc', 'odt',
'epub', 'lit', 'lrx', 'lrf', 'mobi',
'rb', 'imp'
'rb', 'imp', 'azw'
]
# The priorities for loading metadata from different file types
@ -41,7 +41,9 @@ def metadata_from_formats(formats):
for path, ext in zip(formats, extensions):
with open(path, 'rb') as stream:
try:
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
newmi = get_metadata(stream, stream_type=ext,
use_libprs_metadata=True)
mi.smart_update(newmi)
except:
continue
if getattr(mi, 'application_id', None) is not None:
@ -58,7 +60,7 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
if stream_type: stream_type = stream_type.lower()
if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
stream_type = 'html'
if stream_type in ('mobi', 'prc'):
if stream_type in ('mobi', 'prc', 'azw'):
stream_type = 'mobi'
if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'):
stream_type = 'odt'