On device metadata caching

This commit is contained in:
Kovid Goyal 2010-05-11 10:30:12 -06:00
commit 5f3bc55517
12 changed files with 302 additions and 91 deletions

View File

@ -455,6 +455,7 @@ from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, KOBO
from calibre.devices.htc_td2.driver import HTC_TD2
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
@ -539,6 +540,7 @@ plugins += [
PALMPRE,
KOBO,
AZBOOKA,
HTC_TD2
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]

View File

@ -0,0 +1,10 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.devices.usbms.driver import USBMS
class HTC_TD2(USBMS):
    '''
    Device driver plugin for HTC TD2 phones, built on the USBMS driver.

    The class body is declarative configuration. The only behaviour added
    here is :meth:`post_open_callback`, which applies the user's list of
    target directories.
    '''

    name = 'HTC TD2 Phone driver'
    gui_name = 'HTC TD2'
    description = _('Communicate with HTC TD2 phones.')
    author = 'Charles Haley'
    supported_platforms = ['osx', 'linux']

    # Ordered list of supported formats
    FORMATS = ['epub', 'pdf']

    VENDOR_ID = {
        # HTC
        0x0bb4 : { 0x0c30 : [0x000]},
    }

    # Default target directories; may be replaced per instance by
    # post_open_callback().
    EBOOK_DIR_MAIN = ['EBooks']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
            'send e-books to on the device. The first one that exists will '
            'be used')
    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

    VENDOR_NAME = ['']
    WINDOWS_MAIN_MEM = ['']

    MAIN_MEMORY_VOLUME_LABEL = 'HTC Phone Internal Memory'

    SUPPORTS_SUB_DIRS = True

    def post_open_callback(self):
        '''
        Set ``EBOOK_DIR_MAIN`` on this instance from the user's settings.

        The ``extra_customization`` setting is split on commas and each
        entry is stripped of surrounding whitespace. When the setting is
        empty the existing ``EBOOK_DIR_MAIN`` value is kept.
        '''
        configured = self.settings().extra_customization
        if configured:
            targets = [entry.strip() for entry in configured.split(',')]
        else:
            targets = self.EBOOK_DIR_MAIN
        self.EBOOK_DIR_MAIN = targets

View File

@ -55,13 +55,7 @@ class JETBOOK(USBMS):
au = mi.format_authors()
if not au:
au = 'Unknown'
suffix = ''
if getattr(mi, 'application_id', None) is not None:
base = fname.rpartition('.')[0]
suffix = '_%s'%mi.application_id
if base.endswith(suffix):
suffix = ''
return '%s#%s%s%s' % (au, title, fileext, suffix)
return '%s#%s%s' % (au, title, fileext)
@classmethod
def metadata_from_path(cls, path):

View File

@ -55,7 +55,7 @@ class Book(object):
title = book_metadata_field("title")
authors = book_metadata_field("author", \
formatter=lambda x: x if x and x.strip() else _('Unknown'))
formatter=lambda x: [x if x and x.strip() else _('Unknown')])
mime = book_metadata_field("mime")
rpath = book_metadata_field("path")
id = book_metadata_field("id", formatter=int)

View File

@ -121,6 +121,14 @@ class PRS505(CLI, Device):
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
def filename_callback(self, fname, mi):
    '''
    Append ``_<application_id>`` to the base name of ``fname``.

    :param fname: proposed file name, normally ``base.ext``.
    :param mi: metadata object; only its optional ``application_id``
               attribute is read.
    :return: ``fname`` unchanged when ``mi`` has no ``application_id``
             (or it is None) or when the base name already ends with the
             suffix; otherwise the name with the suffix inserted before
             the extension.
    '''
    app_id = getattr(mi, 'application_id', None)
    if app_id is None:
        return fname
    base, dot, ext = fname.rpartition('.')
    if not dot:
        # No '.' in the name: rpartition() returns ('', '', fname), so the
        # whole name is the base and there is no extension to re-attach.
        base, ext = ext, ''
    suffix = '_%s' % app_id
    if base.endswith(suffix):
        return fname
    return base + suffix + dot + ext
def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):

View File

@ -8,25 +8,63 @@ import os
import re
import time
from calibre.ebooks.metadata import MetaInformation
from calibre.devices.mime import mime_type_ext
from calibre.devices.interface import BookList as _BookList
from calibre.constants import filesystem_encoding
class Book(object):
class Book(MetaInformation):
def __init__(self, path, title, authors, mime):
self.title = title
self.authors = authors
self.mime = mime
self.size = os.path.getsize(path)
BOOK_ATTRS = ['lpath', 'size', 'mime']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid'
]
def __init__(self, prefix, lpath, size=None, other=None):
from calibre.ebooks.metadata.meta import path_to_ext
MetaInformation.__init__(self, '')
self.path = os.path.join(prefix, lpath)
self.lpath = lpath
self.mime = mime_type_ext(path_to_ext(lpath))
self.size = os.stat(self.path).st_size if size == None else size
self.db_id = None
try:
self.datetime = time.gmtime(os.path.getctime(path))
self.datetime = time.gmtime(os.path.getctime(self.path))
except ValueError:
self.datetime = time.gmtime()
self.path = path
self.thumbnail = None
self.tags = []
if other:
self.smart_update(other)
def __eq__(self, other):
return self.path == other.path
spath = self.path
opath = other.path
if not isinstance(self.path, unicode):
try:
spath = unicode(self.path)
except:
try:
spath = self.path.decode(filesystem_encoding)
except:
spath = self.path
if not isinstance(other.path, unicode):
try:
opath = unicode(other.path)
except:
try:
opath = other.path.decode(filesystem_encoding)
except:
opath = other.path
return spath == opath
@dynamic_property
def title_sorter(self):
@ -39,24 +77,37 @@ class Book(object):
def thumbnail(self):
return None
def __str__(self):
'''
Return a utf-8 encoded string with title author and path information
'''
return self.title.encode('utf-8') + " by " + \
self.authors.encode('utf-8') + " at " + self.path.encode('utf-8')
# def __str__(self):
# '''
# Return a utf-8 encoded string with title author and path information
# '''
# return self.title.encode('utf-8') + " by " + \
# self.authors.encode('utf-8') + " at " + self.path.encode('utf-8')
@property
def db_id(self):
'''The database id in the application database that this file corresponds to'''
match = re.search(r'_(\d+)$', self.path.rpartition('.')[0])
if match:
return int(match.group(1))
def smart_update(self, other):
    '''
    Merge the information in C{other} into self.

    The generic metadata merge is delegated to
    C{MetaInformation.smart_update}. After that, every attribute named in
    C{BOOK_ATTRS} that exists on C{other} is copied onto self as-is, which
    includes a value of None.
    '''
    MetaInformation.smart_update(self, other)
    for name in self.BOOK_ATTRS:
        if not hasattr(other, name):
            continue
        setattr(self, name, getattr(other, name, None))
def to_json(self):
    '''
    Return a dict mapping each name in C{JSON_ATTRS} to the value of that
    attribute on self. Raises AttributeError if a listed attribute is
    missing.
    '''
    return dict((name, getattr(self, name)) for name in self.JSON_ATTRS)
class BookList(_BookList):
def supports_tags(self):
return False
return True
def set_tags(self, book, tags):
pass
book.tags = tags

View File

@ -784,14 +784,8 @@ class Device(DeviceConfig, DevicePlugin):
def filename_callback(self, default, mi):
'''
Callback to allow drivers to change the default file name
set by :method:`create_upload_path`. By default, add the DB_ID
to the end of the string. Helps with ondevice doc matching
set by :method:`create_upload_path`.
'''
if getattr(mi, 'application_id', None) is not None:
base = default.rpartition('.')[0]
suffix = '_%s'%mi.application_id
if not base.endswith(suffix):
default = base + suffix + '.' + default.rpartition('.')[-1]
return default
def sanitize_path_components(self, components):

View File

@ -11,15 +11,13 @@ for a particular device.
'''
import os
import fnmatch
import re
import json
from itertools import cycle
from calibre.ebooks.metadata import authors_to_string
from calibre.devices.usbms.cli import CLI
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
from calibre.devices.mime import mime_type_ext
# CLI must come before Device as it implements the CLI functions that
# are inherited from the device interface in Device.
@ -30,7 +28,8 @@ class USBMS(CLI, Device):
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = []
CAN_SET_METADATA = False
CAN_SET_METADATA = True
METADATA_CACHE = 'metadata.calibre'
def get_device_information(self, end_session=True):
self.report_progress(1.0, _('Get device information...'))
@ -39,6 +38,8 @@ class USBMS(CLI, Device):
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
bl = BookList()
metadata = BookList()
need_sync = False
if oncard == 'carda' and not self._card_a_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
@ -55,6 +56,34 @@ class USBMS(CLI, Device):
self.EBOOK_DIR_CARD_B if oncard == 'cardb' else \
self.get_main_ebook_dir()
bl, need_sync = self.parse_metadata_cache(prefix, self.METADATA_CACHE)
# make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {}
for idx,b in enumerate(bl):
bl_cache[b.path] = idx
self.count_found_in_bl = 0
def update_booklist(filename, path, prefix):
    # Add the file `filename` in directory `path` to `metadata` when its
    # extension is one of self.FORMATS. Returns True when the book was not
    # in the cached list or update_metadata_item() reported a change, and
    # False otherwise (including when processing the file failed).
    changed = False
    if path_to_ext(filename) in self.FORMATS:
        try:
            # lpath is the file's path relative to the storage prefix.
            lpath = os.path.join(path, filename).partition(prefix)[2]
            if lpath.startswith(os.sep):
                lpath = lpath[len(os.sep):]

            p = os.path.join(prefix, lpath)
            if p in bl_cache:
                item, changed = self.__class__.update_metadata_item(bl[bl_cache[p]])
                self.count_found_in_bl += 1
            else:
                item = self.__class__.book_from_path(prefix, lpath)
                changed = True
            metadata.append(item)
        except Exception: # Probably a filename encoding error
            # Only ordinary errors are swallowed so that one bad file does
            # not abort the scan; KeyboardInterrupt/SystemExit propagate.
            import traceback
            traceback.print_exc()
    return changed
if isinstance(ebook_dirs, basestring):
ebook_dirs = [ebook_dirs]
for ebook_dir in ebook_dirs:
@ -63,32 +92,33 @@ class USBMS(CLI, Device):
# Get all books in the ebook_dir directory
if self.SUPPORTS_SUB_DIRS:
for path, dirs, files in os.walk(ebook_dir):
# Filter out anything that isn't in the list of supported ebook types
for book_type in self.FORMATS:
match = fnmatch.filter(files, '*.%s' % (book_type))
for i, filename in enumerate(match):
self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a filename encoding error
import traceback
traceback.print_exc()
continue
for filename in files:
self.report_progress(50.0, _('Getting list of books on device...'))
changed = update_booklist(filename, path, prefix)
if changed:
need_sync = True
else:
paths = os.listdir(ebook_dir)
for i, filename in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
if path_to_ext(filename) in self.FORMATS:
try:
bl.append(self.__class__.book_from_path(os.path.join(ebook_dir, filename)))
except: # Probably a file name encoding error
import traceback
traceback.print_exc()
continue
changed = update_booklist(filename, ebook_dir, prefix)
if changed:
need_sync = True
# If count_found_in_bl != len(bl), the cached list contained books that
# were not found on the device. If need_sync is True, either the device
# has books that were not in the cache or some cached entries changed.
# In both cases the metadata cache on the device is rewritten.
if self.count_found_in_bl != len(bl) or need_sync:
if oncard == 'cardb':
self.sync_booklists((None, None, metadata))
elif oncard == 'carda':
self.sync_booklists((None, metadata, None))
else:
self.sync_booklists((metadata, None, None))
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
#print 'at return', now() - start_time
return metadata
def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):
@ -128,15 +158,28 @@ class USBMS(CLI, Device):
pass
def add_books_to_metadata(self, locations, metadata, booklists):
metadata = iter(metadata)
for i, location in enumerate(locations):
self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
info = metadata.next()
path = location[0]
blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0
book = self.book_from_path(path)
if self._main_prefix:
prefix = self._main_prefix if path.startswith(self._main_prefix) else None
if not prefix and self._card_a_prefix:
prefix = self._card_a_prefix if path.startswith(self._card_a_prefix) else None
if not prefix and self._card_b_prefix:
prefix = self._card_b_prefix if path.startswith(self._card_b_prefix) else None
lpath = path.partition(prefix)[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
if not book in booklists[blist]:
book = Book(prefix, lpath, other=info)
if book not in booklists[blist]:
booklists[blist].append(book)
self.report_progress(1.0, _('Adding books to device metadata listing...'))
def delete_books(self, paths, end_session=True):
@ -170,11 +213,55 @@ class USBMS(CLI, Device):
self.report_progress(1.0, _('Removing books from device metadata listing...'))
def sync_booklists(self, booklists, end_session=True):
    '''
    Write the metadata cache file for each storage location.

    :param booklists: sequence indexed 0 (main memory), 1 (card A) and
                      2 (card B). An entry that is not a C{BookList} is
                      skipped, as is any location whose prefix is None.
    :param end_session: not used by this implementation.

    Each cache is a JSON list of C{to_json()} dicts stored in the file
    named by C{METADATA_CACHE} at the root of the location's prefix.
    '''
    main_prefix = self._main_prefix
    # Guarded like write_prefix() below: the prefix may be None, and
    # os.path.exists(None) would raise instead of returning False.
    if main_prefix is not None and not os.path.exists(main_prefix):
        os.makedirs(main_prefix)

    def write_prefix(prefix, listid):
        if prefix is None or not isinstance(booklists[listid], BookList):
            return
        if not os.path.exists(prefix):
            os.makedirs(prefix)
        js = [item.to_json() for item in booklists[listid]]
        with open(os.path.join(prefix, self.METADATA_CACHE), 'wb') as f:
            json.dump(js, f, indent=2, encoding='utf-8')

    write_prefix(self._main_prefix, 0)
    write_prefix(self._card_a_prefix, 1)
    write_prefix(self._card_b_prefix, 2)

    self.report_progress(1.0, _('Sending metadata to device...'))
@classmethod
def parse_metadata_cache(cls, prefix, name):
    '''
    Load the cached book list stored in the file ``name`` under ``prefix``.

    :return: ``(bl, need_sync)``. ``bl`` is a C{BookList} holding one
             C{Book} per cache entry whose file still exists under
             ``prefix``. ``need_sync`` is True when an entry was dropped
             (no ``lpath`` or file missing) or when the cache could not
             be read at all, so the caller knows to rewrite it.
    '''
    bl = BookList()
    need_sync = False
    try:
        with open(os.path.join(prefix, name), 'rb') as f:
            js = json.load(f, encoding='utf-8')
        for item in js:
            lpath = item.get('lpath', None)
            if not lpath or not os.path.exists(os.path.join(prefix, lpath)):
                need_sync = True
                continue
            book = Book(prefix, lpath)
            for key, value in item.items():
                setattr(book, key, value)
            bl.append(book)
    except Exception:
        # A missing or corrupt cache is not fatal: report it, discard any
        # partial result and ask the caller to write a fresh cache.
        import traceback
        traceback.print_exc()
        bl = BookList()
        need_sync = True
    return bl, need_sync
@classmethod
def update_metadata_item(cls, item):
    '''
    Refresh a cached book entry when its file on the device has changed.

    A change is detected by comparing the current file size with
    ``item.size``. When they differ, metadata is re-read through
    ``cls.metadata_from_path`` and merged into ``item``.

    :param item: cached book providing ``path``, ``size`` and
                 ``smart_update``.
    :return: ``(item, changed)`` where ``changed`` is True when the size
             differed.
    '''
    changed = False
    size = os.stat(item.path).st_size
    if size != item.size:
        changed = True
        mi = cls.metadata_from_path(item.path)
        item.smart_update(mi)
        # Store the new size explicitly; nothing here shows the re-read
        # metadata carrying one. Otherwise the stale size would mark this
        # book as changed on every later scan.
        item.size = size
    return item, changed
@classmethod
def metadata_from_path(cls, path):
    '''
    Return the result of ``cls.metadata_from_formats`` called with a
    one-element list containing ``path``.
    '''
    formats = [path]
    return cls.metadata_from_formats(formats)
@ -187,13 +274,11 @@ class USBMS(CLI, Device):
return metadata_from_formats(fmts)
@classmethod
def book_from_path(cls, path):
from calibre.ebooks.metadata.meta import path_to_ext
def book_from_path(cls, prefix, path):
from calibre.ebooks.metadata import MetaInformation
mime = mime_type_ext(path_to_ext(path))
if cls.settings().read_metadata or cls.MUST_READ_METADATA:
mi = cls.metadata_from_path(path)
mi = cls.metadata_from_path(os.path.join(prefix, path))
else:
from calibre.ebooks.metadata.meta import metadata_from_filename
mi = metadata_from_filename(os.path.basename(path),
@ -203,7 +288,5 @@ class USBMS(CLI, Device):
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0],
[_('Unknown')])
authors = authors_to_string(mi.authors)
book = Book(path, mi.title, authors, mime)
book = Book(prefix, path, other=mi)
return book

View File

@ -10,7 +10,7 @@ import os, mimetypes, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath
from calibre import relpath, prints
from calibre.utils.config import tweaks
from calibre.utils.date import isoformat
@ -253,6 +253,15 @@ class MetaInformation(object):
):
setattr(self, x, getattr(mi, x, None))
def print_all_attributes(self):
    '''
    Print every attribute in the list below with its current value, one
    C{prints} call per attribute. An attribute that is missing on self is
    shown as the string 'None'.
    '''
    attribute_names = (
        'author_sort', 'title_sort', 'comments', 'category', 'publisher',
        'series', 'series_index', 'rating', 'isbn', 'language',
        'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
        'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
        'rights', 'publication_type', 'uuid',
    )
    for attribute in attribute_names:
        value = getattr(self, attribute, 'None')
        prints(attribute, value)
def smart_update(self, mi):
'''
Merge the information in C{mi} into self. In case of conflicts, the information

View File

@ -1011,20 +1011,32 @@ class DeviceGUI(object):
book_title = re.sub('(?u)\W|[_]', '', book_title)
if book_title not in self.book_on_device_cache[i]:
self.book_on_device_cache[i][book_title] = \
{'authors':set(), 'db_ids':set()}
{'authors':set(), 'db_ids':set(), 'uuids':set()}
book_authors = authors_to_string(book.authors).lower()
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
self.book_on_device_cache[i][book_title]['authors'].add(book_authors)
self.book_on_device_cache[i][book_title]['db_ids'].add(book.db_id)
id = getattr(book, 'application_id', None)
if id is None:
id = book.db_id
if id is not None:
self.book_on_device_cache[i][book_title]['db_ids'].add(id)
# Record the book's uuid so library books can be matched by uuid; books
# without one must not add None to the set.
uuid = getattr(book, 'uuid', None)
if uuid is not None:
    self.book_on_device_cache[i][book_title]['uuids'].add(uuid)
db_title = self.library_view.model().db.title(index, index_is_id=True).lower()
db = self.library_view.model().db
db_title = db.title(index, index_is_id=True).lower()
db_title = re.sub('(?u)\W|[_]', '', db_title)
au = self.library_view.model().db.authors(index, index_is_id=True)
db_authors = au.lower() if au else ''
db_authors = db.authors(index, index_is_id=True)
db_authors = db_authors.lower() if db_authors else ''
db_authors = re.sub('(?u)\W|[_]', '', db_authors)
db_uuid = db.uuid(index, index_is_id=True)
for i, l in enumerate(self.booklists()):
d = self.book_on_device_cache[i].get(db_title, None)
if d and (index in d['db_ids'] or db_authors in d['authors']):
if d:
if db_uuid in d['uuids'] or \
index in d['db_ids'] or \
db_authors in d['authors']:
loc[i] = True
break
return loc
@ -1036,12 +1048,13 @@ class DeviceGUI(object):
for id, title in self.library_view.model().db.all_titles():
title = re.sub('(?u)\W|[_]', '', title.lower())
if title not in self.book_in_library_cache:
self.book_in_library_cache[title] = {'authors':set(), 'db_ids':set()}
self.book_in_library_cache[title] = {'authors':set(), 'db_ids':set(), 'uuids':set()}
au = self.library_view.model().db.authors(id, index_is_id=True)
authors = au.lower() if au else ''
authors = re.sub('(?u)\W|[_]', '', authors)
self.book_in_library_cache[title]['authors'].add(authors)
self.book_in_library_cache[title]['db_ids'].add(id)
self.book_in_library_cache[title]['uuids'].add(self.library_view.model().db.uuid(id, index_is_id=True))
# Now iterate through all the books on the device, setting the in_library field
for booklist in booklists:
@ -1051,6 +1064,10 @@ class DeviceGUI(object):
book.in_library = False
d = self.book_in_library_cache.get(book_title, None)
if d is not None:
if getattr(book, 'uuid', None) in d['uuids'] or \
getattr(book, 'application_id', None) in d['db_ids']:
book.in_library = True
continue
if book.db_id in d['db_ids']:
book.in_library = True
continue

View File

@ -17,7 +17,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, pyqtSignal, \
SIGNAL, QObject, QSize, QModelIndex, QDate
from calibre import strftime
from calibre.ebooks.metadata import string_to_authors, fmt_sidx, authors_to_string
from calibre.ebooks.metadata import fmt_sidx, authors_to_string
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.gui2 import NONE, TableView, config, error_dialog, UNDEFINED_QDATE
from calibre.gui2.dialogs.comments_dialog import CommentsDialog
@ -1248,7 +1248,7 @@ class OnDeviceSearch(SearchQueryParser):
locations = ['title', 'author', 'tag', 'format'] if location == 'all' else [location]
q = {
'title' : lambda x : getattr(x, 'title').lower(),
'author': lambda x: getattr(x, 'authors').lower(),
'author': lambda x: ' & '.join(getattr(x, 'authors')).lower(),
'tag':lambda x: ','.join(getattr(x, 'tags')).lower(),
'format':lambda x: os.path.splitext(x.path)[1].lower()
}
@ -1447,9 +1447,8 @@ class DeviceBooksModel(BooksModel):
if not au:
au = self.unknown
if role == Qt.EditRole:
return QVariant(au)
authors = string_to_authors(au)
return QVariant(" & ".join(authors))
return QVariant(authors_to_string(au))
return QVariant(" & ".join(au))
elif col == 2:
size = self.db[self.map[row]].size
return QVariant(BooksView.human_readable(size))