Metadata caching, removed db_id from file names.

This commit is contained in:
Charles Haley 2010-05-11 16:25:30 +01:00
parent 866fda04fe
commit 93bcff6a83
12 changed files with 309 additions and 89 deletions

View File

@ -455,6 +455,7 @@ from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, KOBO
from calibre.devices.htc_td2.driver import HTC_TD2
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
@ -539,6 +540,7 @@ plugins += [
PALMPRE,
KOBO,
AZBOOKA,
HTC_TD2
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]

View File

@ -0,0 +1,10 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.devices.usbms.driver import USBMS
class HTC_TD2(USBMS):
    '''
    USB mass storage driver for the HTC TD2 phone.

    The directories that e-books are sent to can be customized by the user
    (see :meth:`post_open_callback`).
    '''

    # Plugin identification
    name           = 'HTC TD2 Phone driver'
    gui_name       = 'HTC TD2'
    description    = _('Communicate with HTC TD2 phones.')
    author         = 'Charles Haley'
    supported_platforms = ['windows']

    # Ordered list of supported formats
    FORMATS     = ['epub', 'pdf']

    # USB identification: vendor id -> product id -> list of BCD values
    VENDOR_ID   = {
            # HTC
            0x0bb4 : { 0x0c30 : [0x000]},
            }

    # Default destination directory, also used as the default value of the
    # user customization string
    EBOOK_DIR_MAIN = ['EBooks']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
            'send e-books to on the device. The first one that exists will '
            'be used')
    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

    VENDOR_NAME      = ['']
    WINDOWS_MAIN_MEM = ['']

    # Not enabled (left as in the original driver):
#    OSX_MAIN_MEM = 'HTC TD2 Phone Media'
#    MAIN_MEMORY_VOLUME_LABEL  = 'HTC Phone Internal Memory'

    SUPPORTS_SUB_DIRS = True

    def post_open_callback(self):
        '''
        Set ``EBOOK_DIR_MAIN`` on this instance from the user's settings.

        The ``extra_customization`` setting is split on commas and each
        entry is stripped of surrounding whitespace. When the setting is
        empty the existing ``EBOOK_DIR_MAIN`` value is kept.
        '''
        configured = self.settings().extra_customization
        if configured:
            targets = [entry.strip() for entry in configured.split(',')]
        else:
            targets = self.EBOOK_DIR_MAIN
        self.EBOOK_DIR_MAIN = targets

View File

@ -55,13 +55,7 @@ class JETBOOK(USBMS):
au = mi.format_authors()
if not au:
au = 'Unknown'
suffix = ''
if getattr(mi, 'application_id', None) is not None:
base = fname.rpartition('.')[0]
suffix = '_%s'%mi.application_id
if base.endswith(suffix):
suffix = ''
return '%s#%s%s%s' % (au, title, fileext, suffix)
return '%s#%s%s' % (au, title, fileext)
@classmethod
def metadata_from_path(cls, path):

View File

@ -55,7 +55,7 @@ class Book(object):
title = book_metadata_field("title")
authors = book_metadata_field("author", \
formatter=lambda x: x if x and x.strip() else _('Unknown'))
formatter=lambda x: [x if x and x.strip() else _('Unknown')])
mime = book_metadata_field("mime")
rpath = book_metadata_field("path")
id = book_metadata_field("id", formatter=int)

View File

@ -121,6 +121,14 @@ class PRS505(CLI, Device):
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
def filename_callback(self, fname, mi):
    '''
    Return ``fname`` with the application id of the book appended to its
    base name.

    If ``mi`` has an ``application_id`` attribute that is not None, the
    string ``_<application_id>`` is inserted just before the extension,
    unless the base name already ends with it. Otherwise ``fname`` is
    returned unchanged.

    :param fname: The default file name (base name plus extension).
    :param mi: Metadata object; only ``application_id`` is read.
    :return: The possibly modified file name.
    '''
    app_id = getattr(mi, 'application_id', None)
    if app_id is None:
        return fname

    base, dot, ext = fname.rpartition('.')
    if not dot:
        # No extension: rpartition() returned the whole name as the last
        # element, so treat it as the base name instead of the extension.
        base, ext = fname, ''

    suffix = '_%s'%app_id
    if not base.endswith(suffix):
        fname = base + suffix + dot + ext
    return fname
def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):

View File

@ -8,25 +8,62 @@ import os
import re
import time
from calibre.ebooks.metadata import MetaInformation
from calibre.devices.mime import mime_type_ext
from calibre.devices.interface import BookList as _BookList
class Book(object):
class Book(MetaInformation):
def __init__(self, path, title, authors, mime):
self.title = title
self.authors = authors
self.mime = mime
self.size = os.path.getsize(path)
BOOK_ATTRS = ['lpath', 'size', 'mime']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid'
]
def __init__(self, prefix, lpath, size=None, other=None):
from calibre.ebooks.metadata.meta import path_to_ext
MetaInformation.__init__(self, '')
self.path = os.path.join(prefix, lpath)
self.lpath = lpath
self.mime = mime_type_ext(path_to_ext(lpath))
self.size = os.stat(self.path).st_size if size == None else size
self.db_id = None
try:
self.datetime = time.gmtime(os.path.getctime(path))
self.datetime = time.gmtime(os.path.getctime(self.path))
except ValueError:
self.datetime = time.gmtime()
self.path = path
self.thumbnail = None
self.tags = []
if other:
self.smart_update(other)
def __eq__(self, other):
return self.path == other.path
spath = self.path
opath = other.path
if not isinstance(self.path, unicode):
try:
spath = unicode(self.path)
except:
try:
spath = self.path.decode('utf-8')
except:
spath = self.path
if not isinstance(other.path, unicode):
try:
opath = unicode(other.path)
except:
try:
opath = other.path.decode('utf-8')
except:
opath = other.path
return spath == opath
@dynamic_property
def title_sorter(self):
@ -39,24 +76,37 @@ class Book(object):
def thumbnail(self):
return None
def __str__(self):
'''
Return a utf-8 encoded string with title author and path information
'''
return self.title.encode('utf-8') + " by " + \
self.authors.encode('utf-8') + " at " + self.path.encode('utf-8')
# def __str__(self):
# '''
# Return a utf-8 encoded string with title author and path information
# '''
# return self.title.encode('utf-8') + " by " + \
# self.authors.encode('utf-8') + " at " + self.path.encode('utf-8')
@property
def db_id(self):
'''The database id in the application database that this file corresponds to'''
match = re.search(r'_(\d+)$', self.path.rpartition('.')[0])
if match:
return int(match.group(1))
def smart_update(self, other):
    '''
    Merge the information in C{other} into self. In case of conflicts, the information
    in C{other} takes precedence, unless the information in C{other} is NULL.

    The generic metadata fields are merged by
    C{MetaInformation.smart_update}; the device specific fields listed in
    C{BOOK_ATTRS} are merged here.
    '''
    MetaInformation.smart_update(self, other)

    for attr in self.BOOK_ATTRS:
        val = getattr(other, attr, None)
        # A missing or None value in other must not overwrite what we have
        if val is not None:
            setattr(self, attr, val)
def to_json(self):
    '''
    Return a dictionary mapping every attribute name listed in
    C{JSON_ATTRS} to the current value of that attribute on this object.
    '''
    return dict((field, getattr(self, field)) for field in self.JSON_ATTRS)
class BookList(_BookList):
def supports_tags(self):
return False
return True
def set_tags(self, book, tags):
pass
book.tags = tags

View File

@ -784,14 +784,8 @@ class Device(DeviceConfig, DevicePlugin):
def filename_callback(self, default, mi):
'''
Callback to allow drivers to change the default file name
set by :method:`create_upload_path`. By default, add the DB_ID
to the end of the string. Helps with ondevice doc matching
set by :method:`create_upload_path`.
'''
if getattr(mi, 'application_id', None) is not None:
base = default.rpartition('.')[0]
suffix = '_%s'%mi.application_id
if not base.endswith(suffix):
default = base + suffix + '.' + default.rpartition('.')[-1]
return default
def sanitize_path_components(self, components):

View File

@ -11,15 +11,14 @@ for a particular device.
'''
import os
import fnmatch
import re
import json
from itertools import cycle
from calibre.utils.date import now
from calibre.ebooks.metadata import authors_to_string
from calibre.devices.usbms.cli import CLI
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
from calibre.devices.mime import mime_type_ext
# CLI must come before Device as it implements the CLI functions that
# are inherited from the device interface in Device.
@ -30,7 +29,8 @@ class USBMS(CLI, Device):
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = []
CAN_SET_METADATA = False
CAN_SET_METADATA = True
METADATA_CACHE = 'metadata.calibre'
def get_device_information(self, end_session=True):
self.report_progress(1.0, _('Get device information...'))
@ -38,7 +38,10 @@ class USBMS(CLI, Device):
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
start_time = now()
bl = BookList()
metadata = BookList()
need_sync = False
if oncard == 'carda' and not self._card_a_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
@ -55,6 +58,37 @@ class USBMS(CLI, Device):
self.EBOOK_DIR_CARD_B if oncard == 'cardb' else \
self.get_main_ebook_dir()
#print 'after booklist get', now() - start_time
bl, need_sync = self.parse_metadata_cache(prefix, self.METADATA_CACHE)
#print 'after parse_metadata_cache', now() - start_time
# make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {}
for idx,b in enumerate(bl):
bl_cache[b.path] = idx
self.count_found_in_bl = 0
#print 'after make cache', now() - start_time
def update_booklist(filename, path, prefix):
changed = False
if path_to_ext(filename) in self.FORMATS:
try:
lpath = os.path.join(path, filename).partition(prefix)[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
p = os.path.join(prefix, lpath)
if p in bl_cache:
item, changed = self.__class__.update_metadata_item(bl[bl_cache[p]])
self.count_found_in_bl += 1
else:
item = self.__class__.book_from_path(prefix, lpath)
changed = True
metadata.append(item)
except: # Probably a filename encoding error
import traceback
traceback.print_exc()
return changed
if isinstance(ebook_dirs, basestring):
ebook_dirs = [ebook_dirs]
for ebook_dir in ebook_dirs:
@ -63,32 +97,33 @@ class USBMS(CLI, Device):
# Get all books in the ebook_dir directory
if self.SUPPORTS_SUB_DIRS:
for path, dirs, files in os.walk(ebook_dir):
# Filter out anything that isn't in the list of supported ebook types
for book_type in self.FORMATS:
match = fnmatch.filter(files, '*.%s' % (book_type))
for i, filename in enumerate(match):
self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a filename encoding error
import traceback
traceback.print_exc()
continue
for filename in files:
self.report_progress(50.0, _('Getting list of books on device...'))
changed = update_booklist(filename, path, prefix)
if changed:
need_sync = True
else:
paths = os.listdir(ebook_dir)
for i, filename in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
if path_to_ext(filename) in self.FORMATS:
try:
bl.append(self.__class__.book_from_path(os.path.join(ebook_dir, filename)))
except: # Probably a file name encoding error
import traceback
traceback.print_exc()
continue
changed = update_booklist(filename, ebook_dir, prefix)
if changed:
need_sync = True
# if count != len(bl) then there were items in it that we did not
# find on the device. If need_sync is True then there were either items
# on the device that were not in bl or some of the items were changed.
if self.count_found_in_bl != len(bl) or need_sync:
if oncard == 'cardb':
self.sync_booklists((None, None, metadata))
elif oncard == 'carda':
self.sync_booklists((None, metadata, None))
else:
self.sync_booklists((metadata, None, None))
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
#print 'at return', now() - start_time
return metadata
def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):
@ -128,15 +163,28 @@ class USBMS(CLI, Device):
pass
def add_books_to_metadata(self, locations, metadata, booklists):
metadata = iter(metadata)
for i, location in enumerate(locations):
self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
info = metadata.next()
path = location[0]
blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0
book = self.book_from_path(path)
if self._main_prefix:
prefix = self._main_prefix if path.startswith(self._main_prefix) else None
if not prefix and self._card_a_prefix:
prefix = self._card_a_prefix if path.startswith(self._card_a_prefix) else None
if not prefix and self._card_b_prefix:
prefix = self._card_b_prefix if path.startswith(self._card_b_prefix) else None
lpath = path.partition(prefix)[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
if not book in booklists[blist]:
book = Book(prefix, lpath, other=info)
if book not in booklists[blist]:
booklists[blist].append(book)
self.report_progress(1.0, _('Adding books to device metadata listing...'))
def delete_books(self, paths, end_session=True):
@ -170,13 +218,59 @@ class USBMS(CLI, Device):
self.report_progress(1.0, _('Removing books from device metadata listing...'))
def sync_booklists(self, booklists, end_session=True):
    '''
    Write the metadata cache file for each storage location.

    For every storage prefix (main memory, card A, card B) that is not None
    and whose entry in ``booklists`` is a ``BookList``, the books are
    serialized with ``to_json()`` and written as JSON to the file named by
    ``METADATA_CACHE`` in that prefix. Missing prefix directories are
    created first.

    :param booklists: Sequence of three entries, indexed main, card A,
        card B. Entries that are not ``BookList`` instances are skipped.
    :param end_session: Not used by this implementation.
    '''
    if not os.path.exists(self._main_prefix):
        os.makedirs(self._main_prefix)

    def write_prefix(prefix, listid):
        # Only write a cache for locations that exist and were listed
        if prefix is not None and isinstance(booklists[listid], BookList):
            if not os.path.exists(prefix):
                os.makedirs(prefix)
            js = [item.to_json() for item in booklists[listid]]
            with open(os.path.join(prefix, self.METADATA_CACHE), 'wb') as f:
                json.dump(js, f, indent=2, encoding='utf-8')

    write_prefix(self._main_prefix, 0)
    write_prefix(self._card_a_prefix, 1)
    write_prefix(self._card_b_prefix, 2)

    self.report_progress(1.0, _('Sending metadata to device...'))
@classmethod
def parse_metadata_cache(cls, prefix, name):
    '''
    Load the list of books stored in a metadata cache file.

    The cache is a JSON list of dictionaries. Each dictionary must have an
    ``lpath`` entry naming a file that exists below ``prefix``; every key
    of the dictionary is set as an attribute on the resulting ``Book``.

    :param prefix: Directory containing the cache file and the books.
    :param name: File name of the cache inside ``prefix``.
    :return: Tuple ``(booklist, need_sync)``. ``need_sync`` is True when
        the cache has entries for files that no longer exist, or when an
        existing cache file could not be read, so that the caller knows
        the cache must be rewritten.
    '''
    bl = BookList()
    need_sync = False
    cache_path = os.path.join(prefix, name)

    # No cache yet (e.g. first connection): not an error, nothing to load
    if not os.path.exists(cache_path):
        return bl, need_sync

    try:
        with open(cache_path, 'rb') as f:
            js = json.load(f, encoding='utf-8')
        for item in js:
            lpath = item.get('lpath', None)
            if not lpath or not os.path.exists(os.path.join(prefix, lpath)):
                # Stale entry: drop it and make sure the cache is rewritten
                need_sync = True
                continue
            book = Book(prefix, lpath)
            for key in item.keys():
                setattr(book, key, item[key])
            bl.append(book)
    except Exception:
        import traceback
        traceback.print_exc()
        # Discard anything partially loaded and have the unreadable cache
        # replaced on the next sync
        bl = BookList()
        need_sync = True
    return bl, need_sync
@classmethod
def update_metadata_item(cls, item):
    '''
    Refresh a cached book entry if the file on the device has changed.

    The file is considered changed when its current size differs from the
    cached ``item.size``. In that case the metadata is read again from the
    file, merged into ``item`` and the cached size is updated.

    :param item: The cached book; ``item.path`` and ``item.size`` are read.
    :return: Tuple ``(item, changed)``.
    '''
    changed = False
    size = os.stat(item.path).st_size
    if size != item.size:
        changed = True
        mi = cls.metadata_from_path(item.path)
        item.smart_update(mi)
        # Remember the new size, otherwise the book would be detected as
        # changed (and read again) on every subsequent call
        item.size = size
    return item, changed
@classmethod
def metadata_from_path(cls, path):
    '''
    Return the metadata for the single file at ``path``, as produced by
    :meth:`metadata_from_formats` for a one element list.
    '''
    return cls.metadata_from_formats([path])
@classmethod
@ -187,13 +281,11 @@ class USBMS(CLI, Device):
return metadata_from_formats(fmts)
@classmethod
def book_from_path(cls, path):
from calibre.ebooks.metadata.meta import path_to_ext
def book_from_path(cls, prefix, path):
from calibre.ebooks.metadata import MetaInformation
mime = mime_type_ext(path_to_ext(path))
if cls.settings().read_metadata or cls.MUST_READ_METADATA:
mi = cls.metadata_from_path(path)
mi = cls.metadata_from_path(os.path.join(prefix, path))
else:
from calibre.ebooks.metadata.meta import metadata_from_filename
mi = metadata_from_filename(os.path.basename(path),
@ -203,7 +295,5 @@ class USBMS(CLI, Device):
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0],
[_('Unknown')])
authors = authors_to_string(mi.authors)
book = Book(path, mi.title, authors, mime)
book = Book(prefix, path, other=mi)
return book

View File

@ -253,6 +253,16 @@ class MetaInformation(object):
):
setattr(self, x, getattr(mi, x, None))
def print_all_attributes(self):
    '''
    Debugging aid: print the name and the current value of each metadata
    attribute, one attribute per line. An attribute that is not present on
    this object is shown with the value ``None``.
    '''
    for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
              'series', 'series_index', 'rating', 'isbn', 'language',
              'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
              'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
              'rights', 'publication_type', 'uuid',
              ):
        # Single parenthesized argument: same output with the print
        # statement and with the print function
        print('%s %s' % (x, getattr(self, x, 'None')))
def smart_update(self, mi):
'''
Merge the information in C{mi} into self. In case of conflicts, the information

View File

@ -1011,22 +1011,34 @@ class DeviceGUI(object):
book_title = re.sub('(?u)\W|[_]', '', book_title)
if book_title not in self.book_on_device_cache[i]:
self.book_on_device_cache[i][book_title] = \
{'authors':set(), 'db_ids':set()}
{'authors':set(), 'db_ids':set(), 'uuids':set()}
book_authors = authors_to_string(book.authors).lower()
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
self.book_on_device_cache[i][book_title]['authors'].add(book_authors)
self.book_on_device_cache[i][book_title]['db_ids'].add(book.db_id)
id = getattr(book, 'application_id', None)
if id is None:
id = book.db_id
if id is not None:
self.book_on_device_cache[i][book_title]['db_ids'].add(id)
uuid = getattr(book, 'uuid', None)
# Only record real uuids; a None entry could never match a library uuid
if uuid is not None:
    self.book_on_device_cache[i][book_title]['uuids'].add(uuid)
db_title = self.library_view.model().db.title(index, index_is_id=True).lower()
db = self.library_view.model().db
db_title = db.title(index, index_is_id=True).lower()
db_title = re.sub('(?u)\W|[_]', '', db_title)
au = self.library_view.model().db.authors(index, index_is_id=True)
db_authors = au.lower() if au else ''
db_authors = db.authors(index, index_is_id=True)
db_authors = db_authors.lower() if db_authors else ''
db_authors = re.sub('(?u)\W|[_]', '', db_authors)
db_uuid = db.uuid(index, index_is_id=True)
for i, l in enumerate(self.booklists()):
d = self.book_on_device_cache[i].get(db_title, None)
if d and (index in d['db_ids'] or db_authors in d['authors']):
loc[i] = True
break
if d:
if db_uuid in d['uuids'] or \
index in d['db_ids'] or \
db_authors in d['authors']:
loc[i] = True
break
return loc
def set_books_in_library(self, booklists, reset=False):
@ -1036,12 +1048,13 @@ class DeviceGUI(object):
for id, title in self.library_view.model().db.all_titles():
title = re.sub('(?u)\W|[_]', '', title.lower())
if title not in self.book_in_library_cache:
self.book_in_library_cache[title] = {'authors':set(), 'db_ids':set()}
self.book_in_library_cache[title] = {'authors':set(), 'db_ids':set(), 'uuids':set()}
au = self.library_view.model().db.authors(id, index_is_id=True)
authors = au.lower() if au else ''
authors = re.sub('(?u)\W|[_]', '', authors)
self.book_in_library_cache[title]['authors'].add(authors)
self.book_in_library_cache[title]['db_ids'].add(id)
self.book_in_library_cache[title]['uuids'].add(self.library_view.model().db.uuid(id, index_is_id=True))
# Now iterate through all the books on the device, setting the in_library field
for booklist in booklists:
@ -1051,6 +1064,10 @@ class DeviceGUI(object):
book.in_library = False
d = self.book_in_library_cache.get(book_title, None)
if d is not None:
if getattr(book, 'uuid', None) in d['uuids'] or \
getattr(book, 'application_id', None) in d['db_ids']:
book.in_library = True
continue
if book.db_id in d['db_ids']:
book.in_library = True
continue

View File

@ -1248,7 +1248,7 @@ class OnDeviceSearch(SearchQueryParser):
locations = ['title', 'author', 'tag', 'format'] if location == 'all' else [location]
q = {
'title' : lambda x : getattr(x, 'title').lower(),
'author': lambda x: getattr(x, 'authors').lower(),
'author': lambda x: ' & '.join(getattr(x, 'authors')).lower(),
'tag':lambda x: ','.join(getattr(x, 'tags')).lower(),
'format':lambda x: os.path.splitext(x.path)[1].lower()
}
@ -1447,9 +1447,8 @@ class DeviceBooksModel(BooksModel):
if not au:
au = self.unknown
if role == Qt.EditRole:
return QVariant(au)
authors = string_to_authors(au)
return QVariant(" & ".join(authors))
return QVariant(authors_to_string(au))
return QVariant(" & ".join(au))
elif col == 2:
size = self.db[self.map[row]].size
return QVariant(BooksView.human_readable(size))