Partway through normalizing path names in the cache; also includes performance improvements to on-device matching

This commit is contained in:
Charles Haley 2010-05-12 15:40:01 +01:00
parent 49e1ca506e
commit 9b7815acf1
2 changed files with 29 additions and 19 deletions

View File

@ -10,6 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
for a particular device. for a particular device.
''' '''
import posixpath
import os import os
import re import re
import json import json
@ -61,7 +62,7 @@ class USBMS(CLI, Device):
# make a dict cache of paths so the lookup in the loop below is faster. # make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {} bl_cache = {}
for idx,b in enumerate(bl): for idx,b in enumerate(bl):
bl_cache[b.path] = idx bl_cache[b.lpath] = idx
self.count_found_in_bl = 0 self.count_found_in_bl = 0
def update_booklist(filename, path, prefix): def update_booklist(filename, path, prefix):
@ -71,9 +72,9 @@ class USBMS(CLI, Device):
lpath = os.path.join(path, filename).partition(prefix)[2] lpath = os.path.join(path, filename).partition(prefix)[2]
if lpath.startswith(os.sep): if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):] lpath = lpath[len(os.sep):]
p = os.path.join(prefix, lpath) idx = bl_cache.get(lpath.replace('\\', '/'), None)
if p in bl_cache: if idx is not None:
item, changed = self.__class__.update_metadata_item(bl[bl_cache[p]]) item, changed = self.__class__.update_metadata_item(bl[idx])
self.count_found_in_bl += 1 self.count_found_in_bl += 1
else: else:
item = self.__class__.book_from_path(prefix, lpath) item = self.__class__.book_from_path(prefix, lpath)
@ -109,6 +110,7 @@ class USBMS(CLI, Device):
# find on the device. If need_sync is True then there were either items # find on the device. If need_sync is True then there were either items
# on the device that were not in bl or some of the items were changed. # on the device that were not in bl or some of the items were changed.
if self.count_found_in_bl != len(bl) or need_sync: if self.count_found_in_bl != len(bl) or need_sync:
print 'resync'
if oncard == 'cardb': if oncard == 'cardb':
self.sync_booklists((None, None, metadata)) self.sync_booklists((None, None, metadata))
elif oncard == 'carda': elif oncard == 'carda':
@ -173,7 +175,7 @@ class USBMS(CLI, Device):
lpath = path.partition(prefix)[2] lpath = path.partition(prefix)[2]
if lpath.startswith(os.sep): if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):] lpath = lpath[len(os.sep):]
lpath = lpath.replace('\\', '/')
book = Book(prefix, lpath, other=info) book = Book(prefix, lpath, other=info)
if book not in booklists[blist]: if book not in booklists[blist]:

View File

@ -1043,29 +1043,37 @@ class DeviceGUI(object):
def set_books_in_library(self, booklists, reset=False): def set_books_in_library(self, booklists, reset=False):
if reset: if reset:
# First build a self.book_in_library_cache of the library, so the search isn't On**2 # First build a cache of the library, so the search isn't On**2
self.book_in_library_cache = {} self.db_book_title_cache = {}
for id, title in self.library_view.model().db.all_titles(): self.db_book_uuid_cache = set()
title = re.sub('(?u)\W|[_]', '', title.lower()) for idx in range(self.library_view.model().db.count()):
if title not in self.book_in_library_cache: mi = self.library_view.model().db.get_metadata(idx, index_is_id=False)
self.book_in_library_cache[title] = {'authors':set(), 'db_ids':set(), 'uuids':set()} title = re.sub('(?u)\W|[_]', '', mi.title.lower())
au = self.library_view.model().db.authors(id, index_is_id=True) if title not in self.db_book_title_cache:
authors = au.lower() if au else '' self.db_book_title_cache[title] = {'authors':set(), 'db_ids':set()}
authors = authors_to_string(mi.authors).lower() if mi.authors else ''
authors = re.sub('(?u)\W|[_]', '', authors) authors = re.sub('(?u)\W|[_]', '', authors)
self.book_in_library_cache[title]['authors'].add(authors) self.db_book_title_cache[title]['authors'].add(authors)
self.book_in_library_cache[title]['db_ids'].add(id) self.db_book_title_cache[title]['db_ids'].add(id)
self.book_in_library_cache[title]['uuids'].add(self.library_view.model().db.uuid(id, index_is_id=True)) self.db_book_uuid_cache.add(mi.uuid)
# Now iterate through all the books on the device, setting the in_library field # Now iterate through all the books on the device, setting the in_library field
# Fastest and most accurate key is the uuid. Second is the application_id, which
# is really the db key, but as this can accidentally match across libraries we
# also verify the title. The db_id exists on Sony devices. Fallback is title
# and author match
for booklist in booklists: for booklist in booklists:
for book in booklist: for book in booklist:
if getattr(book, 'uuid', None) in self.db_book_uuid_cache:
self.book_in_library = True
continue
book_title = book.title.lower() if book.title else '' book_title = book.title.lower() if book.title else ''
book_title = re.sub('(?u)\W|[_]', '', book_title) book_title = re.sub('(?u)\W|[_]', '', book_title)
book.in_library = False book.in_library = False
d = self.book_in_library_cache.get(book_title, None) d = self.db_book_title_cache.get(book_title, None)
if d is not None: if d is not None:
if getattr(book, 'uuid', None) in d['uuids'] or \ if getattr(book, 'application_id', None) in d['db_ids']:
getattr(book, 'application_id', None) in d['db_ids']:
book.in_library = True book.in_library = True
continue continue
if book.db_id in d['db_ids']: if book.db_id in d['db_ids']: