Partway through normalizing path names in the cache; also includes performance improvements to on-device matching

This commit is contained in:
Charles Haley 2010-05-12 15:40:01 +01:00
parent 49e1ca506e
commit 9b7815acf1
2 changed files with 29 additions and 19 deletions

View File

@ -10,6 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
for a particular device.
'''
import posixpath
import os
import re
import json
@ -61,7 +62,7 @@ class USBMS(CLI, Device):
# make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {}
for idx,b in enumerate(bl):
bl_cache[b.path] = idx
bl_cache[b.lpath] = idx
self.count_found_in_bl = 0
def update_booklist(filename, path, prefix):
@ -71,9 +72,9 @@ class USBMS(CLI, Device):
lpath = os.path.join(path, filename).partition(prefix)[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
p = os.path.join(prefix, lpath)
if p in bl_cache:
item, changed = self.__class__.update_metadata_item(bl[bl_cache[p]])
idx = bl_cache.get(lpath.replace('\\', '/'), None)
if idx is not None:
item, changed = self.__class__.update_metadata_item(bl[idx])
self.count_found_in_bl += 1
else:
item = self.__class__.book_from_path(prefix, lpath)
@ -109,6 +110,7 @@ class USBMS(CLI, Device):
# find on the device. If need_sync is True then there were either items
# on the device that were not in bl or some of the items were changed.
if self.count_found_in_bl != len(bl) or need_sync:
print 'resync'
if oncard == 'cardb':
self.sync_booklists((None, None, metadata))
elif oncard == 'carda':
@ -173,7 +175,7 @@ class USBMS(CLI, Device):
lpath = path.partition(prefix)[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
lpath = lpath.replace('\\', '/')
book = Book(prefix, lpath, other=info)
if book not in booklists[blist]:

View File

@ -1043,29 +1043,37 @@ class DeviceGUI(object):
def set_books_in_library(self, booklists, reset=False):
if reset:
# First build a self.book_in_library_cache of the library, so the search isn't On**2
self.book_in_library_cache = {}
for id, title in self.library_view.model().db.all_titles():
title = re.sub('(?u)\W|[_]', '', title.lower())
if title not in self.book_in_library_cache:
self.book_in_library_cache[title] = {'authors':set(), 'db_ids':set(), 'uuids':set()}
au = self.library_view.model().db.authors(id, index_is_id=True)
authors = au.lower() if au else ''
# First build a cache of the library, so the search isn't O(n**2)
self.db_book_title_cache = {}
self.db_book_uuid_cache = set()
for idx in range(self.library_view.model().db.count()):
mi = self.library_view.model().db.get_metadata(idx, index_is_id=False)
title = re.sub('(?u)\W|[_]', '', mi.title.lower())
if title not in self.db_book_title_cache:
self.db_book_title_cache[title] = {'authors':set(), 'db_ids':set()}
authors = authors_to_string(mi.authors).lower() if mi.authors else ''
authors = re.sub('(?u)\W|[_]', '', authors)
self.book_in_library_cache[title]['authors'].add(authors)
self.book_in_library_cache[title]['db_ids'].add(id)
self.book_in_library_cache[title]['uuids'].add(self.library_view.model().db.uuid(id, index_is_id=True))
self.db_book_title_cache[title]['authors'].add(authors)
self.db_book_title_cache[title]['db_ids'].add(id)
self.db_book_uuid_cache.add(mi.uuid)
# Now iterate through all the books on the device, setting the in_library field
# Fastest and most accurate key is the uuid. Second is the application_id, which
# is really the db key, but as this can accidentally match across libraries we
# also verify the title. The db_id exists on Sony devices. Fallback is title
# and author match
for booklist in booklists:
for book in booklist:
if getattr(book, 'uuid', None) in self.db_book_uuid_cache:
self.book_in_library = True
continue
book_title = book.title.lower() if book.title else ''
book_title = re.sub('(?u)\W|[_]', '', book_title)
book.in_library = False
d = self.book_in_library_cache.get(book_title, None)
d = self.db_book_title_cache.get(book_title, None)
if d is not None:
if getattr(book, 'uuid', None) in d['uuids'] or \
getattr(book, 'application_id', None) in d['db_ids']:
if getattr(book, 'application_id', None) in d['db_ids']:
book.in_library = True
continue
if book.db_id in d['db_ids']: