mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Improved get_categories -- approximately 6 times faster
This commit is contained in:
parent
f1911aa270
commit
d2ff37b5a1
@ -14,7 +14,7 @@ from operator import itemgetter
|
|||||||
|
|
||||||
from PyQt4.QtGui import QImage
|
from PyQt4.QtGui import QImage
|
||||||
|
|
||||||
|
from calibre import prints
|
||||||
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
||||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||||
from calibre.library.database import LibraryDatabase
|
from calibre.library.database import LibraryDatabase
|
||||||
@ -1039,43 +1039,170 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
tn=field['table'], col=field['link_column']), (id_,))
|
tn=field['table'], col=field['link_column']), (id_,))
|
||||||
return set(x[0] for x in ans)
|
return set(x[0] for x in ans)
|
||||||
|
|
||||||
|
########## data structures for get_categories
|
||||||
|
|
||||||
CATEGORY_SORTS = ('name', 'popularity', 'rating')
|
CATEGORY_SORTS = ('name', 'popularity', 'rating')
|
||||||
|
|
||||||
def get_categories(self, sort='name', ids=None, icon_map=None):
|
class TCat_Tag(object):
    '''
    Lightweight accumulator for one category item while get_categories
    scans the books.

    Attributes:
        n  -- the item's value (name)
        s  -- the value used when sorting by name
        c  -- number of books counted for the item
        rt -- running total of the ratings of those books
        rc -- number of ratings folded into rt
        id -- the item's id, None until it has been set
    '''

    def __init__(self, name, sort):
        self.n, self.s = name, sort
        # Counters start empty; the caller bumps them book by book
        self.c = self.rt = self.rc = 0
        self.id = None

    def set_all(self, c, rt, rc, id):
        '''Overwrite the count, rating total, rating count and id in one go.'''
        self.c, self.rt, self.rc, self.id = c, rt, rc, id

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        fields = (self.n, self.s, self.c, self.rt, self.rc, self.id)
        return 'n=%s s=%s c=%d rt=%d rc=%d id=%s' % fields
|
||||||
|
|
||||||
|
|
||||||
|
def get_categories(self, sort='name', ids=None, icon_map=None):
|
||||||
|
start = time.time()
|
||||||
if icon_map is not None and type(icon_map) != TagsIcons:
|
if icon_map is not None and type(icon_map) != TagsIcons:
|
||||||
raise TypeError('icon_map passed to get_categories must be of type TagIcons')
|
raise TypeError('icon_map passed to get_categories must be of type TagIcons')
|
||||||
|
if sort not in self.CATEGORY_SORTS:
|
||||||
|
raise ValueError('sort ' + sort + ' not a valid value')
|
||||||
|
|
||||||
|
self.books_list_filter.change([] if not ids else ids)
|
||||||
|
id_filter = None if not ids else frozenset(ids)
|
||||||
|
|
||||||
tb_cats = self.field_metadata
|
tb_cats = self.field_metadata
|
||||||
#### First, build the standard and custom-column categories ####
|
tcategories = {}
|
||||||
|
tids = {}
|
||||||
|
md = []
|
||||||
|
|
||||||
|
# First, build the maps. We need a category->items map and an
|
||||||
|
# item -> (item_id, sort_val) map to use in the books loop
|
||||||
for category in tb_cats.keys():
|
for category in tb_cats.keys():
|
||||||
cat = tb_cats[category]
|
cat = tb_cats[category]
|
||||||
if not cat['is_category'] or cat['kind'] in ['user', 'search']:
|
if not cat['is_category'] or cat['kind'] in ['user', 'search'] \
|
||||||
|
or category in ['news', 'formats']:
|
||||||
continue
|
continue
|
||||||
tn = cat['table']
|
# Get the ids for the item values
|
||||||
categories[category] = [] #reserve the position in the ordered list
|
if not cat['is_custom']:
|
||||||
if tn is None: # Nothing to do for the moment
|
funcs = {
|
||||||
|
'authors' : self.get_authors_with_ids,
|
||||||
|
'series' : self.get_series_with_ids,
|
||||||
|
'publisher': self.get_publishers_with_ids,
|
||||||
|
'tags' : self.get_tags_with_ids,
|
||||||
|
'rating' : self.get_ratings_with_ids,
|
||||||
|
}
|
||||||
|
func = funcs.get(category, None)
|
||||||
|
if func:
|
||||||
|
list = func()
|
||||||
|
else:
|
||||||
|
raise ValueError(category + ' has no get with ids function')
|
||||||
|
else:
|
||||||
|
list = self.get_custom_items_with_ids(label=cat['label'])
|
||||||
|
tids[category] = {}
|
||||||
|
if category == 'authors':
|
||||||
|
for l in list:
|
||||||
|
(id, val, sort_val) = (l[0], l[1], l[2])
|
||||||
|
tids[category][val] = (id, sort_val)
|
||||||
|
else:
|
||||||
|
for l in list:
|
||||||
|
(id, val) = (l[0], l[1])
|
||||||
|
tids[category][val] = (id, val)
|
||||||
|
# add an empty category to the category map
|
||||||
|
tcategories[category] = {}
|
||||||
|
# create a list of category/field_index for the books scan to use.
|
||||||
|
# This saves iterating through field_metadata for each book
|
||||||
|
md.append((category, cat['rec_index'], cat['is_multiple']))
|
||||||
|
|
||||||
|
print 'end phase "collection":', time.time() - start, 'seconds'
|
||||||
|
|
||||||
|
# Now scan every book looking for category items.
|
||||||
|
# Code below is duplicated because it shaves off 10% of the loop time
|
||||||
|
id_dex = self.FIELD_MAP['id']
|
||||||
|
rating_dex = self.FIELD_MAP['rating']
|
||||||
|
for book in self.data.iterall():
|
||||||
|
if id_filter and book[id_dex] not in id_filter:
|
||||||
continue
|
continue
|
||||||
cn = cat['column']
|
rating = book[rating_dex]
|
||||||
if ids is None:
|
# We kept track of all possible category field_map positions above
|
||||||
query = '''SELECT id, {0}, count, avg_rating, sort
|
for (cat, dex, mult) in md:
|
||||||
FROM tag_browser_{1}'''.format(cn, tn)
|
if book[dex] is None:
|
||||||
else:
|
continue
|
||||||
query = '''SELECT id, {0}, count, avg_rating, sort
|
if not mult:
|
||||||
FROM tag_browser_filtered_{1}'''.format(cn, tn)
|
val = book[dex]
|
||||||
if sort == 'popularity':
|
try:
|
||||||
query += ' ORDER BY count DESC, sort ASC'
|
(item_id, sort_val) = tids[cat][val] # let exceptions fly
|
||||||
elif sort == 'name':
|
item = tcategories[cat].get(val, None)
|
||||||
query += ' ORDER BY sort COLLATE icucollate'
|
if not item:
|
||||||
else:
|
item = LibraryDatabase2.TCat_Tag(val, sort_val)
|
||||||
query += ' ORDER BY avg_rating DESC, sort ASC'
|
tcategories[cat][val] = item
|
||||||
data = self.conn.get(query)
|
item.c += 1
|
||||||
|
item.id = item_id
|
||||||
|
if rating > 0:
|
||||||
|
item.rt += rating
|
||||||
|
item.rc += 1
|
||||||
|
except:
|
||||||
|
prints('get_categories: item', val, 'is not in', cat, 'list!')
|
||||||
|
else:
|
||||||
|
vals = book[dex].split(mult)
|
||||||
|
for val in vals:
|
||||||
|
try:
|
||||||
|
(item_id, sort_val) = tids[cat][val] # let exceptions fly
|
||||||
|
item = tcategories[cat].get(val, None)
|
||||||
|
if not item:
|
||||||
|
item = LibraryDatabase2.TCat_Tag(val, sort_val)
|
||||||
|
tcategories[cat][val] = item
|
||||||
|
item.c += 1
|
||||||
|
item.id = item_id
|
||||||
|
if rating > 0:
|
||||||
|
item.rt += rating
|
||||||
|
item.rc += 1
|
||||||
|
except:
|
||||||
|
prints('get_categories: item', val, 'is not in', cat, 'list!')
|
||||||
|
|
||||||
|
print 'end phase "books":', time.time() - start, 'seconds'
|
||||||
|
|
||||||
|
# Now do news
|
||||||
|
tcategories['news'] = {}
|
||||||
|
cat = tb_cats['news']
|
||||||
|
tn = cat['table']
|
||||||
|
cn = cat['column']
|
||||||
|
if ids is None:
|
||||||
|
query = '''SELECT id, {0}, count, avg_rating, sort
|
||||||
|
FROM tag_browser_{1}'''.format(cn, tn)
|
||||||
|
else:
|
||||||
|
query = '''SELECT id, {0}, count, avg_rating, sort
|
||||||
|
FROM tag_browser_filtered_{1}'''.format(cn, tn)
|
||||||
|
# results will be sorted later
|
||||||
|
data = self.conn.get(query)
|
||||||
|
for r in data:
|
||||||
|
item = LibraryDatabase2.TCat_Tag(r[1], r[1])
|
||||||
|
item.set_all(c=r[2], rt=r[2]*r[3], rc=r[2], id=r[0])
|
||||||
|
tcategories['news'][r[1]] = item
|
||||||
|
|
||||||
|
print 'end phase "news":', time.time() - start, 'seconds'
|
||||||
|
|
||||||
|
# Build the real category list by iterating over the temporary copy
|
||||||
|
# and building the Tag instances.
|
||||||
|
categories = {}
|
||||||
|
for category in tb_cats.keys():
|
||||||
|
if category not in tcategories:
|
||||||
|
continue
|
||||||
|
cat = tb_cats[category]
|
||||||
|
|
||||||
|
# prepare the place where we will put the array of Tags
|
||||||
|
categories[category] = []
|
||||||
|
|
||||||
# icon_map is not None if get_categories is to store an icon and
|
# icon_map is not None if get_categories is to store an icon and
|
||||||
# possibly a tooltip in the tag structure.
|
# possibly a tooltip in the tag structure.
|
||||||
icon, tooltip = None, ''
|
icon = None
|
||||||
|
tooltip = ''
|
||||||
label = tb_cats.key_to_label(category)
|
label = tb_cats.key_to_label(category)
|
||||||
if icon_map:
|
if icon_map:
|
||||||
if not tb_cats.is_custom_field(category):
|
if not tb_cats.is_custom_field(category):
|
||||||
@ -1087,23 +1214,40 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
tooltip = self.custom_column_label_map[label]['name']
|
tooltip = self.custom_column_label_map[label]['name']
|
||||||
|
|
||||||
datatype = cat['datatype']
|
datatype = cat['datatype']
|
||||||
avgr = itemgetter(3)
|
avgr = lambda x: 0.0 if x.rc == 0 else x.rt/x.rc
|
||||||
item_not_zero_func = lambda x: x[2] > 0
|
# Duplicate the build of items below to avoid using a lambda func
|
||||||
|
# in the main Tag loop. Saves a few %
|
||||||
if datatype == 'rating':
|
if datatype == 'rating':
|
||||||
# eliminate the zero ratings line as well as count == 0
|
|
||||||
item_not_zero_func = (lambda x: x[1] > 0 and x[2] > 0)
|
|
||||||
formatter = (lambda x:u'\u2605'*int(x/2))
|
formatter = (lambda x:u'\u2605'*int(x/2))
|
||||||
avgr = itemgetter(1)
|
avgr = lambda x : x.n
|
||||||
|
# eliminate the zero ratings line as well as count == 0
|
||||||
|
items = [v for v in tcategories[category].values() if v.c > 0 and v.n != 0]
|
||||||
elif category == 'authors':
|
elif category == 'authors':
|
||||||
# Clean up the authors strings to human-readable form
|
# Clean up the authors strings to human-readable form
|
||||||
formatter = (lambda x: x.replace('|', ','))
|
formatter = (lambda x: x.replace('|', ','))
|
||||||
|
items = [v for v in tcategories[category].values() if v.c > 0]
|
||||||
else:
|
else:
|
||||||
formatter = (lambda x:unicode(x))
|
formatter = (lambda x:unicode(x))
|
||||||
|
items = [v for v in tcategories[category].values() if v.c > 0]
|
||||||
|
|
||||||
categories[category] = [Tag(formatter(r[1]), count=r[2], id=r[0],
|
# sort the list
|
||||||
avg=avgr(r), sort=r[4], icon=icon,
|
if sort == 'name':
|
||||||
|
kf = lambda x: sort_key(x.s) if isinstance(x.s, unicode) else x.s
|
||||||
|
reverse=False
|
||||||
|
elif sort == 'popularity':
|
||||||
|
kf = lambda x: x.c
|
||||||
|
reverse=True
|
||||||
|
else:
|
||||||
|
kf = avgr
|
||||||
|
reverse=True
|
||||||
|
items.sort(key=kf, reverse=reverse)
|
||||||
|
|
||||||
|
categories[category] = [Tag(formatter(r.n), count=r.c, id=r.id,
|
||||||
|
avg=avgr(r), sort=r.s, icon=icon,
|
||||||
tooltip=tooltip, category=category)
|
tooltip=tooltip, category=category)
|
||||||
for r in data if item_not_zero_func(r)]
|
for r in items]
|
||||||
|
|
||||||
|
print 'end phase "tags list":', time.time() - start, 'seconds'
|
||||||
|
|
||||||
# Needed for legacy databases that have multiple ratings that
|
# Needed for legacy databases that have multiple ratings that
|
||||||
# map to n stars
|
# map to n stars
|
||||||
@ -1189,8 +1333,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
icon_map['search'] = icon_map['search']
|
icon_map['search'] = icon_map['search']
|
||||||
categories['search'] = items
|
categories['search'] = items
|
||||||
|
|
||||||
|
t = time.time() - start
|
||||||
|
print 'get_categories ran in:', t, 'seconds'
|
||||||
|
|
||||||
return categories
|
return categories
|
||||||
|
|
||||||
|
############# End get_categories
|
||||||
|
|
||||||
def tags_older_than(self, tag, delta):
|
def tags_older_than(self, tag, delta):
|
||||||
tag = tag.lower().strip()
|
tag = tag.lower().strip()
|
||||||
now = nowf()
|
now = nowf()
|
||||||
@ -1486,6 +1635,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
# Note: we generally do not need to refresh_ids because library_view will
|
# Note: we generally do not need to refresh_ids because library_view will
|
||||||
# refresh everything.
|
# refresh everything.
|
||||||
|
|
||||||
|
def get_ratings_with_ids(self):
    '''
    Return the (id, rating) rows selected from the ratings table.

    An empty list is returned whenever the query result is empty or
    otherwise falsy, so callers can always iterate over the result.
    '''
    return self.conn.get('SELECT id,rating FROM ratings') or []
|
||||||
|
|
||||||
def dirty_books_referencing(self, field, id, commit=True):
|
def dirty_books_referencing(self, field, id, commit=True):
|
||||||
# Get the list of books to dirty -- all books that reference the item
|
# Get the list of books to dirty -- all books that reference the item
|
||||||
table = self.field_metadata[field]['table']
|
table = self.field_metadata[field]['table']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user