Start work on get_categories() for the new db backend

This commit is contained in:
Kovid Goyal 2013-01-24 14:41:03 +05:30
parent dd001ca8c1
commit 68f58f2c30
5 changed files with 191 additions and 70 deletions

View File

@ -293,13 +293,13 @@ class Cache(object):
Return all the books associated with the item identified by
``item_id``, where the item belongs to the field ``name``.
Returned value is a tuple of book ids, or the empty tuple if the item
Returned value is a set of book ids, or the empty set if the item
or the field does not exist.
'''
try:
return self.fields[name].books_for(item_id)
except (KeyError, IndexError):
return ()
return set()
@read_api
def all_book_ids(self, type=frozenset):

View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from functools import partial
from calibre.library.field_metadata import TagsIcons
from calibre.utils.config_base import tweaks
# The sort orders accepted by get_categories(); any other value is rejected
# there with a ValueError.
CATEGORY_SORTS = { 'name', 'popularity', 'rating' }
class Tag(object):
    '''
    Plain record describing one named item together with its display state.

    The ``avg`` argument is halved to produce ``avg_rating`` (``None`` is
    treated as no rating). When the resulting rating is positive, the
    tooltip is replaced by a translated "Average rating" message that is
    prefixed with the supplied tooltip.
    '''

    def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None,
                 tooltip=None, icon=None, category=None, id_set=None,
                 is_editable=True, is_searchable=True, use_sort_as_name=False):
        self.original_name = name
        self.name = name
        self.id, self.count, self.state = id, count, state
        self.is_hierarchical = ''
        self.is_editable, self.is_searchable = is_editable, is_searchable
        # A fresh set per instance, never a shared default
        if id_set is None:
            id_set = set([])
        self.id_set = id_set
        self.avg_rating = 0 if avg is None else avg/2.0
        self.sort = sort
        self.use_sort_as_name = use_sort_as_name
        if self.avg_rating > 0:
            # Only a non-empty tooltip gets the ': ' separator appended
            prefix = (tooltip + ': ') if tooltip else tooltip
            tooltip = _('%(tt)sAverage rating is %(rating)3.1f')%dict(
                tt=prefix, rating=self.avg_rating)
        self.tooltip = tooltip
        self.icon = icon
        self.category = category

    def __unicode__(self):
        parts = (self.name, self.count, self.id, self.state, self.category,
                 self.tooltip)
        return u'%s:%s:%s:%s:%s:%s'%parts

    def __str__(self):
        text = unicode(self)
        return text.encode('utf-8')

    def __repr__(self):
        return str(self)
def find_categories(field_metadata):
    '''
    Generate the fields of ``field_metadata`` that should be shown as
    categories.

    Yields 3-tuples ``(key, cache_to_list, is_composite)`` where
    ``cache_to_list`` is the ``'cache_to_list'`` entry of the field's
    ``is_multiple`` metadata (or None) and ``is_composite`` is True only for
    composite fields whose display settings have ``make_category`` set.

    Fields of kind ``user`` or ``search``, the ``news`` and ``formats``
    fields and fields flagged ``is_csp`` are not yielded as regular
    categories.
    '''
    skipped_kinds = { 'user', 'search' }
    skipped_keys = {'news', 'formats'}
    for category, cat in field_metadata.iteritems():
        regular = (cat['is_category'] and cat['kind'] not in skipped_kinds and
                   category not in skipped_keys and
                   not cat.get('is_csp', False))
        if regular:
            is_composite = False
        elif (cat['datatype'] == 'composite' and
                cat['display'].get('make_category', False)):
            is_composite = True
        else:
            continue
        yield (category, cat['is_multiple'].get('cache_to_list', None),
               is_composite)
def create_tag_class(category, fm, icon_map):
    '''
    Return a ``Tag`` factory (a ``functools.partial``) with the settings for
    ``category`` already bound: icon, tooltip, editability and whether the
    sort value should be used as the display name.

    :param category: Key of the category in ``fm``
    :param fm: Field metadata; indexed by key and queried through
               ``key_to_label()`` and ``is_custom_field()``
    :param icon_map: Mapping of category to icon, may be None/empty. For a
                     custom field the ``'custom:'`` icon is used and is also
                     stored back into ``icon_map`` under ``category``.
    '''
    cat = fm[category]
    label = fm.key_to_label(category)
    tooltip = '(' + category + ')'

    icon = None
    if icon_map:
        if fm.is_custom_field(category):
            icon = icon_map[category] = icon_map['custom:']
        elif category in icon_map:
            icon = icon_map[label]

    is_editable = category not in { 'news', 'rating', 'languages' }

    # The sort value replaces the name only when the tweak asks for it, and
    # only for authors or for custom multi-valued text columns that are
    # marked as containing names.
    use_sort_as_name = False
    if tweaks['categories_use_field_for_author_name'] == 'author_sort':
        if category == 'authors':
            use_sort_as_name = True
        elif (cat['display'].get('is_names', False) and cat['is_custom'] and
                cat['is_multiple'] and cat['datatype'] == 'text'):
            use_sort_as_name = True

    return partial(Tag, use_sort_as_name=use_sort_as_name, icon=icon,
                   tooltip=tooltip, is_editable=is_editable,
                   category=category)
def get_categories(dbcache, sort='name', book_ids=None, icon_map=None):
    '''
    Build the category items for every category field of ``dbcache``.

    :param dbcache: Object exposing ``field_metadata`` and a ``fields``
                    mapping of field name to field object
    :param sort: One of the values in ``CATEGORY_SORTS``
    :param book_ids: Iterable of book ids to restrict the items to, or None
                     for no restriction
    :param icon_map: None or a ``TagsIcons`` instance
    :return: dict mapping category key to the list returned by that field's
             ``get_categories()``
    :raises TypeError: if ``icon_map`` is neither None nor a ``TagsIcons``
    :raises ValueError: if ``sort`` is not a recognised sort order
    '''
    if icon_map is not None and not isinstance(icon_map, TagsIcons):
        raise TypeError('icon_map passed to get_categories must be of type TagsIcons')
    if sort not in CATEGORY_SORTS:
        raise ValueError('sort %s not a valid value' % (sort,))

    fm = dbcache.field_metadata
    book_rating_map = dbcache.fields['rating'].book_value_map
    # The field is registered as 'languages' (plural), matching the name
    # checks used for it elsewhere in the db code.
    lang_field = dbcache.fields['languages']
    # None means "all books" and must be passed through unchanged;
    # frozenset(None) would raise a TypeError.
    if book_ids is not None:
        book_ids = frozenset(book_ids)

    categories = {}
    # is_multiple and is_composite are not used yet
    for category, is_multiple, is_composite in find_categories(fm):
        tag_class = create_tag_class(category, fm, icon_map)
        categories[category] = dbcache.fields[category].get_categories(
            tag_class, book_rating_map, sort, lang_field, book_ids)
    return categories

View File

@ -9,7 +9,8 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from threading import Lock
from collections import defaultdict
from collections import defaultdict, Counter
from operator import attrgetter
from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
from calibre.ebooks.metadata import title_sort
@ -24,22 +25,28 @@ class Field(object):
def __init__(self, name, table):
self.name, self.table = name, table
self.has_text_data = self.metadata['datatype'] in ('text', 'comments',
'series', 'enumeration')
self.table_type = self.table.table_type
dt = self.metadata['datatype']
self.has_text_data = dt in {'text', 'comments', 'series', 'enumeration'}
self.table_type = self.table.table_type
self._sort_key = (sort_key if dt in ('text', 'series', 'enumeration') else lambda x: x)
self._default_sort_key = ''
if self.metadata['datatype'] in ('int', 'float', 'rating'):
if dt in { 'int', 'float', 'rating' }:
self._default_sort_key = 0
elif self.metadata['datatype'] == 'bool':
elif dt == 'bool':
self._default_sort_key = None
elif self.metadata['datatype'] == 'datetime':
elif dt == 'datetime':
self._default_sort_key = UNDEFINED_DATE
if self.name == 'languages':
self._sort_key = lambda x:sort_key(calibre_langcode_to_name(x))
self.is_multiple = (bool(self.metadata['is_multiple']) or self.name ==
'formats')
self.category_formatter = type(u'')
self.category_sort_reverse = False
if dt == 'rating':
self.category_formatter = lambda x:'\u2605'*int(x/2)
self.category_sort_reverse = True
elif name == 'languages':
self.category_formatter = calibre_langcode_to_name
@property
def metadata(self):
@ -63,7 +70,7 @@ class Field(object):
def books_for(self, item_id):
'''
Return the ids of all books associated with the item identified by
item_id as a tuple. An empty tuple is returned if no books are found.
item_id as a set. An empty set is returned if no books are found.
'''
raise NotImplementedError()
@ -94,6 +101,34 @@ class Field(object):
'''
raise NotImplementedError()
def get_categories(self, tag_class, book_rating_map, sort, lang_field, book_ids=None):
    '''
    Return a sorted list of category items, one per item of this field that
    has at least one book.

    :param tag_class: Callable used to create each item; called as
                      ``tag_class(name, id=, sort=, avg=, id_set=, count=)``
    :param book_rating_map: Mapping of book id to rating; only ratings
                            greater than zero contribute to the average
    :param sort: ``'popularity'`` sorts by count, ``'rating'`` by average
                 rating, anything else by name (or sort value if present)
    :param lang_field: Passed through to ``category_sort_value()`` for
                       fields that define it
    :param book_ids: If not None, only books in this set are considered
    :return: list of items, empty if this field is not a "many" field
    '''
    ans = []
    if not self.is_many:
        return ans

    id_map = self.table.id_map
    special_sort = hasattr(self, 'category_sort_value')
    for item_id, item_book_ids in self.table.col_book_map.iteritems():
        if book_ids is not None:
            item_book_ids = item_book_ids.intersection(book_ids)
        if not item_book_ids:
            continue
        ratings = tuple(r for r in (book_rating_map.get(book_id, 0) for
                                    book_id in item_book_ids) if r > 0)
        # An item whose books are all unrated has no ratings to average;
        # report 0 instead of dividing by zero.
        avg = sum(ratings)/len(ratings) if ratings else 0
        name = self.category_formatter(id_map[item_id])
        sval = (self.category_sort_value(item_id, item_book_ids, lang_field)
                if special_sort else name)
        ans.append(tag_class(name, id=item_id, sort=sval, avg=avg,
                             id_set=item_book_ids, count=len(item_book_ids)))

    if sort == 'popularity':
        key = attrgetter('count')
    elif sort == 'rating':
        key = attrgetter('avg_rating')
    else:
        key = lambda x: sort_key(x.sort or x.name)
    ans.sort(key=key, reverse=self.category_sort_reverse)
    return ans
class OneToOneField(Field):
def for_book(self, book_id, default_value=None):
@ -103,7 +138,7 @@ class OneToOneField(Field):
return (book_id,)
def books_for(self, item_id):
return (item_id,)
return {item_id}
def __iter__(self):
return self.table.book_col_map.iterkeys()
@ -223,7 +258,7 @@ class ManyToOneField(Field):
return (id_,)
def books_for(self, item_id):
return self.table.col_book_map.get(item_id, ())
return self.table.col_book_map.get(item_id, set())
def __iter__(self):
return self.table.id_map.iterkeys()
@ -238,11 +273,17 @@ class ManyToOneField(Field):
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
cbm = self.table.col_book_map
empty = set()
for item_id, val in self.table.id_map.iteritems():
book_ids = set(cbm.get(item_id, ())).intersection(candidates)
book_ids = cbm.get(item_id, empty).intersection(candidates)
if book_ids:
yield val, book_ids
@property
def book_value_map(self):
    '''
    Mapping of book id to the value of the item linked to that book, built
    from the table's ``book_col_map`` and ``id_map``.
    '''
    # The book -> item map lives on the table, like id_map and col_book_map;
    # every other accessor in this module reaches it via self.table.
    id_map = self.table.id_map
    return {book_id:id_map[item_id] for book_id, item_id in
            self.table.book_col_map.iteritems()}
class ManyToManyField(Field):
is_many = True
@ -263,7 +304,7 @@ class ManyToManyField(Field):
return self.table.book_col_map.get(book_id, ())
def books_for(self, item_id):
return self.table.col_book_map.get(item_id, ())
return self.table.col_book_map.get(item_id, set())
def __iter__(self):
return self.table.id_map.iterkeys()
@ -282,8 +323,9 @@ class ManyToManyField(Field):
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
cbm = self.table.col_book_map
empty = set()
for item_id, val in self.table.id_map.iteritems():
book_ids = set(cbm.get(item_id, ())).intersection(candidates)
book_ids = cbm.get(item_id, empty).intersection(candidates)
if book_ids:
yield val, book_ids
@ -327,6 +369,9 @@ class AuthorsField(ManyToManyField):
'link' : self.table.alink_map[author_id],
}
def category_sort_value(self, item_id, book_ids, language_field):
    '''
    Return the entry for ``item_id`` in the table's ``asort_map``.
    ``book_ids`` and ``language_field`` are not used here.
    '''
    author_sorts = self.table.asort_map
    return author_sorts[item_id]
class FormatsField(ManyToManyField):
def for_book(self, book_id, default_value=None):
@ -361,6 +406,23 @@ class SeriesField(ManyToOneField):
return {book_id:self.sort_key_for_series(book_id, get_lang, sso) for book_id
in all_book_ids}
def category_sort_value(self, item_id, book_ids, language_field):
    '''
    Return the sort value for the series ``item_id``: its name passed
    through ``title_sort()`` using the ``title_series_sorting`` tweak.

    Unless sorting is strictly alphabetic, the language handed to
    ``title_sort()`` is the most common first language among ``book_ids``
    (None if none of those books has a language).

    :param item_id: Id of the series in this field's table
    :param book_ids: Ids of the books in the series to consider
    :param language_field: The languages field object; its table's
                           ``book_col_map`` is consulted
    '''
    lang = None
    tss = tweaks['title_series_sorting']
    if tss != 'strictly_alphabetic':
        # NOTE(review): the book -> languages map is read from the field's
        # table, as every other accessor in this module does - confirm that
        # Field itself does not expose book_col_map.
        lang_map = language_field.table.book_col_map
        book_langs = (lang_map.get(book_id, None) for book_id in book_ids)
        # Count only the first language of each book that has any
        c = Counter(l[0] for l in book_langs if l)
        if c:
            lang = c.most_common(1)[0][0]
    val = self.table.id_map[item_id]
    return title_sort(val, order=tss, lang=lang)
def create_field(name, table):
cls = {
ONE_ONE : OneToOneField,

View File

@ -132,13 +132,10 @@ class ManyToOneTable(Table):
'SELECT book, {0} FROM {1}'.format(
self.metadata['link_column'], self.link_table)):
if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = []
self.col_book_map[row[1]].append(row[0])
self.col_book_map[row[1]] = set()
self.col_book_map[row[1]].add(row[0])
self.book_col_map[row[0]] = row[1]
for key in tuple(self.col_book_map.iterkeys()):
self.col_book_map[key] = tuple(self.col_book_map[key])
class ManyToManyTable(ManyToOneTable):
'''
@ -154,15 +151,12 @@ class ManyToManyTable(ManyToOneTable):
for row in db.conn.execute(
self.selectq.format(self.metadata['link_column'], self.link_table)):
if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = []
self.col_book_map[row[1]].append(row[0])
self.col_book_map[row[1]] = set()
self.col_book_map[row[1]].add(row[0])
if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append(row[1])
for key in tuple(self.col_book_map.iterkeys()):
self.col_book_map[key] = tuple(self.col_book_map[key])
for key in tuple(self.book_col_map.iterkeys()):
self.book_col_map[key] = tuple(self.book_col_map[key])
@ -191,8 +185,8 @@ class FormatsTable(ManyToManyTable):
if row[1] is not None:
fmt = row[1].upper()
if fmt not in self.col_book_map:
self.col_book_map[fmt] = []
self.col_book_map[fmt].append(row[0])
self.col_book_map[fmt] = set()
self.col_book_map[fmt].add(row[0])
if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append(fmt)
@ -200,9 +194,6 @@ class FormatsTable(ManyToManyTable):
self.fname_map[row[0]] = {}
self.fname_map[row[0]][fmt] = row[2]
for key in tuple(self.col_book_map.iterkeys()):
self.col_book_map[key] = tuple(self.col_book_map[key])
for key in tuple(self.book_col_map.iterkeys()):
self.book_col_map[key] = tuple(sorted(self.book_col_map[key]))
@ -215,15 +206,12 @@ class IdentifiersTable(ManyToManyTable):
for row in db.conn.execute('SELECT book, type, val FROM identifiers'):
if row[1] is not None and row[2] is not None:
if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = []
self.col_book_map[row[1]].append(row[0])
self.col_book_map[row[1]] = set()
self.col_book_map[row[1]].add(row[0])
if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = {}
self.book_col_map[row[0]][row[1]] = row[2]
for key in tuple(self.col_book_map.iterkeys()):
self.col_book_map[key] = tuple(self.col_book_map[key])
class LanguagesTable(ManyToManyTable):
def read_id_maps(self, db):

View File

@ -44,47 +44,13 @@ from calibre.utils.recycle_bin import delete_file, delete_tree
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.errors import NoSuchFormat
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.db.categories import Tag
from calibre.utils.localization import (canonicalize_lang,
calibre_langcode_to_name)
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
SPOOL_SIZE = 30*1024*1024
class Tag(object):
    '''
    Plain record describing one named item together with its display state.

    The ``avg`` argument is halved to produce ``avg_rating`` (``None`` is
    treated as no rating). When the resulting rating is positive, the
    tooltip is replaced by a translated "Average rating" message that is
    prefixed with the supplied tooltip.
    '''

    def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None,
                 tooltip=None, icon=None, category=None, id_set=None,
                 is_editable = True, is_searchable=True, use_sort_as_name=False):
        self.original_name = name
        self.name = name
        self.id, self.count, self.state = id, count, state
        self.is_hierarchical = ''
        self.is_editable, self.is_searchable = is_editable, is_searchable
        # A fresh set per instance, never a shared default
        if id_set is None:
            id_set = set([])
        self.id_set = id_set
        self.avg_rating = 0 if avg is None else avg/2.0
        self.sort = sort
        self.use_sort_as_name = use_sort_as_name
        if self.avg_rating > 0:
            # Only a non-empty tooltip gets the ': ' separator appended
            prefix = (tooltip + ': ') if tooltip else tooltip
            tooltip = _('%(tt)sAverage rating is %(rating)3.1f')%dict(
                tt=prefix, rating=self.avg_rating)
        self.tooltip = tooltip
        self.icon = icon
        self.category = category

    def __unicode__(self):
        parts = (self.name, self.count, self.id, self.state, self.category,
                 self.tooltip)
        return u'%s:%s:%s:%s:%s:%s'%parts

    def __str__(self):
        text = unicode(self)
        return text.encode('utf-8')

    def __repr__(self):
        return str(self)
class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
An ebook metadata database that stores references to ebook files on disk.