From 614c8545f4f08d87249f62f35abc218e73abc451 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 1 Apr 2010 15:09:09 +0530 Subject: [PATCH] Fix #5158 (Large library performance problem) --- src/calibre/library/database2.py | 101 +++++++++++++++++++------------ 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 750e600cee..2851dba1d4 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -415,6 +415,15 @@ class Tag(object): self.count = count self.state = state + def __unicode__(self): + return u'%s:%s:%s:%s'%(self.name, self.count, self.id, self.state) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __repr__(self): + return str(self) + class LibraryDatabase2(LibraryDatabase): ''' @@ -712,6 +721,24 @@ class LibraryDatabase2(LibraryDatabase): END TRANSACTION; ''') + def upgrade_version_8(self): + 'Add Tag Browser views' + def create_tag_browser_view(table_name, column_name): + self.conn.executescript(''' + DROP VIEW IF EXISTS tag_browser_{tn}; + CREATE VIEW tag_browser_{tn} AS SELECT + id, + name, + (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count + FROM {tn}; + '''.format(tn=table_name, cn=column_name)) + + for tn in ('authors', 'tags', 'publishers', 'series'): + cn = tn[:-1] + if tn == 'series': + cn = tn + create_tag_browser_view(tn, cn) + def last_modified(self): ''' Return last modified time as a UTC datetime object''' @@ -1083,50 +1110,44 @@ class LibraryDatabase2(LibraryDatabase): return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False) def get_categories(self, sort_on_count=False): - categories = {} - def get(name, category, field='name'): - ans = self.conn.get('SELECT DISTINCT %s FROM %s'%(field, name)) - ans = [x[0].strip() for x in ans] - try: - ans.remove('') - except ValueError: pass - categories[category] = list(map(Tag, ans)) - tags = categories[category] - if name != 'data': - for tag in tags: - id = self.conn.get('SELECT id FROM %s WHERE %s=?'%(name, - field), (tag.name,), all=False) - tag.id = id - for tag in tags: - if tag.id is not None: - tag.count = self.conn.get('SELECT COUNT(id) FROM books_%s_link WHERE %s=?'%(name, category), (tag.id,), all=False) - else: - for tag in tags: - tag.count = self.conn.get('SELECT COUNT(format) FROM data WHERE format=?', - (tag.name,), all=False) - tags.sort(reverse=sort_on_count, cmp=(lambda - x,y:cmp(x.count,y.count)) if sort_on_count else (lambda - x,y:cmp(x.name, y.name))) - for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'), - ('series', 'series')): - get(*x) - get('data', 'format', 'format') + self.conn.executescript(u''' + CREATE TEMP VIEW IF NOT EXISTS tag_browser_news AS SELECT DISTINCT + id, + name, + (SELECT COUNT(id) FROM books_tags_link WHERE tag=x.id) count + FROM tags as x WHERE name!="{0}" AND id IN + (SELECT DISTINCT tag FROM books_tags_link WHERE book IN + (SELECT DISTINCT book FROM books_tags_link WHERE tag IN + (SELECT id FROM tags WHERE name="{0}"))); + '''.format(_('News'))) + self.conn.commit() - categories['news'] = [] - newspapers = self.conn.get('SELECT name FROM tags WHERE id IN (SELECT DISTINCT tag FROM books_tags_link WHERE book IN (select book from books_tags_link where tag IN (SELECT id FROM tags WHERE name=?)))', (_('News'),)) - if newspapers: - newspapers = [f[0] for f in newspapers] - try: - newspapers.remove(_('News')) - except ValueError: - pass - categories['news'] = list(map(Tag, newspapers)) - for tag in categories['news']: - tag.count = self.conn.get('SELECT COUNT(id) FROM books_tags_link WHERE tag IN (SELECT DISTINCT id FROM tags WHERE name=?)', (tag.name,), all=False) + categories = {} + for x in ('tags', 'series', 'news', 'publishers', 'authors'): + query = 'SELECT id,name,count FROM tag_browser_'+x + if sort_on_count: + query += ' ORDER BY count DESC' + else: + query += ' ORDER BY name ASC' + data = self.conn.get(query) + category = x if x in ('series', 'news') else x[:-1] + categories[category] = [Tag(r[1], count=r[2], id=r[0]) for r in data] + + categories['format'] = [] + for fmt in self.conn.get('SELECT DISTINCT format FROM data'): + fmt = fmt[0] + count = self.conn.get('SELECT COUNT(id) FROM data WHERE format="%s"'%fmt, + all=False) + categories['format'].append(Tag(fmt, count=count)) + + if sort_on_count: + categories['format'].sort(cmp=lambda x,y:cmp(x.count, y.count), + reverse=True) + else: + categories['format'].sort(cmp=lambda x,y:cmp(x.name, y.name)) return categories - def tags_older_than(self, tag, delta): tag = tag.lower().strip() now = nowf()