diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py index dd7f3f7275..d515235e73 100644 --- a/src/calibre/gui2/library.py +++ b/src/calibre/gui2/library.py @@ -175,8 +175,7 @@ class BooksModel(QAbstractTableModel): self.endInsertRows() def search(self, text, refinement, reset=True): - tokens, OR = self.search_tokens(text) - self.db.filter(tokens, refilter=refinement, OR=OR) + self.db.search(text) self.last_search = text if reset: self.clear_caches() diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 67f677e68a..2f422ab1c4 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -9,6 +9,7 @@ Command line interface to the calibre database. import sys, os, cStringIO from textwrap import TextWrapper +from urllib import quote from calibre import terminal_controller, preferred_encoding from calibre.utils.config import OptionParser, prefs @@ -18,7 +19,6 @@ except: send_message = None from calibre.ebooks.metadata.meta import get_metadata from calibre.library.database2 import LibraryDatabase2 -from calibre.library.database import text_to_tokens from calibre.ebooks.metadata.opf import OPFCreator, OPFReader from calibre.utils.genshi.template import MarkupTemplate @@ -67,6 +67,8 @@ STANZA_TEMPLATE='''\ calibre http://calibre.kovidgoyal.net + $id + ${updated.strftime('%Y-%m-%dT%H:%M:%SZ')} ${subtitle} @@ -75,11 +77,11 @@ STANZA_TEMPLATE='''\ ${record['title']} urn:calibre:${record['id']} ${record['authors']} - ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+0000')} - - - -
${record['comments']}
+ ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')} + + + +
${record['comments']}
@@ -97,15 +99,14 @@ def get_db(dbpath, options): if options.library_path is not None: dbpath = options.library_path dbpath = os.path.abspath(dbpath) - print _('Using library at'), dbpath return LibraryDatabase2(dbpath, row_factory=True) def do_list(db, fields, sort_by, ascending, search_text, line_width, separator, prefix, output_format, subtitle='Books in the calibre database'): - db.refresh(sort_by, ascending) + if sort_by: + db.sort(sort_by, ascending) if search_text: - filters, OR = text_to_tokens(search_text) - db.filter(filters, False, OR) + db.search(search_text) authors_to_string = output_format in ['stanza', 'text'] data = db.get_data_as_dict(prefix, authors_as_string=authors_to_string) fields = ['id'] + fields @@ -159,7 +160,8 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator, elif output_format == 'stanza': data = [i for i in data if i.has_key('fmt_epub')] template = MarkupTemplate(STANZA_TEMPLATE) - return template.generate(data=data, subtitle=subtitle, sep=os.sep).render('xml') + return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle, + sep=os.sep, quote=quote, updated=db.last_modified()).render('xml') @@ -173,7 +175,7 @@ List the books available in the calibre database. )) parser.add_option('-f', '--fields', default='title,authors', help=_('The fields to display when listing books in the database. Should be a comma separated list of fields.\nAvailable fields: %s\nDefault: %%default. The special field "all" can be used to select all fields. Only has effect in the text output format.')%','.join(FIELDS)) - parser.add_option('--sort-by', default='timestamp', + parser.add_option('--sort-by', default=None, help=_('The field by which to sort the results.\nAvailable fields: %s\nDefault: %%default')%','.join(FIELDS)) parser.add_option('--ascending', default=False, action='store_true', help=_('Sort results in ascending order')) @@ -197,7 +199,7 @@ List the books available in the calibre database. 
return 1 db = get_db(dbpath, opts) - if not opts.sort_by in FIELDS: + if not opts.sort_by in FIELDS and opts.sort_by is not None: parser.print_help() print print >>sys.stderr, _('Invalid sort field. Available fields:'), ','.join(FIELDS) @@ -461,7 +463,7 @@ show_metadata command. def do_export(db, ids, dir, single_dir, by_author): if ids is None: - ids = db.all_ids() + ids = list(db.all_ids()) db.export_to_dir(dir, ids, byauthor=by_author, single_dir=single_dir, index_is_id=True) def command_export(args, dbpath): diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f1a61f122d..d02799714f 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -9,6 +9,7 @@ The database used to store ebook metadata import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \ operator, itertools, functools, traceback from itertools import repeat +from datetime import datetime from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock from PyQt4.QtGui import QApplication, QPixmap, QImage @@ -16,6 +17,7 @@ __app = None from calibre.library.database import LibraryDatabase from calibre.library.sqlite import connect, IntegrityError +from calibre.utils.search_query_parser import SearchQueryParser from calibre.ebooks.metadata import string_to_authors, authors_to_string from calibre.constants import preferred_encoding, iswindows, isosx @@ -48,6 +50,12 @@ def sanitize_file_name(name, substitute='_'): one = _filename_sanitize.sub(substitute, name) return re.sub(r'\s', ' ', one).strip() +FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, + 'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10, + 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14} +INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys())) + + class CoverCache(QThread): def __init__(self, library_path, parent=None): @@ -158,27 +166,15 @@ class CoverCache(QThread): self.load_queue.appendleft(id) 
self.load_queue_lock.unlock() -class ResultCache(object): +class ResultCache(SearchQueryParser): ''' Stores sorted and filtered metadata in memory. ''' - METHOD_MAP = { - 'title' : 'title', - 'authors' : 'author_sort', - 'author' : 'author_sort', - 'publisher' : 'publisher', - 'size' : 'size', - 'date' : 'timestamp', - 'timestamp' : 'timestamp', - 'rating' : 'rating', - 'tags' : 'tags', - 'series' : 'series', - } - def __init__(self): self._map = self._map_filtered = self._data = [] + SearchQueryParser.__init__(self) def __getitem__(self, row): return self._data[self._map_filtered[row]] @@ -189,6 +185,31 @@ class ResultCache(object): def __iter__(self): for id in self._map_filtered: yield self._data[id] + + def universal_set(self): + return set([i[0] for i in self._data if i is not None]) + + def get_matches(self, location, query): + matches = set([]) + if query and query.strip(): + location = location.lower().strip() + query = query.lower() + if location in ('tag', 'author', 'format'): + location += 's' + all = ('title', 'authors', 'publisher', 'tags', 'comments', 'series', 'formats') + MAP = {} + for x in all: + MAP[x] = FIELD_MAP[x] + location = [location] if location != 'all' else list(MAP.keys()) + for i, loc in enumerate(location): + location[i] = MAP[loc] + for item in self._data: + if item is None: continue + for loc in location: + if item[loc] and query in item[loc].lower(): + matches.add(item[0]) + break + return matches def remove(self, id): self._data[id] = None @@ -222,103 +243,49 @@ class ResultCache(object): self._map[0:0] = ids self._map_filtered[0:0] = ids - def refresh(self, db, field, ascending): - field = field.lower() - method = getattr(self, 'sort_on_' + self.METHOD_MAP[field]) - # Fast mapping from sorted row numbers to ids - self._map = map(operator.itemgetter(0), method('ASC' if ascending else 'DESC', db)) # Preserves sort order - # Fast mapping from sorted, filtered row numbers to ids - # At the moment it is the same as self._map - 
self._map_filtered = list(self._map) + def refresh(self, db, field=None, ascending=True): temp = db.conn.get('SELECT * FROM meta') - # Fast mapping from ids to data. - # Can be None for ids that dont exist (i.e. have been deleted) self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else [] for r in temp: self._data[r[0]] = r + self._map = [i[0] for i in self._data if i is not None] + if field is not None: + self.sort(field, ascending) + self._map_filtered = list(self._map) - def filter(self, filters, refilter=False, OR=False): - ''' - Filter data based on filters. All the filters must match for an item to - be accepted. Matching is case independent regexp matching. - @param filters: A list of SearchToken objects - @param refilter: If True filters are applied to the results of the previous - filtering. - @param OR: If True, keeps a match if any one of the filters matches. If False, - keeps a match only if all the filters match - ''' - if not refilter: - self._map_filtered = list(self._map) - if filters: - remove = [] - for id in self._map_filtered: - if OR: - keep = False - for token in filters: - if token.match(self._data[id]): - keep = True - break - if not keep: - remove.append(id) - else: - for token in filters: - if not token.match(self._data[id]): - remove.append(id) - break - for id in remove: - self._map_filtered.remove(id) + def seriescmp(self, x, y): + ans = self.strcmp(self._data[x][9], self._data[y][9]) + if ans != 0: return ans + return cmp(self._data[x][10], self._data[y][10]) - def sort_on_title(self, order, db): - return db.conn.get('SELECT id FROM books ORDER BY sort ' + order) + def cmp(self, loc, x, y, str=True): + ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if str else\ + cmp(self._data[x][loc], self._data[y][loc]) + if ans != 0: return ans + return cmp(self._data[x][11].lower(), self._data[y][11].lower()) - def sort_on_author_sort(self, order, db): - return db.conn.get('SELECT id FROM books ORDER BY 
author_sort,sort ' + order) + def sort(self, field, ascending): + import time + start = time.time() + field = field.lower().strip() + if field in ('author', 'tag', 'comment'): + field += 's' + if field == 'date': field = 'timestamp' + elif field == 'title': field = 'sort' + elif field == 'authors': field = 'author_sort' + fcmp = self.seriescmp if field == 'series' else \ + functools.partial(self.cmp, FIELD_MAP[field], str=field not in ('size', 'rating', 'timestamp')) + self._map.sort(cmp=fcmp, reverse=not ascending) + print time.time() - start + + def search(self, query): + if not query or not query.strip(): + self._map_filtered = list(self._map) + return + matches = set(self.parse(query)) + self._map_filtered = [id for id in self._map if id in matches] - def sort_on_timestamp(self, order, db): - return db.conn.get('SELECT id FROM books ORDER BY id ' + order) - def sort_on_publisher(self, order, db): - no_publisher = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_publishers_link) ORDER BY books.sort') - ans = [] - for r in db.conn.get('SELECT id FROM publishers ORDER BY name '+order): - publishers_id = r[0] - ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_publishers_link WHERE publisher=?) ORDER BY books.sort '+order, (publishers_id,)) - ans = (no_publisher + ans) if order == 'ASC' else (ans + no_publisher) - return ans - - - def sort_on_size(self, order, db): - return db.conn.get('SELECT id FROM meta ORDER BY size ' + order) - - def sort_on_rating(self, order, db): - no_rating = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_ratings_link) ORDER BY books.sort') - ans = [] - for r in db.conn.get('SELECT id FROM ratings ORDER BY rating '+order): - ratings_id = r[0] - ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_ratings_link WHERE rating=?) 
ORDER BY books.sort', (ratings_id,)) - ans = (no_rating + ans) if order == 'ASC' else (ans + no_rating) - return ans - - - def sort_on_series(self, order, db): - no_series = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_series_link) ORDER BY books.sort') - ans = [] - for r in db.conn.get('SELECT id FROM series ORDER BY name '+order): - series_id = r[0] - ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_series_link WHERE series=?) ORDER BY books.series_index,books.id '+order, (series_id,)) - ans = (no_series + ans) if order == 'ASC' else (ans + no_series) - return ans - - - def sort_on_tags(self, order, db): - no_tags = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_tags_link) ORDER BY books.sort') - ans = [] - for r in db.conn.get('SELECT id FROM tags ORDER BY name '+order): - tag_id = r[0] - ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_tags_link WHERE tag=?) 
ORDER BY books.sort '+order, (tag_id,)) - ans = (no_tags + ans) if order == 'ASC' else (ans + no_tags) - return ans - class Tag(unicode): def __init__(self, name): @@ -381,8 +348,10 @@ class LibraryDatabase2(LibraryDatabase): self.user_version += 1 self.data = ResultCache() - self.filter = self.data.filter + self.data.refresh(self) + self.search = self.data.search self.refresh = functools.partial(self.data.refresh, self) + self.sort = functools.partial(self.data.refresh, self) self.index = self.data.index self.refresh_ids = functools.partial(self.data.refresh_ids, self.conn) self.row = self.data.row @@ -423,6 +392,10 @@ class LibraryDatabase2(LibraryDatabase): self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag')) self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series')) + def last_modified(self): + ''' Return last modified time as a UTC datetime object''' + return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime) + def path(self, index, index_is_id=False): 'Return the relative path to the directory containing this books files as a unicode string.' 
id = index if index_is_id else self.id(index) @@ -993,12 +966,18 @@ class LibraryDatabase2(LibraryDatabase): def __iter__(self): - if len(self.data) == 0: + if len(self.data._data) == 0: self.refresh('timestamp', True) - for record in self.data: + for record in self.data._data: if record is not None: yield record - + + def all_ids(self): + for i in iter(self): + yield i['id'] + + def count(self): + return len(self.data._map) def get_data_as_dict(self, prefix=None, authors_as_string=False): ''' @@ -1012,7 +991,10 @@ class LibraryDatabase2(LibraryDatabase): prefix = self.library_path FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'isbn']) data = [] - for record in iter(self): + if len(self.data) == 0: + self.refresh(None, True) + for record in self.data: + if record is None: continue x = {} for field in FIELDS: x[field] = record[field] diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index f1761e4ce0..f4a793bdea 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en' HTTP server for remote access to the calibre database. 
''' -import sys, textwrap, cStringIO, mimetypes +import sys, textwrap, cStringIO, mimetypes, operator, os +from datetime import datetime import cherrypy from PIL import Image @@ -36,21 +37,22 @@ class LibraryServer(object): author_sort="${r[12]}" authors="${authors}" rating="${r[4]}" - timestamp="${timestamp.ctime()}" + timestamp="${r[5].ctime()}" size="${r[6]}" isbn="${r[14] if r[14] else ''}" formats="${r[13] if r[13] else ''}" series = "${r[9] if r[9] else ''}" series_index="${r[10]}" tags="${r[7] if r[7] else ''}" - publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''} + publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''} + ''') LIBRARY = MarkupTemplate(textwrap.dedent('''\ - + - ${Markup(book)} + ${Markup(book)} ''')) @@ -60,12 +62,12 @@ class LibraryServer(object): ${record['title']} urn:calibre:${record['id']} ${authors} - ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+0000')} + ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+00:00')} - -
${record['comments']}
+ +
${record['comments']}
''')) @@ -74,6 +76,8 @@ class LibraryServer(object): calibre Library + $id + ${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')} calibre http://calibre.kovidgoyal.net @@ -94,6 +98,7 @@ class LibraryServer(object): item break self.opts = opts + cherrypy.config.update({ 'server.socket_port': opts.port, 'server.socket_timeout': opts.timeout, #seconds @@ -106,14 +111,6 @@ class LibraryServer(object): tools.gzip.mime_types = ['text/html', 'text/plain', 'text/xml'] ''')%dict(autoreload=opts.develop) - def to_xml(self): - books = [] - book = MarkupTemplate(self.BOOK) - for record in iter(self.db): - authors = ' & '.join([i.replace('|', ',') for i in record[2].split(',')]) - books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8')) - return self.LIBRARY.generate(books=books).render('xml') - def start(self): cherrypy.quickstart(self, config=cStringIO.StringIO(self.config)) @@ -122,6 +119,10 @@ class LibraryServer(object): if cover is None: raise cherrypy.HTTPError(404, 'no cover available for id: %d'%id) cherrypy.response.headers['Content-Type'] = 'image/jpeg' + path = getattr(cover, 'name', None) + if path and os.path.exists(path): + updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) + cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) if not thumbnail: return cover.read() try: @@ -139,18 +140,41 @@ class LibraryServer(object): def get_format(self, id, format): format = format.upper() - fmt = self.db.format(id, format, index_is_id=True) + fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb') if fmt is None: raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format)) mt = mimetypes.guess_type('dummy.'+format.lower())[0] if mt is None: mt = 'application/octet-stream' cherrypy.response.headers['Content-Type'] = mt - return fmt + path = getattr(fmt, 'name', None) + if path and os.path.exists(path): + updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) + 
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) + return fmt.read() + + def sort(self, items, field): + field = field.lower().strip() + if field == 'author': + field = 'authors' + if field not in ('title', 'authors', 'rating'): + raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) + cmpf = cmp if field == 'rating' else lambda x, y: cmp(x.lower(), y.lower()) + field = {'title':11, 'authors':12, 'rating':4}[field] + getter = operator.itemgetter(field) + items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y))) + + def last_modified(self, updated): + lm = updated.strftime('day, %d month %Y %H:%M:%S GMT') + day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'} + lm = lm.replace('day', day[int(updated.strftime('%w'))]) + month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', + 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} + return lm.replace('month', month[updated.month]) + @expose def stanza(self): - cherrypy.response.headers['Content-Type'] = 'text/xml' books = [] for record in iter(self.db): if 'EPUB' in record['formats'].upper(): @@ -160,12 +184,45 @@ class LibraryServer(object): port=self.opts.port, server=self.opts.hostname, ).render('xml').decode('utf8')) - return self.STANZA.generate(subtitle='', data=books).render('xml') + + updated = self.db.last_modified() + cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) + cherrypy.response.headers['Content-Type'] = 'text/xml' + + return self.STANZA.generate(subtitle='', data=books, + updated=updated, id='urn:calibre:main').render('xml') @expose - def library(self): + def library(self, start='0', num='50', sort=None, search=None): + ''' + :param sort: Sort results by ``sort``. Can be one of `title,author,rating`. + :param search: Filter results by ``search`` query. 
See :class:`SearchQueryParser` for query syntax + :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results + ''' + try: + start = int(start) + except ValueError: + raise cherrypy.HTTPError(400, 'start: %s is not an integer'%start) + try: + num = int(num) + except ValueError: + raise cherrypy.HTTPError(400, 'num: %s is not an integer'%num) + ids = self.db.data.parse(search) if search else self.db.data.universal_set() + ids = set(ids) + items = [r for r in iter(self.db) if r[0] in ids] + if sort is not None: + self.sort(items, sort) + + book, books = MarkupTemplate(self.BOOK), [] + for record in items[start:start+num]: + authors = ' & '.join([i.replace('|', ',') for i in record[2].split(',')]) + books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8')) + updated = self.db.last_modified() + cherrypy.response.headers['Content-Type'] = 'text/xml' - return self.to_xml() + cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) + return self.LIBRARY.generate(books=books, start=start, updated=updated, + total=self.db.count()).render('xml') @expose def index(self): @@ -217,4 +274,4 @@ def main(args=sys.argv): return 0 if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index 7c5a49c946..05e0ed3e8f 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -122,7 +122,7 @@ class SearchQueryParser(object): self._parser = Or #self._parser.setDebug(True) - self.parse('(tolstoy)') + #self.parse('(tolstoy)') self._parser.setDebug(False) @@ -520,4 +520,4 @@ def main(args=sys.argv): return 0 if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main())