@@ -97,15 +99,14 @@ def get_db(dbpath, options):
if options.library_path is not None:
dbpath = options.library_path
dbpath = os.path.abspath(dbpath)
- print _('Using library at'), dbpath
return LibraryDatabase2(dbpath, row_factory=True)
def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
prefix, output_format, subtitle='Books in the calibre database'):
- db.refresh(sort_by, ascending)
+ if sort_by:
+ db.sort(sort_by, ascending)
if search_text:
- filters, OR = text_to_tokens(search_text)
- db.filter(filters, False, OR)
+ db.search(search_text)
authors_to_string = output_format in ['stanza', 'text']
data = db.get_data_as_dict(prefix, authors_as_string=authors_to_string)
fields = ['id'] + fields
@@ -159,7 +160,8 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
elif output_format == 'stanza':
data = [i for i in data if i.has_key('fmt_epub')]
template = MarkupTemplate(STANZA_TEMPLATE)
- return template.generate(data=data, subtitle=subtitle, sep=os.sep).render('xml')
+ return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
+ sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')
@@ -173,7 +175,7 @@ List the books available in the calibre database.
))
parser.add_option('-f', '--fields', default='title,authors',
help=_('The fields to display when listing books in the database. Should be a comma separated list of fields.\nAvailable fields: %s\nDefault: %%default. The special field "all" can be used to select all fields. Only has effect in the text output format.')%','.join(FIELDS))
- parser.add_option('--sort-by', default='timestamp',
+ parser.add_option('--sort-by', default=None,
help=_('The field by which to sort the results.\nAvailable fields: %s\nDefault: %%default')%','.join(FIELDS))
parser.add_option('--ascending', default=False, action='store_true',
help=_('Sort results in ascending order'))
@@ -197,7 +199,7 @@ List the books available in the calibre database.
return 1
db = get_db(dbpath, opts)
- if not opts.sort_by in FIELDS:
+ if not opts.sort_by in FIELDS and opts.sort_by is not None:
parser.print_help()
print
print >>sys.stderr, _('Invalid sort field. Available fields:'), ','.join(FIELDS)
@@ -461,7 +463,7 @@ show_metadata command.
def do_export(db, ids, dir, single_dir, by_author):
+ '''Export the books with the given ids (every book when ids is None) to dir through db.export_to_dir.'''
if ids is None:
- ids = db.all_ids()
+ # all_ids() yields ids lazily in LibraryDatabase2, so materialise them into a list first
+ ids = list(db.all_ids())
db.export_to_dir(dir, ids, byauthor=by_author, single_dir=single_dir, index_is_id=True)
def command_export(args, dbpath):
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index f1a61f122d..d02799714f 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -9,6 +9,7 @@ The database used to store ebook metadata
import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \
operator, itertools, functools, traceback
from itertools import repeat
+from datetime import datetime
from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock
from PyQt4.QtGui import QApplication, QPixmap, QImage
@@ -16,6 +17,7 @@ __app = None
from calibre.library.database import LibraryDatabase
from calibre.library.sqlite import connect, IntegrityError
+from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.constants import preferred_encoding, iswindows, isosx
@@ -48,6 +50,12 @@ def sanitize_file_name(name, substitute='_'):
one = _filename_sanitize.sub(substitute, name)
return re.sub(r'\s', ' ', one).strip()
+# Maps a metadata field name to the column position used to index the
+# rows that ResultCache keeps in memory.
+FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
+ 'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,
+ 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14}
+# Reverse lookup: column position -> field name.
+INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys()))
+
+
class CoverCache(QThread):
def __init__(self, library_path, parent=None):
@@ -158,27 +166,15 @@ class CoverCache(QThread):
self.load_queue.appendleft(id)
self.load_queue_lock.unlock()
-class ResultCache(object):
+class ResultCache(SearchQueryParser):
'''
Stores sorted and filtered metadata in memory.
'''
- METHOD_MAP = {
- 'title' : 'title',
- 'authors' : 'author_sort',
- 'author' : 'author_sort',
- 'publisher' : 'publisher',
- 'size' : 'size',
- 'date' : 'timestamp',
- 'timestamp' : 'timestamp',
- 'rating' : 'rating',
- 'tags' : 'tags',
- 'series' : 'series',
- }
-
def __init__(self):
self._map = self._map_filtered = self._data = []
+ SearchQueryParser.__init__(self)
def __getitem__(self, row):
return self._data[self._map_filtered[row]]
@@ -189,6 +185,31 @@ class ResultCache(object):
def __iter__(self):
for id in self._map_filtered:
yield self._data[id]
+
+ def universal_set(self):
+ '''Return the set of ids (column 0) of every row in the cache that has not been removed.'''
+ return set([i[0] for i in self._data if i is not None])
+
+    def get_matches(self, location, query):
+        '''
+        Return the set of ids of all cached books whose `location` field
+        contains `query` as a case insensitive substring.
+
+        :param location: Field to search: title, authors, publisher, tags,
+                         comments, series or formats (the singular forms tag,
+                         author and format are accepted), or ``all`` to search
+                         every one of those fields.
+        :param query: Text to look for. An empty or whitespace only query
+                      matches nothing.
+        :return: A set of book ids. A location that is not searchable yields
+                 an empty set.
+        '''
+        matches = set([])
+        if not query or not query.strip():
+            return matches
+        location = location.lower().strip()
+        query = query.lower()
+        if location in ('tag', 'author', 'format'):
+            location += 's'
+        searchable = ('title', 'authors', 'publisher', 'tags', 'comments', 'series', 'formats')
+        if location == 'all':
+            columns = [FIELD_MAP[x] for x in searchable]
+        elif location in searchable:
+            columns = [FIELD_MAP[location]]
+        else:
+            # Nothing can match in a field that is not searchable; this used
+            # to escape as a bare KeyError from the lookup table.
+            return matches
+        for item in self._data:
+            if item is None: continue  # slot of a removed book
+            for col in columns:
+                # Columns without a value are falsy and can never match
+                if item[col] and query in item[col].lower():
+                    matches.add(item[0])
+                    break
+        return matches
def remove(self, id):
self._data[id] = None
@@ -222,103 +243,49 @@ class ResultCache(object):
self._map[0:0] = ids
self._map_filtered[0:0] = ids
- def refresh(self, db, field, ascending):
- field = field.lower()
- method = getattr(self, 'sort_on_' + self.METHOD_MAP[field])
- # Fast mapping from sorted row numbers to ids
- self._map = map(operator.itemgetter(0), method('ASC' if ascending else 'DESC', db)) # Preserves sort order
- # Fast mapping from sorted, filtered row numbers to ids
- # At the moment it is the same as self._map
- self._map_filtered = list(self._map)
+ def refresh(self, db, field=None, ascending=True):
+ '''
+ Reload every row of the meta table from db into memory, rebuild the list
+ of ids and, when field is given, sort that list by it. The filtered view
+ is reset to the full list of ids.
+ '''
temp = db.conn.get('SELECT * FROM meta')
- # Fast mapping from ids to data.
- # Can be None for ids that dont exist (i.e. have been deleted)
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
for r in temp:
self._data[r[0]] = r
+ # Slots that received no row stay None and are left out of the id list
+ self._map = [i[0] for i in self._data if i is not None]
+ if field is not None:
+ self.sort(field, ascending)
+ self._map_filtered = list(self._map)
- def filter(self, filters, refilter=False, OR=False):
- '''
- Filter data based on filters. All the filters must match for an item to
- be accepted. Matching is case independent regexp matching.
- @param filters: A list of SearchToken objects
- @param refilter: If True filters are applied to the results of the previous
- filtering.
- @param OR: If True, keeps a match if any one of the filters matches. If False,
- keeps a match only if all the filters match
- '''
- if not refilter:
- self._map_filtered = list(self._map)
- if filters:
- remove = []
- for id in self._map_filtered:
- if OR:
- keep = False
- for token in filters:
- if token.match(self._data[id]):
- keep = True
- break
- if not keep:
- remove.append(id)
- else:
- for token in filters:
- if not token.match(self._data[id]):
- remove.append(id)
- break
- for id in remove:
- self._map_filtered.remove(id)
+ def seriescmp(self, x, y):
+ '''Compare the cached rows with ids x and y on column 9 (series), breaking ties on column 10 (series_index).'''
+ # NOTE(review): strcmp is not defined in the part of this class visible here -- confirm it exists elsewhere
+ ans = self.strcmp(self._data[x][9], self._data[y][9])
+ if ans != 0: return ans
+ return cmp(self._data[x][10], self._data[y][10])
- def sort_on_title(self, order, db):
- return db.conn.get('SELECT id FROM books ORDER BY sort ' + order)
+    def cmp(self, loc, x, y, str=True):
+        '''
+        Compare the cached rows with ids `x` and `y` on column `loc`, falling
+        back to column 11 (the title sort string) to break ties.
+
+        :param loc: Column position to compare on.
+        :param x, y: Ids of the two rows to compare.
+        :param str: If True the column holds text and is compared case
+                    insensitively, otherwise the raw values are compared.
+        :return: A negative number, zero or a positive number, as for the
+                 builtin cmp.
+        '''
+        a, b = self._data[x], self._data[y]
+        if str:
+            # Text columns can be empty (None) for a book, and None has no
+            # lower(); treat a missing value as the empty string.
+            ans = cmp((a[loc] or '').lower(), (b[loc] or '').lower())
+        else:
+            ans = cmp(a[loc], b[loc])
+        if ans != 0: return ans
+        return cmp((a[11] or '').lower(), (b[11] or '').lower())
- def sort_on_author_sort(self, order, db):
- return db.conn.get('SELECT id FROM books ORDER BY author_sort,sort ' + order)
+    def sort(self, field, ascending):
+        '''
+        Sort the cached list of ids in place.
+
+        :param field: Name of the field to sort by. The singular forms author,
+                      tag and comment are accepted, ``date`` is an alias for
+                      timestamp, and title and authors are sorted on their
+                      dedicated sort columns.
+        :param ascending: If False the order is reversed.
+        '''
+        field = field.lower().strip()
+        if field in ('author', 'tag', 'comment'):
+            field += 's'
+        # 'author' has already become 'authors' above, so the alias has to be
+        # keyed on the plural form to ever take effect.
+        aliases = {'date': 'timestamp', 'title': 'sort', 'authors': 'author_sort'}
+        field = aliases.get(field, field)
+        if field == 'series':
+            fcmp = self.seriescmp
+        else:
+            numeric = ('id', 'size', 'rating', 'timestamp', 'series_index')
+            # The flag must be passed by keyword: given positionally, partial
+            # would bind it to the x parameter of cmp and shift both ids.
+            fcmp = functools.partial(self.cmp, FIELD_MAP[field],
+                    str=field not in numeric)
+        self._map.sort(cmp=fcmp, reverse=not ascending)
+
+    def search(self, query):
+        '''
+        Restrict the filtered view to the books matching `query`, keeping the
+        current sort order. An empty or whitespace only query resets the view
+        to every book.
+
+        :param query: Search expression handed to self.parse.
+        '''
+        if not query or not query.strip():
+            self._map_filtered = list(self._map)
+            return
+        # A set makes each membership test below O(1); a sorted list made the
+        # filter quadratic in the size of the library.
+        matches = set(self.parse(query))
+        self._map_filtered = [i for i in self._map if i in matches]
- def sort_on_timestamp(self, order, db):
- return db.conn.get('SELECT id FROM books ORDER BY id ' + order)
- def sort_on_publisher(self, order, db):
- no_publisher = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_publishers_link) ORDER BY books.sort')
- ans = []
- for r in db.conn.get('SELECT id FROM publishers ORDER BY name '+order):
- publishers_id = r[0]
- ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_publishers_link WHERE publisher=?) ORDER BY books.sort '+order, (publishers_id,))
- ans = (no_publisher + ans) if order == 'ASC' else (ans + no_publisher)
- return ans
-
-
- def sort_on_size(self, order, db):
- return db.conn.get('SELECT id FROM meta ORDER BY size ' + order)
-
- def sort_on_rating(self, order, db):
- no_rating = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_ratings_link) ORDER BY books.sort')
- ans = []
- for r in db.conn.get('SELECT id FROM ratings ORDER BY rating '+order):
- ratings_id = r[0]
- ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_ratings_link WHERE rating=?) ORDER BY books.sort', (ratings_id,))
- ans = (no_rating + ans) if order == 'ASC' else (ans + no_rating)
- return ans
-
-
- def sort_on_series(self, order, db):
- no_series = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_series_link) ORDER BY books.sort')
- ans = []
- for r in db.conn.get('SELECT id FROM series ORDER BY name '+order):
- series_id = r[0]
- ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_series_link WHERE series=?) ORDER BY books.series_index,books.id '+order, (series_id,))
- ans = (no_series + ans) if order == 'ASC' else (ans + no_series)
- return ans
-
-
- def sort_on_tags(self, order, db):
- no_tags = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_tags_link) ORDER BY books.sort')
- ans = []
- for r in db.conn.get('SELECT id FROM tags ORDER BY name '+order):
- tag_id = r[0]
- ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_tags_link WHERE tag=?) ORDER BY books.sort '+order, (tag_id,))
- ans = (no_tags + ans) if order == 'ASC' else (ans + no_tags)
- return ans
-
class Tag(unicode):
def __init__(self, name):
@@ -381,8 +348,10 @@ class LibraryDatabase2(LibraryDatabase):
self.user_version += 1
self.data = ResultCache()
- self.filter = self.data.filter
+ self.data.refresh()
+ self.search = self.data.search
self.refresh = functools.partial(self.data.refresh, self)
+ self.sort = functools.partial(self.data.refresh, self)
self.index = self.data.index
self.refresh_ids = functools.partial(self.data.refresh_ids, self.conn)
self.row = self.data.row
@@ -423,6 +392,10 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag'))
self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series'))
+ def last_modified(self):
+ ''' Return the modification time of the file at self.dbpath as a naive datetime object expressed in UTC'''
+ return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime)
+
def path(self, index, index_is_id=False):
'Return the relative path to the directory containing this books files as a unicode string.'
id = index if index_is_id else self.id(index)
@@ -993,12 +966,18 @@ class LibraryDatabase2(LibraryDatabase):
def __iter__(self):
+ # Yields every cached row, skipping empty (None) slots; the cache is loaded first if it holds nothing
- if len(self.data) == 0:
+ if len(self.data._data) == 0:
self.refresh('timestamp', True)
- for record in self.data:
+ for record in self.data._data:
if record is not None:
yield record
-
+
+ def all_ids(self):
+ '''Yield the id of every record produced by iterating over this database (a generator, not a list).'''
+ for i in iter(self):
+ yield i['id']
+
+ def count(self):
+ '''Return the number of ids in the unfiltered map of the result cache.'''
+ return len(self.data._map)
def get_data_as_dict(self, prefix=None, authors_as_string=False):
'''
@@ -1012,7 +991,10 @@ class LibraryDatabase2(LibraryDatabase):
prefix = self.library_path
FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'isbn'])
data = []
- for record in iter(self):
+ if len(self.data) == 0:
+ self.refresh(None, True)
+ for record in self.data:
+ if record is None: continue
x = {}
for field in FIELDS:
x[field] = record[field]
diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py
index f1761e4ce0..f4a793bdea 100644
--- a/src/calibre/library/server.py
+++ b/src/calibre/library/server.py
@@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en'
HTTP server for remote access to the calibre database.
'''
-import sys, textwrap, cStringIO, mimetypes
+import sys, textwrap, cStringIO, mimetypes, operator, os
+from datetime import datetime
import cherrypy
from PIL import Image
@@ -36,21 +37,22 @@ class LibraryServer(object):
author_sort="${r[12]}"
authors="${authors}"
rating="${r[4]}"
- timestamp="${timestamp.ctime()}"
+ timestamp="${r[5].ctime()}"
size="${r[6]}"
isbn="${r[14] if r[14] else ''}"
formats="${r[13] if r[13] else ''}"
series = "${r[9] if r[9] else ''}"
series_index="${r[10]}"
tags="${r[7] if r[7] else ''}"
- publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''}
+ publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''}
+
''')
LIBRARY = MarkupTemplate(textwrap.dedent('''\
-
+
- ${Markup(book)}
+ ${Markup(book)}
'''))
@@ -60,12 +62,12 @@ class LibraryServer(object):
${record['title']}urn:calibre:${record['id']}${authors}
- ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+0000')}
+ ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+00:00')}
-
-
${record['comments']}
+
+
${record['comments']}
'''))
@@ -74,6 +76,8 @@ class LibraryServer(object):
calibre Library
+ $id
+ ${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}calibrehttp://calibre.kovidgoyal.net
@@ -94,6 +98,7 @@ class LibraryServer(object):
item
break
self.opts = opts
+
cherrypy.config.update({
'server.socket_port': opts.port,
'server.socket_timeout': opts.timeout, #seconds
@@ -106,14 +111,6 @@ class LibraryServer(object):
tools.gzip.mime_types = ['text/html', 'text/plain', 'text/xml']
''')%dict(autoreload=opts.develop)
- def to_xml(self):
- books = []
- book = MarkupTemplate(self.BOOK)
- for record in iter(self.db):
- authors = ' & '.join([i.replace('|', ',') for i in record[2].split(',')])
- books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8'))
- return self.LIBRARY.generate(books=books).render('xml')
-
def start(self):
cherrypy.quickstart(self, config=cStringIO.StringIO(self.config))
@@ -122,6 +119,10 @@ class LibraryServer(object):
if cover is None:
raise cherrypy.HTTPError(404, 'no cover available for id: %d'%id)
cherrypy.response.headers['Content-Type'] = 'image/jpeg'
+ path = getattr(cover, 'name', None)
+ if path and os.path.exists(path):
+ # datetime provides utcfromtimestamp(); fromutctimestamp() does not exist and raised AttributeError
+ updated = datetime.utcfromtimestamp(os.stat(path).st_mtime)
+ cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
if not thumbnail:
return cover.read()
try:
@@ -139,18 +140,41 @@ class LibraryServer(object):
def get_format(self, id, format):
+ '''
+ Return the contents of the requested format of book id, setting the
+ Content-Type and, when the file path is known, Last-Modified headers.
+ Raises a 404 HTTPError if the book does not have that format.
+ '''
format = format.upper()
- fmt = self.db.format(id, format, index_is_id=True)
+ fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
if fmt is None:
raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
mt = mimetypes.guess_type('dummy.'+format.lower())[0]
if mt is None:
mt = 'application/octet-stream'
cherrypy.response.headers['Content-Type'] = mt
- return fmt
+ path = getattr(fmt, 'name', None)
+ if path and os.path.exists(path):
+ # datetime provides utcfromtimestamp(); fromutctimestamp() does not exist and raised AttributeError
+ updated = datetime.utcfromtimestamp(os.stat(path).st_mtime)
+ cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
+ return fmt.read()
+
+ def sort(self, items, field):
+ '''
+ Sort the list of records ``items`` in place by ``field``, which must be
+ one of title, author (or authors) and rating. Any other value raises a
+ 400 HTTPError.
+ '''
+ field = field.lower().strip()
+ if field == 'author':
+ field = 'authors'
+ if field not in ('title', 'authors', 'rating'):
+ raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
+ # rating is compared as is, the other two fields case insensitively
+ cmpf = cmp if field == 'rating' else lambda x, y: cmp(x.lower(), y.lower())
+ # Translate the field name into the record column that is compared
+ field = {'title':11, 'authors':12, 'rating':4}[field]
+ getter = operator.itemgetter(field)
+ items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)))
+
+    def last_modified(self, updated):
+        '''
+        Format ``updated`` as an HTTP date for use in a Last-Modified header,
+        for example ``Mon, 05 Jan 2009 10:20:30 GMT``.
+
+        :param updated: A datetime object; it is labelled as GMT without any
+                        conversion.
+        :return: The formatted date as a string.
+        '''
+        # Day and month names are spelled out here rather than taken from
+        # strftime, whose %a/%b output depends on the current locale. The
+        # previous code called strftime on the already formatted string,
+        # which raised AttributeError.
+        days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')  # weekday(): Monday is 0
+        months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
+                  'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
+        return '%s, %02d %s %04d %02d:%02d:%02d GMT' % (
+                days[updated.weekday()], updated.day, months[updated.month - 1],
+                updated.year, updated.hour, updated.minute, updated.second)
+
@expose
def stanza(self):
- cherrypy.response.headers['Content-Type'] = 'text/xml'
books = []
for record in iter(self.db):
if 'EPUB' in record['formats'].upper():
@@ -160,12 +184,45 @@ class LibraryServer(object):
port=self.opts.port,
server=self.opts.hostname,
).render('xml').decode('utf8'))
- return self.STANZA.generate(subtitle='', data=books).render('xml')
+
+ updated = self.db.last_modified()
+ cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
+ cherrypy.response.headers['Content-Type'] = 'text/xml'
+
+ return self.STANZA.generate(subtitle='', data=books,
+ updated=updated, id='urn:calibre:main').render('xml')
@expose
- def library(self):
+ def library(self, start='0', num='50', sort=None, search=None):
+ '''
+ Serves the (optionally filtered and sorted) list of books as XML.
+
+ :param sort: Sort results by ``sort``. Can be one of `title,author,rating`.
+ :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax
+ :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results
+ '''
+ try:
+ start = int(start)
+ except ValueError:
+ raise cherrypy.HTTPError(400, 'start: %s is not an integer'%start)
+ try:
+ num = int(num)
+ except ValueError:
+ raise cherrypy.HTTPError(400, 'num: %s is not an integer'%num)
+ ids = self.db.data.parse(search) if search else self.db.data.universal_set()
+ # Only used for the membership test below: a set keeps each test O(1), a sorted list made the filter quadratic
+ ids = set(ids)
+ items = [r for r in iter(self.db) if r[0] in ids]
+ if sort is not None:
+ self.sort(items, sort)
+
+ book, books = MarkupTemplate(self.BOOK), []
+ for record in items[start:start+num]:
+ authors = ' & '.join([i.replace('|', ',') for i in record[2].split(',')])
+ books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8'))
+ updated = self.db.last_modified()
+
cherrypy.response.headers['Content-Type'] = 'text/xml'
- return self.to_xml()
+ cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
+ # NOTE(review): total is the size of the whole library, not the number of matches -- confirm this is intended
+ return self.LIBRARY.generate(books=books, start=start, updated=updated,
+ total=self.db.count()).render('xml')
@expose
def index(self):
@@ -217,4 +274,4 @@ def main(args=sys.argv):
return 0
if __name__ == '__main__':
- sys.exit(main())
\ No newline at end of file
+ sys.exit(main())
diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py
index 7c5a49c946..05e0ed3e8f 100644
--- a/src/calibre/utils/search_query_parser.py
+++ b/src/calibre/utils/search_query_parser.py
@@ -122,7 +122,7 @@ class SearchQueryParser(object):
self._parser = Or
#self._parser.setDebug(True)
- self.parse('(tolstoy)')
+ #self.parse('(tolstoy)')
self._parser.setDebug(False)
@@ -520,4 +520,4 @@ def main(args=sys.argv):
return 0
if __name__ == '__main__':
- sys.exit(main())
\ No newline at end of file
+ sys.exit(main())