This commit is contained in:
Kovid Goyal 2008-11-01 13:44:23 -07:00
parent b3eb0dc196
commit d02ec242d5
5 changed files with 188 additions and 148 deletions

View File

@ -175,8 +175,7 @@ class BooksModel(QAbstractTableModel):
self.endInsertRows()
def search(self, text, refinement, reset=True):
tokens, OR = self.search_tokens(text)
self.db.filter(tokens, refilter=refinement, OR=OR)
self.db.search(text)
self.last_search = text
if reset:
self.clear_caches()

View File

@ -9,6 +9,7 @@ Command line interface to the calibre database.
import sys, os, cStringIO
from textwrap import TextWrapper
from urllib import quote
from calibre import terminal_controller, preferred_encoding
from calibre.utils.config import OptionParser, prefs
@ -18,7 +19,6 @@ except:
send_message = None
from calibre.ebooks.metadata.meta import get_metadata
from calibre.library.database2 import LibraryDatabase2
from calibre.library.database import text_to_tokens
from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
from calibre.utils.genshi.template import MarkupTemplate
@ -67,6 +67,8 @@ STANZA_TEMPLATE='''\
<name>calibre</name>
<uri>http://calibre.kovidgoyal.net</uri>
</author>
<id>$id</id>
<updated>${updated.strftime('%Y-%m-%dT%H:%M:%SZ')}</updated>
<subtitle>
${subtitle}
</subtitle>
@ -75,11 +77,11 @@ STANZA_TEMPLATE='''\
<title>${record['title']}</title>
<id>urn:calibre:${record['id']}</id>
<author><name>${record['authors']}</name></author>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+0000')}</updated>
<link type="application/epub+zip" href="${record['fmt_epub'].replace(sep, '/')}" />
<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${record['cover'].replace(sep, '/')}" />
<content py:if="record['comments']" type="xhtml">
<pre>${record['comments']}</pre>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')}</updated>
<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/'))}" />
<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}" />
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml"><pre>${record['comments']}</pre></div>
</content>
</entry>
</py:for>
@ -97,15 +99,14 @@ def get_db(dbpath, options):
if options.library_path is not None:
dbpath = options.library_path
dbpath = os.path.abspath(dbpath)
print _('Using library at'), dbpath
return LibraryDatabase2(dbpath, row_factory=True)
def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
prefix, output_format, subtitle='Books in the calibre database'):
db.refresh(sort_by, ascending)
if sort_by:
db.sort(sort_by, ascending)
if search_text:
filters, OR = text_to_tokens(search_text)
db.filter(filters, False, OR)
db.search(search_text)
authors_to_string = output_format in ['stanza', 'text']
data = db.get_data_as_dict(prefix, authors_as_string=authors_to_string)
fields = ['id'] + fields
@ -159,7 +160,8 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
elif output_format == 'stanza':
data = [i for i in data if i.has_key('fmt_epub')]
template = MarkupTemplate(STANZA_TEMPLATE)
return template.generate(data=data, subtitle=subtitle, sep=os.sep).render('xml')
return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')
@ -173,7 +175,7 @@ List the books available in the calibre database.
))
parser.add_option('-f', '--fields', default='title,authors',
help=_('The fields to display when listing books in the database. Should be a comma separated list of fields.\nAvailable fields: %s\nDefault: %%default. The special field "all" can be used to select all fields. Only has effect in the text output format.')%','.join(FIELDS))
parser.add_option('--sort-by', default='timestamp',
parser.add_option('--sort-by', default=None,
help=_('The field by which to sort the results.\nAvailable fields: %s\nDefault: %%default')%','.join(FIELDS))
parser.add_option('--ascending', default=False, action='store_true',
help=_('Sort results in ascending order'))
@ -197,7 +199,7 @@ List the books available in the calibre database.
return 1
db = get_db(dbpath, opts)
if not opts.sort_by in FIELDS:
if not opts.sort_by in FIELDS and opts.sort_by is not None:
parser.print_help()
print
print >>sys.stderr, _('Invalid sort field. Available fields:'), ','.join(FIELDS)
@ -461,7 +463,7 @@ show_metadata command.
def do_export(db, ids, dir, single_dir, by_author):
    '''
    Export books from ``db`` into the directory ``dir``.

    :param db: Database object providing ``all_ids()`` and ``export_to_dir()``.
    :param ids: Ids of the books to export. If None, every id reported by
                ``db.all_ids()`` is exported.
    :param dir: Destination directory, passed through to ``export_to_dir``.
    :param single_dir: Passed through to ``export_to_dir`` as ``single_dir``.
    :param by_author: Passed through to ``export_to_dir`` as ``byauthor``.
    '''
    if ids is None:
        # all_ids() may be a one-shot generator; materialise it so that
        # export_to_dir receives a real list it can measure and re-iterate.
        ids = list(db.all_ids())
    db.export_to_dir(dir, ids, byauthor=by_author, single_dir=single_dir, index_is_id=True)
def command_export(args, dbpath):

View File

@ -9,6 +9,7 @@ The database used to store ebook metadata
import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \
operator, itertools, functools, traceback
from itertools import repeat
from datetime import datetime
from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock
from PyQt4.QtGui import QApplication, QPixmap, QImage
@ -16,6 +17,7 @@ __app = None
from calibre.library.database import LibraryDatabase
from calibre.library.sqlite import connect, IntegrityError
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.constants import preferred_encoding, iswindows, isosx
@ -48,6 +50,12 @@ def sanitize_file_name(name, substitute='_'):
one = _filename_sanitize.sub(substitute, name)
return re.sub(r'\s', ' ', one).strip()
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,
'sort':11, 'author_sort':12, 'formats':13, 'isbn':14}
INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys()))
class CoverCache(QThread):
def __init__(self, library_path, parent=None):
@ -158,27 +166,15 @@ class CoverCache(QThread):
self.load_queue.appendleft(id)
self.load_queue_lock.unlock()
class ResultCache(object):
class ResultCache(SearchQueryParser):
'''
Stores sorted and filtered metadata in memory.
'''
METHOD_MAP = {
'title' : 'title',
'authors' : 'author_sort',
'author' : 'author_sort',
'publisher' : 'publisher',
'size' : 'size',
'date' : 'timestamp',
'timestamp' : 'timestamp',
'rating' : 'rating',
'tags' : 'tags',
'series' : 'series',
}
def __init__(self):
self._map = self._map_filtered = self._data = []
SearchQueryParser.__init__(self)
def __getitem__(self, row):
return self._data[self._map_filtered[row]]
@ -190,6 +186,31 @@ class ResultCache(object):
for id in self._map_filtered:
yield self._data[id]
def universal_set(self):
    '''
    Return the set of ids (column 0) of every record currently held in the
    cache. Slots that are None (ids with no record) are skipped.
    '''
    ids = set()
    for record in self._data:
        if record is not None:
            ids.add(record[0])
    return ids
def get_matches(self, location, query):
    '''
    Return the set of ids of all cached records that match ``query``.

    Matching is a case insensitive substring test against the text of the
    selected column(s).

    :param location: Name of the field to search. The singular forms ``tag``,
                     ``author`` and ``format`` are accepted as aliases of the
                     plural field names. The special value ``all`` searches
                     every searchable field.
    :param query: Text to look for. An empty or whitespace-only query
                  matches nothing.
    :return: A set of matching ids. A ``location`` that is not a searchable
             field yields an empty set.
    '''
    matches = set([])
    if not query or not query.strip():
        return matches

    location = location.lower().strip()
    query = query.lower()
    if location in ('tag', 'author', 'format'):
        location += 's'

    searchable = ('title', 'authors', 'publisher', 'tags', 'comments', 'series', 'formats')
    if location == 'all':
        columns = [FIELD_MAP[name] for name in searchable]
    elif location in searchable:
        columns = [FIELD_MAP[location]]
    else:
        # Not a searchable field: nothing can match
        return matches

    for item in self._data:
        if item is None:
            continue
        for col in columns:
            # Empty/None columns can never contain the query
            if item[col] and query in item[col].lower():
                matches.add(item[0])
                break
    return matches
def remove(self, id):
self._data[id] = None
if id in self._map:
@ -222,102 +243,48 @@ class ResultCache(object):
self._map[0:0] = ids
self._map_filtered[0:0] = ids
def refresh(self, db, field, ascending):
field = field.lower()
method = getattr(self, 'sort_on_' + self.METHOD_MAP[field])
# Fast mapping from sorted row numbers to ids
self._map = map(operator.itemgetter(0), method('ASC' if ascending else 'DESC', db)) # Preserves sort order
# Fast mapping from sorted, filtered row numbers to ids
# At the moment it is the same as self._map
self._map_filtered = list(self._map)
def refresh(self, db, field=None, ascending=True):
    '''
    Reload all rows of ``meta`` from ``db`` and rebuild the id maps.

    :param db: Object whose ``conn.get(sql)`` returns the rows of the query.
               Column 0 of each row is used as the record id.
    :param field: If not None, ``self.sort(field, ascending)`` is called after
                  the data has been reloaded.
    :param ascending: Sort direction, only used when ``field`` is given.
    '''
    temp = db.conn.get('SELECT * FROM meta')
    # Fast mapping from ids to data.
    # Can be None for ids that dont exist (i.e. have been deleted)
    if temp:
        # Size the table from the largest id rather than from the last row:
        # the query has no ORDER BY, so the last row need not hold the
        # largest id.
        largest_id = max([r[0] for r in temp])
        self._data = [None] * (largest_id + 2)
    else:
        self._data = []
    for r in temp:
        self._data[r[0]] = r
    # Ids of all existing records, in ascending id order
    self._map = [i[0] for i in self._data if i is not None]
    if field is not None:
        self.sort(field, ascending)
    # No filter is active after a refresh
    self._map_filtered = list(self._map)
def filter(self, filters, refilter=False, OR=False):
'''
Filter data based on filters. All the filters must match for an item to
be accepted. Matching is case independent regexp matching.
@param filters: A list of SearchToken objects
@param refilter: If True filters are applied to the results of the previous
filtering.
@param OR: If True, keeps a match if any one of the filters matches. If False,
keeps a match only if all the filters match
'''
if not refilter:
self._map_filtered = list(self._map)
if filters:
remove = []
for id in self._map_filtered:
if OR:
keep = False
for token in filters:
if token.match(self._data[id]):
keep = True
break
if not keep:
remove.append(id)
else:
for token in filters:
if not token.match(self._data[id]):
remove.append(id)
break
for id in remove:
self._map_filtered.remove(id)
def seriescmp(self, x, y):
ans = self.strcmp(self._data[x][9], self._data[y][9])
if ans != 0: return ans
return cmp(self._data[x][10], self._data[y][10])
def sort_on_title(self, order, db):
return db.conn.get('SELECT id FROM books ORDER BY sort ' + order)
def cmp(self, loc, x, y, str=True):
    '''
    Three-way comparison of the records with ids ``x`` and ``y`` on column
    ``loc``, for use as a ``cmp`` function when sorting ids.

    :param loc: Index of the column to compare.
    :param x, y: Ids of the two records (indices into ``self._data``).
    :param str: If True the column values are compared case insensitively.
    :return: Negative, zero or positive. Ties are broken by a case
             insensitive comparison of column 11.
    '''
    left, right = self._data[x][loc], self._data[y][loc]
    if str:
        try:
            left, right = left.lower(), right.lower()
        except AttributeError:
            # Some entries may be None: fall back to comparing the raw values
            # (left/right are still unchanged, the tuple was never assigned)
            pass
    # Inside a method the bare name ``cmp`` is the builtin, not this method
    ans = cmp(left, right)
    if ans != 0:
        return ans
    return cmp(self._data[x][11].lower(), self._data[y][11].lower())
def sort_on_author_sort(self, order, db):
return db.conn.get('SELECT id FROM books ORDER BY author_sort,sort ' + order)
def sort(self, field, ascending):
    '''
    Sort ``self._map`` in place by ``field``.

    :param field: Name of the field to sort on (case and surrounding
                  whitespace are ignored). ``author``, ``tag`` and ``comment``
                  are aliases for the plural names, ``date`` for
                  ``timestamp``. ``title`` sorts on the ``sort`` column and
                  ``authors`` on the ``author_sort`` column. ``series`` uses
                  ``self.seriescmp``; every other field must be a key of
                  ``FIELD_MAP``.
    :param ascending: If False the order is reversed.

    ``self._map_filtered`` is not touched.
    '''
    field = field.lower().strip()
    if field in ('author', 'tag', 'comment'):
        field += 's'
    if field == 'date':
        field = 'timestamp'
    elif field == 'title':
        field = 'sort'
    elif field == 'authors':
        # 'author' has already been pluralised above, so test the plural form
        field = 'author_sort'
    if field == 'series':
        fcmp = self.seriescmp
    else:
        # The flag must be bound by keyword: bound positionally it would land
        # in cmp()'s ``x`` parameter and shift the two ids being compared.
        fcmp = functools.partial(self.cmp, FIELD_MAP[field],
                str=field not in ('size', 'rating', 'timestamp'))
    self._map.sort(cmp=fcmp, reverse=not ascending)
def sort_on_timestamp(self, order, db):
return db.conn.get('SELECT id FROM books ORDER BY id ' + order)
def search(self, query):
    '''
    Restrict ``self._map_filtered`` to the ids matched by ``query``.

    The relative order of ``self._map`` is preserved. An empty or whitespace
    only ``query`` (or None) removes the restriction.

    :param query: Search expression, handed to ``self.parse``.
    '''
    if not query or not query.strip():
        self._map_filtered = list(self._map)
        return
    # Keep the matches in a set: membership is tested once per id in _map
    matches = set(self.parse(query))
    self._map_filtered = [book_id for book_id in self._map if book_id in matches]
def sort_on_publisher(self, order, db):
no_publisher = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_publishers_link) ORDER BY books.sort')
ans = []
for r in db.conn.get('SELECT id FROM publishers ORDER BY name '+order):
publishers_id = r[0]
ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_publishers_link WHERE publisher=?) ORDER BY books.sort '+order, (publishers_id,))
ans = (no_publisher + ans) if order == 'ASC' else (ans + no_publisher)
return ans
def sort_on_size(self, order, db):
return db.conn.get('SELECT id FROM meta ORDER BY size ' + order)
def sort_on_rating(self, order, db):
no_rating = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_ratings_link) ORDER BY books.sort')
ans = []
for r in db.conn.get('SELECT id FROM ratings ORDER BY rating '+order):
ratings_id = r[0]
ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_ratings_link WHERE rating=?) ORDER BY books.sort', (ratings_id,))
ans = (no_rating + ans) if order == 'ASC' else (ans + no_rating)
return ans
def sort_on_series(self, order, db):
no_series = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_series_link) ORDER BY books.sort')
ans = []
for r in db.conn.get('SELECT id FROM series ORDER BY name '+order):
series_id = r[0]
ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_series_link WHERE series=?) ORDER BY books.series_index,books.id '+order, (series_id,))
ans = (no_series + ans) if order == 'ASC' else (ans + no_series)
return ans
def sort_on_tags(self, order, db):
no_tags = db.conn.get('SELECT id FROM books WHERE books.id NOT IN (SELECT book FROM books_tags_link) ORDER BY books.sort')
ans = []
for r in db.conn.get('SELECT id FROM tags ORDER BY name '+order):
tag_id = r[0]
ans += db.conn.get('SELECT id FROM books WHERE books.id IN (SELECT book FROM books_tags_link WHERE tag=?) ORDER BY books.sort '+order, (tag_id,))
ans = (no_tags + ans) if order == 'ASC' else (ans + no_tags)
return ans
class Tag(unicode):
@ -381,8 +348,10 @@ class LibraryDatabase2(LibraryDatabase):
self.user_version += 1
self.data = ResultCache()
self.filter = self.data.filter
self.data.refresh()
self.search = self.data.search
self.refresh = functools.partial(self.data.refresh, self)
self.sort = functools.partial(self.data.refresh, self)
self.index = self.data.index
self.refresh_ids = functools.partial(self.data.refresh_ids, self.conn)
self.row = self.data.row
@ -423,6 +392,10 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag'))
self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series'))
def last_modified(self):
    '''
    Return the modification time of the file at ``self.dbpath`` as a naive
    datetime object expressed in UTC.
    '''
    mtime = os.stat(self.dbpath).st_mtime
    return datetime.utcfromtimestamp(mtime)
def path(self, index, index_is_id=False):
'Return the relative path to the directory containing this books files as a unicode string.'
id = index if index_is_id else self.id(index)
@ -993,12 +966,18 @@ class LibraryDatabase2(LibraryDatabase):
def __iter__(self):
if len(self.data) == 0:
if len(self.data._data) == 0:
self.refresh('timestamp', True)
for record in self.data:
for record in self.data._data:
if record is not None:
yield record
def all_ids(self):
    '''
    Generate the ``id`` field of every record produced by iterating over this
    object. This is a generator: wrap it in ``list()`` if a list is needed.
    '''
    for record in self:
        yield record['id']
def count(self):
    '''Return the number of ids in the cache's ``_map`` (the map before any search filtering).'''
    id_map = self.data._map
    return len(id_map)
def get_data_as_dict(self, prefix=None, authors_as_string=False):
'''
@ -1012,7 +991,10 @@ class LibraryDatabase2(LibraryDatabase):
prefix = self.library_path
FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'isbn'])
data = []
for record in iter(self):
if len(self.data) == 0:
self.refresh(None, True)
for record in self.data:
if record is None: continue
x = {}
for field in FIELDS:
x[field] = record[field]

View File

@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en'
HTTP server for remote access to the calibre database.
'''
import sys, textwrap, cStringIO, mimetypes
import sys, textwrap, cStringIO, mimetypes, operator, os
from datetime import datetime
import cherrypy
from PIL import Image
@ -36,21 +37,22 @@ class LibraryServer(object):
author_sort="${r[12]}"
authors="${authors}"
rating="${r[4]}"
timestamp="${timestamp.ctime()}"
timestamp="${r[5].ctime()}"
size="${r[6]}"
isbn="${r[14] if r[14] else ''}"
formats="${r[13] if r[13] else ''}"
series = "${r[9] if r[9] else ''}"
series_index="${r[10]}"
tags="${r[7] if r[7] else ''}"
publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''}</book>
publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''}
</book>
''')
# Template for the XML returned by library(): a single <library> element that
# carries the paging attributes (start, num, total, updated) and wraps the
# already rendered <book> elements passed in as ``books``.
LIBRARY = MarkupTemplate(textwrap.dedent('''\
<?xml version="1.0" encoding="utf-8"?>
<library xmlns:py="http://genshi.edgewall.org/" start="$start" num="${len(books)}" total="$total" updated="${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}">
<py:for each="book in books">
${Markup(book)}
</py:for>
</library>
'''))
@ -60,12 +62,12 @@ class LibraryServer(object):
<title>${record['title']}</title>
<id>urn:calibre:${record['id']}</id>
<author><name>${authors}</name></author>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+0000')}</updated>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
<link type="application/epub+zip" href="http://${server}:${port}/get/epub/${record['id']}" />
<link rel="x-stanza-cover-image" type="image/jpeg" href="http://${server}:${port}/get/cover/${record['id']}" />
<link rel="x-stanza-cover-image-thumbnail" type="image/jpeg" href="http://${server}:${port}/get/thumb/${record['id']}" />
<content py:if="record['comments']" type="xhtml">
<pre>${record['comments']}</pre>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml"><pre>${record['comments']}</pre></div>
</content>
</entry>
'''))
@ -74,6 +76,8 @@ class LibraryServer(object):
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:py="http://genshi.edgewall.org/">
<title>calibre Library</title>
<id>$id</id>
<updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
<author>
<name>calibre</name>
<uri>http://calibre.kovidgoyal.net</uri>
@ -94,6 +98,7 @@ class LibraryServer(object):
item
break
self.opts = opts
cherrypy.config.update({
'server.socket_port': opts.port,
'server.socket_timeout': opts.timeout, #seconds
@ -106,14 +111,6 @@ class LibraryServer(object):
tools.gzip.mime_types = ['text/html', 'text/plain', 'text/xml']
''')%dict(autoreload=opts.develop)
def to_xml(self):
books = []
book = MarkupTemplate(self.BOOK)
for record in iter(self.db):
authors = ' & '.join([i.replace('|', ',') for i in record[2].split(',')])
books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8'))
return self.LIBRARY.generate(books=books).render('xml')
def start(self):
cherrypy.quickstart(self, config=cStringIO.StringIO(self.config))
@ -122,6 +119,10 @@ class LibraryServer(object):
if cover is None:
raise cherrypy.HTTPError(404, 'no cover available for id: %d'%id)
cherrypy.response.headers['Content-Type'] = 'image/jpeg'
path = getattr(cover, 'name', None)
if path and os.path.exists(path):
updated = datetime.fromutctimestamp(os.stat(path).st_mtime)
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
if not thumbnail:
return cover.read()
try:
@ -139,18 +140,41 @@ class LibraryServer(object):
def get_format(self, id, format):
    '''
    Return the raw data of the book ``id`` in the format ``format``.

    Sets the Content-Type response header from the format's file extension
    (``application/octet-stream`` if the type cannot be guessed). If the
    object returned by the database has a ``name`` attribute that is an
    existing path, the Last-Modified header is set from that file's mtime.

    :param id: Id of the book.
    :param format: Format name, case insensitive.
    :raises cherrypy.HTTPError: 404 if the book does not have this format.
    '''
    format = format.upper()
    # as_file=True: presumably returns an open file-like object - its
    # ``name`` and ``read()`` are used below
    fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
    if fmt is None:
        raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
    mt = mimetypes.guess_type('dummy.'+format.lower())[0]
    if mt is None:
        mt = 'application/octet-stream'
    cherrypy.response.headers['Content-Type'] = mt
    path = getattr(fmt, 'name', None)
    if path and os.path.exists(path):
        # utcfromtimestamp: the Last-Modified header is expressed in GMT
        updated = datetime.utcfromtimestamp(os.stat(path).st_mtime)
        cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
    return fmt.read()
def sort(self, items, field):
    '''
    Sort the list of records ``items`` in place, in ascending order.

    :param items: List of records, indexable by column number.
    :param field: One of ``title``, ``author``/``authors`` or ``rating``
                  (case and surrounding whitespace are ignored). ``title``
                  sorts on column 11, ``authors`` on column 12 (both case
                  insensitively) and ``rating`` on column 4.
    :raises cherrypy.HTTPError: 400 if ``field`` is not a valid sort field.
    '''
    field = field.lower().strip()
    if field == 'author':
        field = 'authors'
    columns = {'title':11, 'authors':12, 'rating':4}
    if field not in columns:
        raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
    getter = operator.itemgetter(columns[field])
    if field == 'rating':
        items.sort(key=getter)
    else:
        # A key function lowercases each value once instead of on every
        # comparison; the resulting (stable) order is the same.
        items.sort(key=lambda item: getter(item).lower())
def last_modified(self, updated):
    '''
    Format ``updated`` as an HTTP date string, for example
    ``Sat, 01 Nov 2008 13:44:23 GMT``.

    Day and month names come from fixed English tables instead of strftime's
    %a/%b - presumably so that the result does not depend on the locale.

    :param updated: A datetime object. No timezone conversion is done, so it
                    is assumed to already be expressed in UTC.
    '''
    lm = updated.strftime('day, %d month %Y %H:%M:%S GMT')
    day = {0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'}
    # %w is the weekday as a number with 0 == Sunday. It has to be asked of
    # the datetime: ``lm`` is already a plain string.
    lm = lm.replace('day', day[int(updated.strftime('%w'))])
    month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul',
             8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
    return lm.replace('month', month[updated.month])
@expose
def stanza(self):
cherrypy.response.headers['Content-Type'] = 'text/xml'
books = []
for record in iter(self.db):
if 'EPUB' in record['formats'].upper():
@ -160,12 +184,45 @@ class LibraryServer(object):
port=self.opts.port,
server=self.opts.hostname,
).render('xml').decode('utf8'))
return self.STANZA.generate(subtitle='', data=books).render('xml')
updated = self.db.last_modified()
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
cherrypy.response.headers['Content-Type'] = 'text/xml'
return self.STANZA.generate(subtitle='', data=books,
updated=updated, id='urn:calibre:main').render('xml')
@expose
def library(self, start='0', num='50', sort=None, search=None):
    '''
    Return an XML listing of (a slice of) the books in the database.

    :param sort: Sort results by ``sort``. Can be one of `title,author,rating`.
    :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax
    :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results
    :raises cherrypy.HTTPError: 400 if ``start`` or ``num`` is not an integer
    '''
    try:
        start = int(start)
    except ValueError:
        raise cherrypy.HTTPError(400, 'start: %s is not an integer'%start)
    try:
        num = int(num)
    except ValueError:
        raise cherrypy.HTTPError(400, 'num: %s is not an integer'%num)
    ids = self.db.data.parse(search) if search else self.db.data.universal_set()
    # Only used for membership tests, so keep it a set
    ids = set(ids)
    items = [r for r in iter(self.db) if r[0] in ids]
    if sort is not None:
        self.sort(items, sort)

    book, books = MarkupTemplate(self.BOOK), []
    for record in items[start:start+num]:
        authors = ' & '.join([i.replace('|', ',') for i in record[2].split(',')])
        books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8'))
    updated = self.db.last_modified()

    cherrypy.response.headers['Content-Type'] = 'text/xml'
    cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
    # total is the size of the filtered result, so that a client paging with
    # start/num knows when it has reached the end of a search
    return self.LIBRARY.generate(books=books, start=start, updated=updated,
                                 total=len(items)).render('xml')
@expose
def index(self):

View File

@ -122,7 +122,7 @@ class SearchQueryParser(object):
self._parser = Or
#self._parser.setDebug(True)
self.parse('(tolstoy)')
#self.parse('(tolstoy)')
self._parser.setDebug(False)