Merge from trunk

commit 9e1627569f by Charles Haley, 2013-01-20 15:45:59 +01:00
12 changed files with 608 additions and 73 deletions


@@ -64,8 +64,8 @@ class Barrons(BasicNewsRecipe):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
             br.open('http://commerce.barrons.com/auth/login')
-            br.select_form(name='login_form')
-            br['user'] = self.username
+            br.select_form(nr=0)
+            br['username'] = self.username
             br['password'] = self.password
             br.submit()
         return br
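The fix above switches the Barrons login from selecting the form by name to selecting the first form on the page, and renames the user field. A minimal sketch of the two mechanize styles (the URL, form name and credentials below are placeholders, not Barrons specifics):

import mechanize

br = mechanize.Browser()
br.open('http://example.com/login')
# By name: brittle, breaks as soon as the site renames the form
# br.select_form(name='login_form')
# By position: works as long as the login form is the first form on the page
br.select_form(nr=0)
br['username'] = 'reader@example.com'
br['password'] = 'hunter2'
br.submit()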

recipes/conowego_pl.recipe: Executable file → Normal file (mode change only, 0 lines changed)

(image file changed: 605 B before, 605 B after)
recipes/linux_journal.recipe: Executable file → Normal file (mode change only, 0 lines changed)


@@ -18,6 +18,8 @@ class MichelleMalkin(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True
+    auto_cleanup = True
+    use_embedded_content = False

     conversion_options = {
@@ -29,16 +31,16 @@ class MichelleMalkin(BasicNewsRecipe):
     }

-    keep_only_tags = [
-        dict(name='div', attrs={'class':'article'})
-        ]
-    remove_tags = [
-        dict(name=['iframe', 'embed', 'object']),
-        dict(name='div', attrs={'id':['comments', 'commentForm']}),
-        dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
-        ]
+    #keep_only_tags = [
+        #dict(name='div', attrs={'class':'article'})
+        #]
+    #remove_tags = [
+        #dict(name=['iframe', 'embed', 'object']),
+        #dict(name='div', attrs={'id':['comments', 'commentForm']}),
+        #dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
+        #]

     feeds = [(u'http://feeds.feedburner.com/michellemalkin/posts')]
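The recipe now relies on calibre's auto_cleanup heuristics instead of the hand-maintained keep_only_tags/remove_tags lists, which are left commented out as a fallback. A minimal sketch of that style (class name, title and feed URL are placeholders):

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleBlog(BasicNewsRecipe):
    title = 'Example Blog'
    # Let the readability-style extractor find the article body
    # instead of enumerating tags to keep and remove by hand.
    auto_cleanup = True
    use_embedded_content = False  # always fetch the full article page
    feeds = [('Posts', 'http://example.com/feed')]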


@@ -13,6 +13,7 @@ from functools import wraps, partial
 from calibre.db.locking import create_locks, RecordLock
 from calibre.db.fields import create_field
+from calibre.db.search import Search
 from calibre.db.tables import VirtualTable
 from calibre.db.lazy import FormatMetadata, FormatsList
 from calibre.ebooks.metadata.book.base import Metadata
@@ -50,6 +51,7 @@ class Cache(object):
         self.record_lock = RecordLock(self.read_lock)
         self.format_metadata_cache = defaultdict(dict)
         self.formatter_template_cache = {}
+        self._search_api = Search(self.field_metadata.get_search_terms())

         # Implement locking for all simple read/write API methods
         # An unlocked version of the method is stored with the name starting
@@ -269,11 +271,11 @@ class Cache(object):
         return ()

     @read_api
-    def all_book_ids(self):
+    def all_book_ids(self, type=frozenset):
         '''
         Frozen set of all known book ids.
         '''
-        return frozenset(self.fields['uuid'])
+        return type(self.fields['uuid'])

     @read_api
     def all_field_ids(self, name):
@@ -316,6 +318,10 @@ class Cache(object):
             self.format_metadata_cache[book_id][fmt] = ans
         return ans

+    @read_api
+    def pref(self, name):
+        return self.backend.prefs[name]
+
     @api
     def get_metadata(self, book_id,
             get_cover=False, get_user_categories=True, cover_as_data=False):
@@ -378,17 +384,21 @@ class Cache(object):
         all_book_ids = frozenset(self._all_book_ids() if ids_to_sort is None
                 else ids_to_sort)
         get_metadata = partial(self._get_metadata, get_user_categories=False)
+        def get_lang(book_id):
+            ans = self._field_for('languages', book_id)
+            return ans[0] if ans else None
         fm = {'title':'sort', 'authors':'author_sort'}

         def sort_key(field):
             'Handle series type fields'
-            ans = self.fields[fm.get(field, field)].sort_keys_for_books(get_metadata,
-                    all_book_ids)
             idx = field + '_index'
-            if idx in self.fields:
-                idx_ans = self.fields[idx].sort_keys_for_books(get_metadata,
-                        all_book_ids)
+            is_series = idx in self.fields
+            ans = self.fields[fm.get(field, field)].sort_keys_for_books(
+                get_metadata, get_lang, all_book_ids)
+            if is_series:
+                idx_ans = self.fields[idx].sort_keys_for_books(
+                    get_metadata, get_lang, all_book_ids)
                 ans = {k:(v, idx_ans[k]) for k, v in ans.iteritems()}
             return ans
@@ -401,6 +411,10 @@ class Cache(object):
         else:
             return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))

+    @read_api
+    def search(self, query, restriction):
+        return self._search_api(self, query, restriction)
+
     # }}}

 class SortKey(object):
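Three small API additions above carry the new search feature: a pref() accessor, a search() entry point, and an all_book_ids() whose container type the caller picks. A hedged usage sketch, assuming cache is an initialized calibre.db.cache.Cache:

ids_for_display = cache.all_book_ids()       # frozenset: safe to share
candidates = cache.all_book_ids(type=set)    # set: search can copy and mutate it
gst = cache.pref('grouped_search_terms')     # backend prefs, read under the lock
matches = cache.search('rating:>3', '')      # delegates to calibre.db.search.Search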


@@ -9,14 +9,19 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 from threading import Lock
+from collections import defaultdict

 from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
+from calibre.ebooks.metadata import title_sort
+from calibre.utils.config_base import tweaks
 from calibre.utils.icu import sort_key
 from calibre.utils.date import UNDEFINED_DATE
 from calibre.utils.localization import calibre_langcode_to_name

 class Field(object):

+    is_many = False
+
     def __init__(self, name, table):
         self.name, self.table = name, table
         self.has_text_data = self.metadata['datatype'] in ('text', 'comments',
@@ -72,7 +77,7 @@ class Field(object):
         '''
         return iter(())

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         '''
         Return a mapping of book_id -> sort_key. The sort key is suitable for
         use in sorting the list of all books by this field, via the python cmp
@@ -81,6 +86,13 @@ class Field(object):
         '''
         raise NotImplementedError()

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        '''
+        Return a generator that yields items of the form (value, set of book
+        ids that have this value). Here, value is a searchable value. Returned
+        book_ids are restricted to the set of ids in candidates.
+        '''
+        raise NotImplementedError()

 class OneToOneField(Field):

@@ -96,10 +108,24 @@ class OneToOneField(Field):
     def __iter__(self):
         return self.table.book_col_map.iterkeys()

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         return {id_ : self._sort_key(self.table.book_col_map.get(id_,
             self._default_sort_key)) for id_ in all_book_ids}

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        cbm = self.table.book_col_map
+        if (self.name in {'id', 'uuid', 'title'} or
+                self.metadata['datatype'] == 'datetime'):
+            # Values are likely to be unique
+            for book_id in candidates:
+                yield cbm.get(book_id, default_value), {book_id}
+        else:
+            val_map = defaultdict(set)
+            for book_id in candidates:
+                val_map[cbm.get(book_id, default_value)].add(book_id)
+            for val, book_ids in val_map.iteritems():
+                yield val, book_ids

 class CompositeField(OneToOneField):

     def __init__(self, *args, **kwargs):
@@ -133,10 +159,16 @@ class CompositeField(OneToOneField):
             ans = mi.get('#'+self.metadata['label'])
         return ans

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
                 all_book_ids}

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        val_map = defaultdict(set)
+        for book_id in candidates:
+            val_map[self.get_value_with_cache(book_id, get_metadata)].add(book_id)
+        for val, book_ids in val_map.iteritems():
+            yield val, book_ids

 class OnDeviceField(OneToOneField):

@@ -170,12 +202,21 @@ class OnDeviceField(OneToOneField):
     def __iter__(self):
         return iter(())

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         return {id_ : self.for_book(id_) for id_ in
                 all_book_ids}

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        val_map = defaultdict(set)
+        for book_id in candidates:
+            val_map[self.for_book(book_id, default_value=default_value)].add(book_id)
+        for val, book_ids in val_map.iteritems():
+            yield val, book_ids

 class ManyToOneField(Field):

+    is_many = True
+
     def for_book(self, book_id, default_value=None):
         ids = self.table.book_col_map.get(book_id, None)
         if ids is not None:
@@ -196,7 +237,7 @@ class ManyToOneField(Field):
     def __iter__(self):
         return self.table.id_map.iterkeys()

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         ans = {id_ : self.table.book_col_map.get(id_, None)
                 for id_ in all_book_ids}
         sk_map = {cid : (self._default_sort_key if cid is None else
@@ -204,8 +245,17 @@ class ManyToOneField(Field):
                 for cid in ans.itervalues()}
         return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        cbm = self.table.col_book_map
+        for item_id, val in self.table.id_map.iteritems():
+            book_ids = set(cbm.get(item_id, ())).intersection(candidates)
+            if book_ids:
+                yield val, book_ids

 class ManyToManyField(Field):

+    is_many = True
+
     def __init__(self, *args, **kwargs):
         Field.__init__(self, *args, **kwargs)
         self.alphabetical_sort = self.name != 'authors'
@@ -227,7 +277,7 @@ class ManyToManyField(Field):
     def __iter__(self):
         return self.table.id_map.iterkeys()

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         ans = {id_ : self.table.book_col_map.get(id_, ())
                 for id_ in all_book_ids}
         all_cids = set()
@@ -239,6 +289,20 @@ class ManyToManyField(Field):
                 (self._default_sort_key,))
                 for id_, cids in ans.iteritems()}

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        cbm = self.table.col_book_map
+        for item_id, val in self.table.id_map.iteritems():
+            book_ids = set(cbm.get(item_id, ())).intersection(candidates)
+            if book_ids:
+                yield val, book_ids
+
+    def iter_counts(self, candidates):
+        val_map = defaultdict(set)
+        cbm = self.table.book_col_map
+        for book_id in candidates:
+            val_map[len(cbm.get(book_id, ()))].add(book_id)
+        for count, book_ids in val_map.iteritems():
+            yield count, book_ids

 class IdentifiersField(ManyToManyField):

@@ -248,7 +312,7 @@ class IdentifiersField(ManyToManyField):
             ids = default_value
         return ids

-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         'Sort by identifier keys'
         ans = {id_ : self.table.book_col_map.get(id_, ())
                 for id_ in all_book_ids}
@@ -274,6 +338,32 @@ class FormatsField(ManyToManyField):
     def format_fname(self, book_id, fmt):
         return self.table.fname_map[book_id][fmt.upper()]

+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        val_map = defaultdict(set)
+        cbm = self.table.book_col_map
+        for book_id in candidates:
+            vals = cbm.get(book_id, ())
+            for val in vals:
+                val_map[val].add(book_id)
+        for val, book_ids in val_map.iteritems():
+            yield val, book_ids
+
+class SeriesField(ManyToOneField):
+
+    def sort_key_for_series(self, book_id, get_lang, series_sort_order):
+        sid = self.table.book_col_map.get(book_id, None)
+        if sid is None:
+            return self._default_sort_key
+        return self._sort_key(title_sort(self.table.id_map[sid],
+                                         order=series_sort_order,
+                                         lang=get_lang(book_id)))
+
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
+        sso = tweaks['title_series_sorting']
+        return {book_id:self.sort_key_for_series(book_id, get_lang, sso) for book_id
+                in all_book_ids}

 def create_field(name, table):
     cls = {
             ONE_ONE : OneToOneField,
@@ -290,5 +380,7 @@ def create_field(name, table):
         cls = IdentifiersField
     elif table.metadata['datatype'] == 'composite':
         cls = CompositeField
+    elif table.metadata['datatype'] == 'series':
+        cls = SeriesField
     return cls(name, table)
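Every iter_searchable_values implementation above reduces to the same idea: invert the field's book-to-value map into a value-to-book-ids map, restricted to the candidate set. A standalone sketch of the pattern with toy data (not calibre's table objects):

from collections import defaultdict

def iter_searchable_values(book_col_map, candidates, default_value=None):
    # Invert book_id -> value into value -> {book_ids}, restricted to
    # the candidate set; this is the shape the search matchers consume.
    val_map = defaultdict(set)
    for book_id in candidates:
        val_map[book_col_map.get(book_id, default_value)].add(book_id)
    for val, book_ids in val_map.items():
        yield val, book_ids

languages = {1: 'eng', 2: 'deu', 3: 'eng'}
print(dict(iter_searchable_values(languages, {1, 2, 3})))
# value -> book ids: 'eng' -> {1, 3}, 'deu' -> {2}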

src/calibre/db/search.py: new file (390 lines added)

@@ -0,0 +1,390 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from functools import partial
from datetime import timedelta

from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now
from calibre.utils.search_query_parser import SearchQueryParser, ParseException

# TODO: Thread safety of saved searches
class DateSearch(object):  # {{{

    def __init__(self):
        self.operators = {
            '='  : (1, self.eq),
            '!=' : (2, self.ne),
            '>'  : (1, self.gt),
            '>=' : (2, self.ge),
            '<'  : (1, self.lt),
            '<=' : (2, self.le),
        }
        self.local_today = {'_today', 'today', icu_lower(_('today'))}
        self.local_yesterday = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
        self.local_thismonth = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
        self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))

    def eq(self, dbdate, query, field_count):
        if dbdate.year == query.year:
            if field_count == 1:
                return True
            if dbdate.month == query.month:
                if field_count == 2:
                    return True
                return dbdate.day == query.day
        return False

    def ne(self, *args):
        return not self.eq(*args)

    def gt(self, dbdate, query, field_count):
        if dbdate.year > query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month > query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day > query.day)
        return False

    def le(self, *args):
        return not self.gt(*args)

    def lt(self, dbdate, query, field_count):
        if dbdate.year < query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month < query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day < query.day)
        return False

    def ge(self, *args):
        return not self.lt(*args)

    def __call__(self, query, field_iter):
        matches = set()
        if len(query) < 2:
            return matches

        if query == 'false':
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is None or v <= UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        if query == 'true':
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is not None and v > UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        relop = None
        for k, op in self.operators.iteritems():
            if query.startswith(k):
                p, relop = op
                query = query[p:]
        if relop is None:
            relop = self.operators['='][-1]

        if query in self.local_today:
            qd = now()
            field_count = 3
        elif query in self.local_yesterday:
            qd = now() - timedelta(1)
            field_count = 3
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
        else:
            m = self.daysago_pat.search(query)
            if m is not None:
                num = query[:-len(m.group(1))]
                try:
                    qd = now() - timedelta(int(num))
                except:
                    raise ParseException(query, len(query), 'Number conversion error')
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except:
                    raise ParseException(query, len(query), 'Date conversion error')
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
                    field_count = query.count('/') + 1

        for v, book_ids in field_iter():
            if isinstance(v, (str, unicode)):
                v = parse_date(v)
            if v is not None and relop(v, qd, field_count):
                matches |= book_ids

        return matches

# }}}
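DateSearch compares only as many date fields as the query supplies: field_count is 1 for '2011', 2 for '9/2011' and 3 for '9/6/2011', so 'date:=2011' matches the whole year. A hedged, simplified sketch of the operator-prefix split (the calibre code folds this into __call__ and maps each operator to a comparison method):

def split_op(query, default='='):
    # Two-character operators are tried before their one-character
    # prefixes, so '>=' is not misread as '>' plus a literal '='.
    for op in ('<=', '>=', '!=', '<', '>', '='):
        if query.startswith(op):
            return op, query[len(op):]
    return default, query  # bare dates behave like equality

print(split_op('>=9/2011'))   # ('>=', '9/2011')
print(split_op('9/6/2011'))   # ('=', '9/6/2011')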
class NumericSearch(object):  # {{{

    def __init__(self):
        self.operators = {
            '='  : (1, lambda r, q: r == q),
            '>'  : (1, lambda r, q: r is not None and r > q),
            '<'  : (1, lambda r, q: r is not None and r < q),
            '!=' : (2, lambda r, q: r != q),
            '>=' : (2, lambda r, q: r is not None and r >= q),
            '<=' : (2, lambda r, q: r is not None and r <= q)
        }

    def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
        matches = set()
        if not query:
            return matches

        q = ''
        cast = adjust = lambda x: x
        dt = datatype

        if is_many and query in {'true', 'false'}:
            valcheck = lambda x: True
            if datatype == 'rating':
                valcheck = lambda x: x is not None and x > 0
            found = set()
            for val, book_ids in field_iter():
                if valcheck(val):
                    found |= book_ids
            return found if query == 'true' else candidates - found

        if query == 'false':
            if location == 'cover':
                relop = lambda x,y: not bool(x)
            else:
                relop = lambda x,y: x is None
        elif query == 'true':
            if location == 'cover':
                relop = lambda x,y: bool(x)
            else:
                relop = lambda x,y: x is not None
        else:
            relop = None
            for k, op in self.operators.iteritems():
                if query.startswith(k):
                    p, relop = op
                    query = query[p:]
            if relop is None:
                p, relop = self.operators['=']

            cast = int
            if dt == 'rating':
                cast = lambda x: 0 if x is None else int(x)
                adjust = lambda x: x/2
            elif dt in ('float', 'composite'):
                cast = float

            mult = 1.0
            if len(query) > 1:
                mult = query[-1].lower()
                mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0)
                if mult != 1.0:
                    query = query[:-1]
            else:
                mult = 1.0

            try:
                q = cast(query) * mult
            except:
                raise ParseException(query, len(query),
                        'Non-numeric value in query: %r'%query)

        for val, book_ids in field_iter():
            if val is None:
                continue
            try:
                v = cast(val)
            except:
                v = None
            if v:
                v = adjust(v)
            if relop(v, q):
                matches |= book_ids

        return matches

# }}}
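Two details of NumericSearch are easy to miss: ratings are stored doubled (half stars), hence the adjust = x/2, and a trailing k/m/g suffix scales the query by powers of 1024 for size-style searches. A hedged sketch of just the suffix handling:

def parse_numeric_query(query, datatype='int'):
    # Strip an optional size suffix, then cast; a simplified mirror of the
    # mult logic above (no rating adjustment here).
    mult = 1.0
    if len(query) > 1:
        mult = {'k': 1024.0, 'm': 1024.0**2, 'g': 1024.0**3}.get(query[-1].lower(), 1.0)
        if mult != 1.0:
            query = query[:-1]
    cast = float if datatype in ('float', 'composite') else int
    return cast(query) * mult

print(parse_numeric_query('2m'))  # 2097152.0, e.g. for a size-like search
print(parse_numeric_query('4'))   # 4.0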
class Parser(SearchQueryParser):

    def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
                 limit_search_columns, limit_search_columns_to, locations):
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search, self.num_search = date_search, num_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        super(Parser, self).__init__(locations, optimize=True)

    @property
    def field_metadata(self):
        return self.dbcache.field_metadata

    def universal_set(self):
        return self.all_book_ids

    def field_iter(self, name, candidates):
        get_metadata = partial(self.dbcache._get_metadata, get_user_categories=False)
        return self.dbcache.fields[name].iter_searchable_values(get_metadata,
                                                                candidates)

    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        if (len(location) > 2 and location.startswith('@') and
                location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        # original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))

        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query,
                                         candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(query, len(query), 'Recursive query group detected')

        # If the user has asked to restrict searching over all fields, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
                self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                                             candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']

            # take care of dates special case
            if (dt == 'datetime' or (
                    dt == 'composite' and
                    fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                    (dt == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number')):
                field = self.dbcache.fields[location]
                return self.num_search(
                    icu_lower(query), partial(self.field_iter, location, candidates),
                    location, dt, candidates, is_many=field.is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and
                    len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
                return self.num_search(icu_lower(query[1:]), partial(
                    self.dbcache.fields[location].iter_counts, candidates),
                    location, dt, candidates)

        return matches
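Both loops in get_matches that fan a query out over several locations use a shrinking candidate set: a book that already matched is removed before the next location is tried, so nothing is tested twice. A toy version of the idiom:

def match_any(locations, match_one, candidates):
    # match_one(loc, c) returns the subset of c matching at location loc
    matches, c = set(), set(candidates)
    for loc in locations:
        m = match_one(loc, c)
        matches |= m
        c -= m          # never re-test a book that already matched
        if not c:
            break       # everything matched; stop early
    return matches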
class Search(object):

    def __init__(self, all_search_locations):
        self.all_search_locations = all_search_locations
        self.date_search = DateSearch()
        self.num_search = NumericSearch()

    def change_locations(self, newlocs):
        self.all_search_locations = newlocs

    def __call__(self, dbcache, query, search_restriction):
        '''
        Return the set of ids of all records that match the specified
        query and restriction
        '''
        q = ''
        if not query or not query.strip():
            q = search_restriction
        else:
            q = query
            if search_restriction:
                q = u'(%s) and (%s)' % (search_restriction, query)

        all_book_ids = dbcache.all_book_ids(type=set)
        if not q:
            return all_book_ids

        # We construct a new parser instance per search as pyparsing is not
        # thread safe. On my desktop, constructing a SearchQueryParser instance
        # takes 0.000975 seconds and restoring it from a pickle takes
        # 0.000974 seconds.
        sqp = Parser(
            dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
            self.date_search, self.num_search, prefs['limit_search_columns'],
            prefs['limit_search_columns_to'], self.all_search_locations)
        try:
            ret = sqp.parse(q)
        finally:
            sqp.dbcache = None
        return ret
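A search restriction composes with the query by plain conjunction, as __call__ shows. A hedged illustration of the string it builds:

def compose(query, restriction):
    # Mirrors Search.__call__: the restriction is ANDed onto the query.
    if not query or not query.strip():
        return restriction or ''
    if restriction:
        return '(%s) and (%s)' % (restriction, query)
    return query

print(compose('rating:>3', 'tags:Fiction'))
# (tags:Fiction) and (rating:>3)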


@@ -148,11 +148,11 @@ class ManyToManyTable(ManyToOneTable):
     '''

     table_type = MANY_MANY
+    selectq = 'SELECT book, {0} FROM {1}'

     def read_maps(self, db):
         for row in db.conn.execute(
-                'SELECT book, {0} FROM {1}'.format(
-                    self.metadata['link_column'], self.link_table)):
+                self.selectq.format(self.metadata['link_column'], self.link_table)):
             if row[1] not in self.col_book_map:
                 self.col_book_map[row[1]] = []
             self.col_book_map[row[1]].append(row[0])
@@ -168,6 +168,8 @@ class ManyToManyTable(ManyToOneTable):

 class AuthorsTable(ManyToManyTable):

+    selectq = 'SELECT book, {0} FROM {1} ORDER BY id'
+
     def read_id_maps(self, db):
         self.alink_map = {}
         self.asort_map = {}
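Hoisting the SQL into a selectq class attribute lets AuthorsTable change only the statement (adding a deterministic ORDER BY id) while inheriting read_maps unchanged. A minimal illustration of the pattern (toy class names, not calibre's):

class LinkTable(object):
    selectq = 'SELECT book, {0} FROM {1}'

    def query(self, column, table):
        # Subclasses customise the statement, not the reading logic.
        return self.selectq.format(column, table)

class OrderedLinkTable(LinkTable):
    selectq = 'SELECT book, {0} FROM {1} ORDER BY id'

print(OrderedLinkTable().query('author', 'books_authors_link'))
# SELECT book, author FROM books_authors_link ORDER BY id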


@@ -7,8 +7,8 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import unittest, os, shutil
+from future_builtins import map

 class BaseTest(unittest.TestCase):
@@ -39,7 +39,10 @@ class BaseTest(unittest.TestCase):
                 'ondevice_col', 'last_modified'}.union(allfk1)
         for attr in all_keys:
             if attr == 'user_metadata': continue
+            if attr == 'format_metadata': continue  # TODO: Not implemented yet
             attr1, attr2 = getattr(mi1, attr), getattr(mi2, attr)
+            if attr == 'formats':
+                attr1, attr2 = map(lambda x:tuple(x) if x else (), (attr1, attr2))
             self.assertEqual(attr1, attr2,
                 '%s not the same: %r != %r'%(attr, attr1, attr2))
             if attr.startswith('#'):
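The tuple() normalisation is presumably there because the two backends can report formats as a list, a tuple or None, which assertEqual would otherwise treat as unequal even when the contents agree. In isolation:

normalize = lambda x: tuple(x) if x else ()
assert normalize(['EPUB', 'MOBI']) == normalize(('EPUB', 'MOBI'))
assert normalize(None) == normalize(())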

Binary file not shown.


@@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
 import shutil, unittest, tempfile, datetime
 from cStringIO import StringIO

-from calibre.utils.date import local_tz
+from calibre.utils.date import utc_tz
 from calibre.db.tests.base import BaseTest

 class ReadingTest(BaseTest):
@@ -37,12 +37,12 @@ class ReadingTest(BaseTest):
                 'tags': (),
                 'formats':(),
                 'identifiers': {},
-                'timestamp': datetime.datetime(2011, 9, 7, 13, 54, 41,
-                    tzinfo=local_tz),
-                'pubdate': datetime.datetime(2011, 9, 7, 13, 54, 41,
-                    tzinfo=local_tz),
-                'last_modified': datetime.datetime(2011, 9, 7, 13, 54, 41,
-                    tzinfo=local_tz),
+                'timestamp': datetime.datetime(2011, 9, 7, 19, 54, 41,
+                    tzinfo=utc_tz),
+                'pubdate': datetime.datetime(2011, 9, 7, 19, 54, 41,
+                    tzinfo=utc_tz),
+                'last_modified': datetime.datetime(2011, 9, 7, 19, 54, 41,
+                    tzinfo=utc_tz),
                 'publisher': None,
                 'languages': (),
                 'comments': None,
@@ -63,23 +63,23 @@ class ReadingTest(BaseTest):
                 'sort': 'One',
                 'authors': ('Author One',),
                 'author_sort': 'One, Author',
-                'series' : 'Series One',
+                'series' : 'A Series One',
                 'series_index': 1.0,
-                'tags':('Tag Two', 'Tag One'),
+                'tags':('Tag One', 'Tag Two'),
                 'formats': (),
                 'rating': 4.0,
                 'identifiers': {'test':'one'},
-                'timestamp': datetime.datetime(2011, 9, 5, 15, 6,
-                    tzinfo=local_tz),
-                'pubdate': datetime.datetime(2011, 9, 5, 15, 6,
-                    tzinfo=local_tz),
+                'timestamp': datetime.datetime(2011, 9, 5, 21, 6,
+                    tzinfo=utc_tz),
+                'pubdate': datetime.datetime(2011, 9, 5, 21, 6,
+                    tzinfo=utc_tz),
                 'publisher': 'Publisher One',
                 'languages': ('eng',),
                 'comments': '<p>Comments One</p>',
                 '#enum':'One',
                 '#authors':('Custom One', 'Custom Two'),
-                '#date':datetime.datetime(2011, 9, 5, 0, 0,
-                    tzinfo=local_tz),
+                '#date':datetime.datetime(2011, 9, 5, 6, 0,
+                    tzinfo=utc_tz),
                 '#rating':2.0,
                 '#series':'My Series One',
                 '#series_index': 1.0,
@@ -92,23 +92,23 @@ class ReadingTest(BaseTest):
                 'sort': 'Title Two',
                 'authors': ('Author Two', 'Author One'),
                 'author_sort': 'Two, Author & One, Author',
-                'series' : 'Series One',
+                'series' : 'A Series One',
                 'series_index': 2.0,
                 'rating': 6.0,
                 'tags': ('Tag One',),
                 'formats':(),
                 'identifiers': {'test':'two'},
-                'timestamp': datetime.datetime(2011, 9, 6, 0, 0,
-                    tzinfo=local_tz),
-                'pubdate': datetime.datetime(2011, 8, 5, 0, 0,
-                    tzinfo=local_tz),
+                'timestamp': datetime.datetime(2011, 9, 6, 6, 0,
+                    tzinfo=utc_tz),
+                'pubdate': datetime.datetime(2011, 8, 5, 6, 0,
+                    tzinfo=utc_tz),
                 'publisher': 'Publisher Two',
                 'languages': ('deu',),
                 'comments': '<p>Comments Two</p>',
                 '#enum':'Two',
                 '#authors':('My Author Two',),
-                '#date':datetime.datetime(2011, 9, 1, 0, 0,
-                    tzinfo=local_tz),
+                '#date':datetime.datetime(2011, 9, 1, 6, 0,
+                    tzinfo=utc_tz),
                 '#rating':4.0,
                 '#series':'My Series Two',
                 '#series_index': 3.0,
@@ -130,30 +130,31 @@ class ReadingTest(BaseTest):
         'Test sorting'
         cache = self.init_cache(self.library_path)
         for field, order in {
             'title'  : [2, 1, 3],
             'authors': [2, 1, 3],
-            'series' : [3, 2, 1],
+            'series' : [3, 1, 2],
             'tags'   : [3, 1, 2],
             'rating' : [3, 2, 1],
             # 'identifiers': [3, 2, 1], There is no stable sort since 1 and
             # 2 have the same identifier keys
-            # TODO: Add an empty book to the db and ensure that empty
-            # fields sort the same as they do in db2
+            # 'last_modified': [3, 2, 1], There is no stable sort as two
+            # records have the exact same value
             'timestamp': [2, 1, 3],
             'pubdate'  : [1, 2, 3],
             'publisher': [3, 2, 1],
-            'last_modified': [2, 1, 3],
             'languages': [3, 2, 1],
             'comments': [3, 2, 1],
             '#enum' : [3, 2, 1],
             '#authors' : [3, 2, 1],
             '#date': [3, 1, 2],
             '#rating':[3, 2, 1],
             '#series':[3, 2, 1],
             '#tags':[3, 2, 1],
             '#yesno':[3, 1, 2],
             '#comments':[3, 2, 1],
+            # TODO: Add an empty book to the db and ensure that empty
+            # fields sort the same as they do in db2
         }.iteritems():
             x = list(reversed(order))
             self.assertEqual(order, cache.multisort([(field, True)],
                 ids_to_sort=x),
@@ -190,6 +191,37 @@ class ReadingTest(BaseTest):

     # }}}

+    def test_searching(self):  # {{{
+        'Test searching returns the same data for both backends'
+        from calibre.library.database2 import LibraryDatabase2
+        old = LibraryDatabase2(self.library_path)
+        oldvals = {query:set(old.search_getting_ids(query, '')) for query in (
+            # Date tests
+            'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011',
+            '#date:true', 'date:<100daysago', 'date:>9/6/2011',
+            '#date:>9/1/2011', '#date:=2011',
+
+            # Number tests
+            'rating:3', 'rating:>2', 'rating:=2', 'rating:true',
+            'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7',
+            'cover:false', 'cover:true', '#float:>11', '#float:<1k',
+            '#float:10.01', 'series_index:1', 'series_index:<3', 'id:1',
+            'id:>2',
+
+            # TODO: Tests for searching the size column and
+            # cover:true|false
+        )}
+        old = None
+
+        cache = self.init_cache(self.library_path)
+        for query, ans in oldvals.iteritems():
+            nr = cache.search(query, '')
+            self.assertEqual(ans, nr,
+                'Old result: %r != New result: %r for search: %s'%(
+                    ans, nr, query))
+
+    # }}}
+
 def tests():
     return unittest.TestLoader().loadTestsFromTestCase(ReadingTest)
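A hedged note on exercising the new test in isolation (assumes a calibre development environment where calibre.db.tests is importable; the runner invocation itself is not part of the diff):

import unittest
from calibre.db.tests.reading import tests
unittest.TextTestRunner(verbosity=2).run(tests())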