calibre (mirror of https://github.com/kovidgoyal/calibre.git)

commit 9e1627569f
Merge from trunk
@@ -64,8 +64,8 @@ class Barrons(BasicNewsRecipe):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
             br.open('http://commerce.barrons.com/auth/login')
-            br.select_form(name='login_form')
-            br['user'] = self.username
+            br.select_form(nr=0)
+            br['username'] = self.username
             br['password'] = self.password
             br.submit()
         return br
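Note on the hunk above: selecting the form by position (nr=0) is more robust against the site renaming its login form, at the cost of assuming the login form is the first one on the page. A minimal sketch of the two mechanize idioms, standalone and with placeholder credentials (the URL and field names are taken from the hunk):

    import mechanize

    br = mechanize.Browser()
    br.open('http://commerce.barrons.com/auth/login')
    # Old style: breaks if the form's name attribute changes or disappears.
    # br.select_form(name='login_form')
    # New style: select the first form on the page, whatever it is called.
    br.select_form(nr=0)
    br['username'] = 'reader@example.com'  # placeholder credentials
    br['password'] = 'hunter2'
    br.submit()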
recipes/conowego_pl.recipe: Executable file → Normal file (0 lines changed)
[image file changed: 605 B before, 605 B after]
recipes/linux_journal.recipe: Executable file → Normal file (0 lines changed)
@@ -18,6 +18,8 @@ class MichelleMalkin(BasicNewsRecipe):
 
     remove_javascript = True
     no_stylesheets = True
+    auto_cleanup = True
+    use_embedded_content = False
 
 
     conversion_options = {
@@ -29,16 +31,16 @@ class MichelleMalkin(BasicNewsRecipe):
     }
 
 
-    keep_only_tags = [
-        dict(name='div', attrs={'class':'article'})
-    ]
+    #keep_only_tags = [
+        #dict(name='div', attrs={'class':'article'})
+    #]
 
-    remove_tags = [
-        dict(name=['iframe', 'embed', 'object']),
-        dict(name='div', attrs={'id':['comments', 'commentForm']}),
-        dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
-
-    ]
+    #remove_tags = [
+        #dict(name=['iframe', 'embed', 'object']),
+        #dict(name='div', attrs={'id':['comments', 'commentForm']}),
+        #dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
+
+    #]
 
 
     feeds = [(u'http://feeds.feedburner.com/michellemalkin/posts')]
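The recipe above switches from hand-maintained keep_only_tags/remove_tags rules (now commented out) to auto_cleanup, which lets calibre extract the article body heuristically. A minimal sketch of a recipe that relies on auto_cleanup (the class name and title are illustrative; the feed URL is from the hunk):

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleAutoCleanupRecipe(BasicNewsRecipe):
        title = 'Example auto-cleaned feed'
        remove_javascript = True
        no_stylesheets = True
        # auto_cleanup extracts the main article content heuristically,
        # so no per-site keep/remove rules are needed.
        auto_cleanup = True
        use_embedded_content = False

        feeds = [(u'http://feeds.feedburner.com/michellemalkin/posts')]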
src/calibre/db/cache.py:

@@ -13,6 +13,7 @@ from functools import wraps, partial
 
 from calibre.db.locking import create_locks, RecordLock
 from calibre.db.fields import create_field
+from calibre.db.search import Search
 from calibre.db.tables import VirtualTable
 from calibre.db.lazy import FormatMetadata, FormatsList
 from calibre.ebooks.metadata.book.base import Metadata
@@ -50,6 +51,7 @@ class Cache(object):
         self.record_lock = RecordLock(self.read_lock)
         self.format_metadata_cache = defaultdict(dict)
         self.formatter_template_cache = {}
+        self._search_api = Search(self.field_metadata.get_search_terms())
 
         # Implement locking for all simple read/write API methods
         # An unlocked version of the method is stored with the name starting
@@ -269,11 +271,11 @@ class Cache(object):
         return ()
 
     @read_api
-    def all_book_ids(self):
+    def all_book_ids(self, type=frozenset):
         '''
         Frozen set of all known book ids.
         '''
-        return frozenset(self.fields['uuid'])
+        return type(self.fields['uuid'])
 
     @read_api
     def all_field_ids(self, name):
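The new type= parameter keeps the default immutable frozenset for ordinary callers while letting the search code request a plain set it may shrink in place. A usage sketch (cache stands for an initialised Cache instance):

    ids = cache.all_book_ids()          # frozenset, safe to share
    candidates = cache.all_book_ids(type=set)
    candidates -= {1, 2}                # only legal on the mutable copy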
@@ -316,6 +318,10 @@ class Cache(object):
             self.format_metadata_cache[book_id][fmt] = ans
         return ans
 
+    @read_api
+    def pref(self, name):
+        return self.backend.prefs[name]
+
     @api
     def get_metadata(self, book_id,
             get_cover=False, get_user_categories=True, cover_as_data=False):
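pref() gives read-locked access to backend preferences; later in this commit the search code uses it to fetch grouped search terms, along the lines of:

    gst = cache.pref('grouped_search_terms')  # cache: a Cache instance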
@@ -378,17 +384,21 @@ class Cache(object):
         all_book_ids = frozenset(self._all_book_ids() if ids_to_sort is None
                 else ids_to_sort)
         get_metadata = partial(self._get_metadata, get_user_categories=False)
+        def get_lang(book_id):
+            ans = self._field_for('languages', book_id)
+            return ans[0] if ans else None
 
         fm = {'title':'sort', 'authors':'author_sort'}
 
         def sort_key(field):
             'Handle series type fields'
-            ans = self.fields[fm.get(field, field)].sort_keys_for_books(get_metadata,
-                    all_book_ids)
             idx = field + '_index'
-            if idx in self.fields:
-                idx_ans = self.fields[idx].sort_keys_for_books(get_metadata,
-                        all_book_ids)
+            is_series = idx in self.fields
+            ans = self.fields[fm.get(field, field)].sort_keys_for_books(
+                get_metadata, get_lang, all_book_ids,)
+            if is_series:
+                idx_ans = self.fields[idx].sort_keys_for_books(
+                    get_metadata, get_lang, all_book_ids)
                 ans = {k:(v, idx_ans[k]) for k, v in ans.iteritems()}
             return ans
 
@@ -401,6 +411,10 @@ class Cache(object):
         else:
             return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))
 
+    @read_api
+    def search(self, query, restriction):
+        return self._search_api(self, query, restriction)
+
     # }}}
 
 class SortKey(object):
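This wires the Search object created in __init__ into a public API method. A usage sketch (assuming an initialised Cache named cache; query syntax as in the calibre search language):

    # Ids of all books rated above 2, within an English-language restriction.
    matches = cache.search('rating:>2', 'languages:eng')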
src/calibre/db/fields.py:

@@ -9,14 +9,19 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 from threading import Lock
+from collections import defaultdict
 
 from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
+from calibre.ebooks.metadata import title_sort
+from calibre.utils.config_base import tweaks
 from calibre.utils.icu import sort_key
 from calibre.utils.date import UNDEFINED_DATE
+from calibre.utils.localization import calibre_langcode_to_name
 
 class Field(object):
 
+    is_many = False
+
     def __init__(self, name, table):
         self.name, self.table = name, table
         self.has_text_data = self.metadata['datatype'] in ('text', 'comments',
@@ -72,7 +77,7 @@ class Field(object):
         '''
         return iter(())
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         '''
         Return a mapping of book_id -> sort_key. The sort key is suitable for
         use in sorting the list of all books by this field, via the python cmp
@@ -81,6 +86,13 @@ class Field(object):
         '''
         raise NotImplementedError()
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        '''
+        Return a generator that yields items of the form (value, set of books
+        ids that have this value). Here, value is a searchable value. Returned
+        books_ids are restricted to the set of ids in candidates.
+        '''
+        raise NotImplementedError()
+
 class OneToOneField(Field):
@@ -96,10 +108,24 @@ class OneToOneField(Field):
     def __iter__(self):
         return self.table.book_col_map.iterkeys()
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         return {id_ : self._sort_key(self.table.book_col_map.get(id_,
             self._default_sort_key)) for id_ in all_book_ids}
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        cbm = self.table.book_col_map
+        if (self.name in {'id', 'uuid', 'title'} or
+                self.metadata['datatype'] == 'datetime'):
+            # Values are likely to be unique
+            for book_id in candidates:
+                yield cbm.get(book_id, default_value), {book_id}
+        else:
+            val_map = defaultdict(set)
+            for book_id in candidates:
+                val_map[cbm.get(book_id, default_value)].add(book_id)
+            for val, book_ids in val_map.iteritems():
+                yield val, book_ids
+
 class CompositeField(OneToOneField):
 
     def __init__(self, *args, **kwargs):
@@ -133,10 +159,16 @@ class CompositeField(OneToOneField):
         ans = mi.get('#'+self.metadata['label'])
         return ans
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
                 all_book_ids}
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        val_map = defaultdict(set)
+        for book_id in candidates:
+            val_map[self.get_value_with_cache(book_id, get_metadata)].add(book_id)
+        for val, book_ids in val_map.iteritems():
+            yield val, book_ids
+
 class OnDeviceField(OneToOneField):
@@ -170,12 +202,21 @@ class OnDeviceField(OneToOneField):
     def __iter__(self):
         return iter(())
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         return {id_ : self.for_book(id_) for id_ in
                 all_book_ids}
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        val_map = defaultdict(set)
+        for book_id in candidates:
+            val_map[self.for_book(book_id, default_value=default_value)].add(book_id)
+        for val, book_ids in val_map.iteritems():
+            yield val, book_ids
+
 class ManyToOneField(Field):
 
+    is_many = True
+
     def for_book(self, book_id, default_value=None):
         ids = self.table.book_col_map.get(book_id, None)
         if ids is not None:
@@ -196,7 +237,7 @@ class ManyToOneField(Field):
     def __iter__(self):
         return self.table.id_map.iterkeys()
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         ans = {id_ : self.table.book_col_map.get(id_, None)
                 for id_ in all_book_ids}
         sk_map = {cid : (self._default_sort_key if cid is None else
@@ -204,8 +245,17 @@ class ManyToOneField(Field):
                 for cid in ans.itervalues()}
         return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        cbm = self.table.col_book_map
+        for item_id, val in self.table.id_map.iteritems():
+            book_ids = set(cbm.get(item_id, ())).intersection(candidates)
+            if book_ids:
+                yield val, book_ids
+
 class ManyToManyField(Field):
 
+    is_many = True
+
     def __init__(self, *args, **kwargs):
         Field.__init__(self, *args, **kwargs)
         self.alphabetical_sort = self.name != 'authors'
@@ -227,7 +277,7 @@ class ManyToManyField(Field):
     def __iter__(self):
         return self.table.id_map.iterkeys()
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         ans = {id_ : self.table.book_col_map.get(id_, ())
                 for id_ in all_book_ids}
         all_cids = set()
@@ -239,6 +289,20 @@ class ManyToManyField(Field):
                 (self._default_sort_key,))
                 for id_, cids in ans.iteritems()}
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        cbm = self.table.col_book_map
+        for item_id, val in self.table.id_map.iteritems():
+            book_ids = set(cbm.get(item_id, ())).intersection(candidates)
+            if book_ids:
+                yield val, book_ids
+
+    def iter_counts(self, candidates):
+        val_map = defaultdict(set)
+        cbm = self.table.book_col_map
+        for book_id in candidates:
+            val_map[len(cbm.get(book_id, ()))].add(book_id)
+        for count, book_ids in val_map.iteritems():
+            yield count, book_ids
+
 class IdentifiersField(ManyToManyField):
@@ -248,7 +312,7 @@ class IdentifiersField(ManyToManyField):
             ids = default_value
         return ids
 
-    def sort_keys_for_books(self, get_metadata, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
         'Sort by identifier keys'
         ans = {id_ : self.table.book_col_map.get(id_, ())
                 for id_ in all_book_ids}
@@ -274,6 +338,32 @@ class FormatsField(ManyToManyField):
     def format_fname(self, book_id, fmt):
         return self.table.fname_map[book_id][fmt.upper()]
 
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        val_map = defaultdict(set)
+        cbm = self.table.book_col_map
+        for book_id in candidates:
+            vals = cbm.get(book_id, ())
+            for val in vals:
+                val_map[val].add(book_id)
+
+        for val, book_ids in val_map.iteritems():
+            yield val, book_ids
+
+class SeriesField(ManyToOneField):
+
+    def sort_key_for_series(self, book_id, get_lang, series_sort_order):
+        sid = self.table.book_col_map.get(book_id, None)
+        if sid is None:
+            return self._default_sort_key
+        return self._sort_key(title_sort(self.table.id_map[sid],
+                                         order=series_sort_order,
+                                         lang=get_lang(book_id)))
+
+    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
+        sso = tweaks['title_series_sorting']
+        return {book_id:self.sort_key_for_series(book_id, get_lang, sso) for book_id
+                in all_book_ids}
+
 def create_field(name, table):
     cls = {
         ONE_ONE : OneToOneField,
@@ -290,5 +380,7 @@ def create_field(name, table):
         cls = IdentifiersField
     elif table.metadata['datatype'] == 'composite':
         cls = CompositeField
+    elif table.metadata['datatype'] == 'series':
+        cls = SeriesField
     return cls(name, table)
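The new SeriesField passes each book's language into title_sort() so that leading articles are handled per-language when computing series sort keys, with the title_series_sorting tweak deciding the overall policy. A rough illustration (title_sort is the real calibre helper; the value and output here are illustrative):

    from calibre.ebooks.metadata import title_sort

    # With library ordering, the leading article moves to the end, so
    # 'A Series One' sorts under 'S':
    title_sort('A Series One', order='library_order', lang='eng')
    # -> 'Series One, A'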
src/calibre/db/search.py: new file (390 lines)
@@ -0,0 +1,390 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from functools import partial
from datetime import timedelta

from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now
from calibre.utils.search_query_parser import SearchQueryParser, ParseException

# TODO: Thread safety of saved searches
class DateSearch(object): # {{{

    def __init__(self):
        self.operators = {
            '='  : (1, self.eq),
            '!=' : (2, self.ne),
            '>'  : (1, self.gt),
            '>=' : (2, self.ge),
            '<'  : (1, self.lt),
            '<=' : (2, self.le),
        }
        self.local_today = { '_today', 'today', icu_lower(_('today')) }
        self.local_yesterday = { '_yesterday', 'yesterday', icu_lower(_('yesterday')) }
        self.local_thismonth = { '_thismonth', 'thismonth', icu_lower(_('thismonth')) }
        self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))

    def eq(self, dbdate, query, field_count):
        if dbdate.year == query.year:
            if field_count == 1:
                return True
            if dbdate.month == query.month:
                if field_count == 2:
                    return True
                return dbdate.day == query.day
        return False

    def ne(self, *args):
        return not self.eq(*args)

    def gt(self, dbdate, query, field_count):
        if dbdate.year > query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month > query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day > query.day)
        return False

    def le(self, *args):
        return not self.gt(*args)

    def lt(self, dbdate, query, field_count):
        if dbdate.year < query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month < query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day < query.day)
        return False

    def ge(self, *args):
        return not self.lt(*args)

    def __call__(self, query, field_iter):
        matches = set()
        if len(query) < 2:
            return matches

        if query == 'false':
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is None or v <= UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        if query == 'true':
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is not None and v > UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        relop = None
        for k, op in self.operators.iteritems():
            if query.startswith(k):
                p, relop = op
                query = query[p:]
        if relop is None:
            relop = self.operators['='][-1]

        if query in self.local_today:
            qd = now()
            field_count = 3
        elif query in self.local_yesterday:
            qd = now() - timedelta(1)
            field_count = 3
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
        else:
            m = self.daysago_pat.search(query)
            if m is not None:
                num = query[:-len(m.group(1))]
                try:
                    qd = now() - timedelta(int(num))
                except:
                    raise ParseException(query, len(query), 'Number conversion error')
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except:
                    raise ParseException(query, len(query), 'Date conversion error')
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
                    field_count = query.count('/') + 1

        for v, book_ids in field_iter():
            if isinstance(v, (str, unicode)):
                v = parse_date(v)
            if v is not None and relop(v, qd, field_count):
                matches |= book_ids

        return matches
# }}}
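DateSearch first strips a relational operator prefix, then recognises the today/yesterday/thismonth tokens and the N daysago form before falling back to parse_date(). field_count records how many of year/month/day the query specified, so a bare year matches the whole year while a full date matches a single day. Illustrative queries this grammar accepts (these mirror the test cases added later in this commit):

    date:9/6/2011       # exact day (field_count == 3)
    pubdate:9/2011      # any day in September 2011 (field_count == 2)
    #date:=2011         # any day in 2011 (field_count == 1)
    date:<100daysago    # strictly older than 100 days
    date:true           # any defined date at all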
class NumericSearch(object): # {{{

    def __init__(self):
        self.operators = {
            '=':( 1, lambda r, q: r == q ),
            '>':( 1, lambda r, q: r is not None and r > q ),
            '<':( 1, lambda r, q: r is not None and r < q ),
            '!=':( 2, lambda r, q: r != q ),
            '>=':( 2, lambda r, q: r is not None and r >= q ),
            '<=':( 2, lambda r, q: r is not None and r <= q )
        }

    def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
        matches = set()
        if not query:
            return matches

        q = ''
        cast = adjust = lambda x: x
        dt = datatype

        if is_many and query in {'true', 'false'}:
            valcheck = lambda x: True
            if datatype == 'rating':
                valcheck = lambda x: x is not None and x > 0
            found = set()
            for val, book_ids in field_iter():
                if valcheck(val):
                    found |= book_ids
            return found if query == 'true' else candidates - found

        if query == 'false':
            if location == 'cover':
                relop = lambda x,y: not bool(x)
            else:
                relop = lambda x,y: x is None
        elif query == 'true':
            if location == 'cover':
                relop = lambda x,y: bool(x)
            else:
                relop = lambda x,y: x is not None
        else:
            relop = None
            for k, op in self.operators.iteritems():
                if query.startswith(k):
                    p, relop = op
                    query = query[p:]
            if relop is None:
                p, relop = self.operators['=']

            cast = int
            if dt == 'rating':
                cast = lambda x: 0 if x is None else int(x)
                adjust = lambda x: x/2
            elif dt in ('float', 'composite'):
                cast = float

            mult = 1.0
            if len(query) > 1:
                mult = query[-1].lower()
                mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0)
                if mult != 1.0:
                    query = query[:-1]
            else:
                mult = 1.0

            try:
                q = cast(query) * mult
            except:
                raise ParseException(query, len(query),
                        'Non-numeric value in query: %r'%query)

        for val, book_ids in field_iter():
            if val is None:
                continue
            try:
                v = cast(val)
            except:
                v = None
            if v:
                v = adjust(v)
            if relop(v, q):
                matches |= book_ids
        return matches

# }}}
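NumericSearch uses the same operator-prefix convention, with two twists: ratings are stored doubled in the database (hence adjust = x/2), and a trailing k, m or g scales the query value by powers of 1024 so size-like columns can be queried naturally. Illustrative queries (the count form is routed here by the parser via iter_counts):

    rating:>4       # compares against half the stored value
    #float:<1k      # k == 1024, m == 1024**2, g == 1024**3
    tags:#>7        # '#' prefix: compare the number of values, not the values
    cover:true      # presence test; 'cover' uses bool() instead of None checks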
class Parser(SearchQueryParser):

    def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
                 limit_search_columns, limit_search_columns_to, locations):
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search, self.num_search = date_search, num_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        super(Parser, self).__init__(locations, optimize=True)

    @property
    def field_metadata(self):
        return self.dbcache.field_metadata

    def universal_set(self):
        return self.all_book_ids

    def field_iter(self, name, candidates):
        get_metadata = partial(self.dbcache._get_metadata, get_user_categories=False)
        return self.dbcache.fields[name].iter_searchable_values(get_metadata,
                candidates)

    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        if (len(location) > 2 and location.startswith('@') and
                    location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        # original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query,
                            candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(query, len(query), 'Recursive query group detected')

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
                self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                                candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']

            # take care of dates special case
            if (dt == 'datetime' or (
                    dt == 'composite' and
                    fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                    (dt == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number')):
                field = self.dbcache.fields[location]
                return self.num_search(
                    icu_lower(query), partial(self.field_iter, location, candidates),
                    location, dt, candidates, is_many=field.is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and
                    len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
                return self.num_search(icu_lower(query[1:]), partial(
                        self.dbcache.fields[location].iter_counts, candidates),
                    location, dt, candidates)

        return matches
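One contract in get_matches() deserves emphasis: the candidates set passed in must never be mutated, since the query optimizer in SearchQueryParser reuses it across sub-queries. The recursive branches therefore copy it and shrink only the copy, a pattern that looks roughly like this (evaluate is an illustrative stand-in for the recursive call):

    c = candidates.copy()        # never touch the caller's set
    for loc in locations_to_try:
        m = evaluate(loc, query, c)
        matches |= m
        c -= m                   # already-matched books need no retesting
        if len(c) == 0:
            break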
class Search(object):

    def __init__(self, all_search_locations):
        self.all_search_locations = all_search_locations
        self.date_search = DateSearch()
        self.num_search = NumericSearch()

    def change_locations(self, newlocs):
        self.all_search_locations = newlocs

    def __call__(self, dbcache, query, search_restriction):
        '''
        Return the set of ids of all records that match the specified
        query and restriction
        '''
        q = ''
        if not query or not query.strip():
            q = search_restriction
        else:
            q = query
            if search_restriction:
                q = u'(%s) and (%s)' % (search_restriction, query)

        all_book_ids = dbcache.all_book_ids(type=set)
        if not q:
            return all_book_ids

        # We construct a new parser instance per search as pyparsing is not
        # thread safe. On my desktop, constructing a SearchQueryParser instance
        # takes 0.000975 seconds and restoring it from a pickle takes
        # 0.000974 seconds.
        sqp = Parser(
            dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
            self.date_search, self.num_search, prefs['limit_search_columns'],
            prefs['limit_search_columns_to'], self.all_search_locations)
        try:
            ret = sqp.parse(query)
        finally:
            sqp.dbcache = None
        return ret
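Search.__call__ folds an active restriction into the query by AND-ing the two strings, so a restriction behaves exactly like a parenthesised sub-query. Roughly (the inputs on the left are assumed examples):

    # query='rating:>2', restriction='languages:eng'
    #   -> combined as '(languages:eng) and (rating:>2)'
    # query='' (empty), restriction='languages:eng'
    #   -> just 'languages:eng'
    # both empty -> all book ids are returned without parsing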
src/calibre/db/tables.py:

@@ -148,11 +148,11 @@ class ManyToManyTable(ManyToOneTable):
     '''
 
     table_type = MANY_MANY
+    selectq = 'SELECT book, {0} FROM {1}'
 
     def read_maps(self, db):
         for row in db.conn.execute(
-                'SELECT book, {0} FROM {1}'.format(
-                    self.metadata['link_column'], self.link_table)):
+                self.selectq.format(self.metadata['link_column'], self.link_table)):
             if row[1] not in self.col_book_map:
                 self.col_book_map[row[1]] = []
             self.col_book_map[row[1]].append(row[0])
@@ -168,6 +168,8 @@ class ManyToManyTable(ManyToOneTable):
 
 class AuthorsTable(ManyToManyTable):
 
+    selectq = 'SELECT book, {0} FROM {1} ORDER BY id'
+
     def read_id_maps(self, db):
         self.alink_map = {}
         self.asort_map = {}
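Hoisting the SELECT into a selectq class attribute means AuthorsTable can change only the query template: the ORDER BY id clause makes authors come back in insertion order, preserving each book's author ordering. The pattern, in the abstract (class names here are illustrative):

    class LinkTable(object):
        # column name and link table are substituted at read time
        selectq = 'SELECT book, {0} FROM {1}'

    class OrderedLinkTable(LinkTable):
        # subclass overrides only the template, not the reading logic
        selectq = 'SELECT book, {0} FROM {1} ORDER BY id'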
src/calibre/db/tests/base.py:

@@ -7,8 +7,8 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 
 import unittest, os, shutil
+from future_builtins import map
 
 class BaseTest(unittest.TestCase):
@@ -39,7 +39,10 @@ class BaseTest(unittest.TestCase):
                 'ondevice_col', 'last_modified'}.union(allfk1)
         for attr in all_keys:
             if attr == 'user_metadata': continue
+            if attr == 'format_metadata': continue # TODO: Not implemented yet
             attr1, attr2 = getattr(mi1, attr), getattr(mi2, attr)
+            if attr == 'formats':
+                attr1, attr2 = map(lambda x:tuple(x) if x else (), (attr1, attr2))
             self.assertEqual(attr1, attr2,
                 '%s not the same: %r != %r'%(attr, attr1, attr2))
             if attr.startswith('#'):
Binary file not shown.
src/calibre/db/tests/reading.py:

@@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
 import shutil, unittest, tempfile, datetime
 from cStringIO import StringIO
 
-from calibre.utils.date import local_tz
+from calibre.utils.date import utc_tz
 from calibre.db.tests.base import BaseTest
 
 class ReadingTest(BaseTest):
@@ -37,12 +37,12 @@ class ReadingTest(BaseTest):
             'tags': (),
             'formats':(),
             'identifiers': {},
-            'timestamp': datetime.datetime(2011, 9, 7, 13, 54, 41,
-                tzinfo=local_tz),
-            'pubdate': datetime.datetime(2011, 9, 7, 13, 54, 41,
-                tzinfo=local_tz),
-            'last_modified': datetime.datetime(2011, 9, 7, 13, 54, 41,
-                tzinfo=local_tz),
+            'timestamp': datetime.datetime(2011, 9, 7, 19, 54, 41,
+                tzinfo=utc_tz),
+            'pubdate': datetime.datetime(2011, 9, 7, 19, 54, 41,
+                tzinfo=utc_tz),
+            'last_modified': datetime.datetime(2011, 9, 7, 19, 54, 41,
+                tzinfo=utc_tz),
             'publisher': None,
             'languages': (),
             'comments': None,
@@ -63,23 +63,23 @@ class ReadingTest(BaseTest):
             'sort': 'One',
             'authors': ('Author One',),
             'author_sort': 'One, Author',
-            'series' : 'Series One',
+            'series' : 'A Series One',
             'series_index': 1.0,
-            'tags':('Tag Two', 'Tag One'),
+            'tags':('Tag One', 'Tag Two'),
             'formats': (),
             'rating': 4.0,
             'identifiers': {'test':'one'},
-            'timestamp': datetime.datetime(2011, 9, 5, 15, 6,
-                tzinfo=local_tz),
-            'pubdate': datetime.datetime(2011, 9, 5, 15, 6,
-                tzinfo=local_tz),
+            'timestamp': datetime.datetime(2011, 9, 5, 21, 6,
+                tzinfo=utc_tz),
+            'pubdate': datetime.datetime(2011, 9, 5, 21, 6,
+                tzinfo=utc_tz),
             'publisher': 'Publisher One',
             'languages': ('eng',),
             'comments': '<p>Comments One</p>',
             '#enum':'One',
             '#authors':('Custom One', 'Custom Two'),
-            '#date':datetime.datetime(2011, 9, 5, 0, 0,
-                tzinfo=local_tz),
+            '#date':datetime.datetime(2011, 9, 5, 6, 0,
+                tzinfo=utc_tz),
             '#rating':2.0,
             '#series':'My Series One',
             '#series_index': 1.0,
@@ -92,23 +92,23 @@ class ReadingTest(BaseTest):
             'sort': 'Title Two',
             'authors': ('Author Two', 'Author One'),
             'author_sort': 'Two, Author & One, Author',
-            'series' : 'Series One',
+            'series' : 'A Series One',
             'series_index': 2.0,
             'rating': 6.0,
             'tags': ('Tag One',),
             'formats':(),
             'identifiers': {'test':'two'},
-            'timestamp': datetime.datetime(2011, 9, 6, 0, 0,
-                tzinfo=local_tz),
-            'pubdate': datetime.datetime(2011, 8, 5, 0, 0,
-                tzinfo=local_tz),
+            'timestamp': datetime.datetime(2011, 9, 6, 6, 0,
+                tzinfo=utc_tz),
+            'pubdate': datetime.datetime(2011, 8, 5, 6, 0,
+                tzinfo=utc_tz),
             'publisher': 'Publisher Two',
             'languages': ('deu',),
             'comments': '<p>Comments Two</p>',
             '#enum':'Two',
             '#authors':('My Author Two',),
-            '#date':datetime.datetime(2011, 9, 1, 0, 0,
-                tzinfo=local_tz),
+            '#date':datetime.datetime(2011, 9, 1, 6, 0,
+                tzinfo=utc_tz),
             '#rating':4.0,
             '#series':'My Series Two',
             '#series_index': 3.0,
@@ -130,30 +130,31 @@ class ReadingTest(BaseTest):
         'Test sorting'
         cache = self.init_cache(self.library_path)
         for field, order in {
-            'title'  : [2, 1, 3],
-            'authors': [2, 1, 3],
-            'series' : [3, 2, 1],
-            'tags'   : [3, 1, 2],
-            'rating' : [3, 2, 1],
-            # 'identifiers': [3, 2, 1], There is no stable sort since 1 and
-            # 2 have the same identifier keys
-            # TODO: Add an empty book to the db and ensure that empty
-            # fields sort the same as they do in db2
-            'timestamp': [2, 1, 3],
-            'pubdate'  : [1, 2, 3],
-            'publisher': [3, 2, 1],
-            'last_modified': [2, 1, 3],
-            'languages': [3, 2, 1],
-            'comments': [3, 2, 1],
-            '#enum' : [3, 2, 1],
-            '#authors' : [3, 2, 1],
-            '#date': [3, 1, 2],
-            '#rating':[3, 2, 1],
-            '#series':[3, 2, 1],
-            '#tags':[3, 2, 1],
-            '#yesno':[3, 1, 2],
-            '#comments':[3, 2, 1],
-        }.iteritems():
+            'title'  : [2, 1, 3],
+            'authors': [2, 1, 3],
+            'series' : [3, 1, 2],
+            'tags'   : [3, 1, 2],
+            'rating' : [3, 2, 1],
+            # 'identifiers': [3, 2, 1], There is no stable sort since 1 and
+            # 2 have the same identifier keys
+            # 'last_modified': [3, 2, 1], There is no stable sort as two
+            # records have the exact same value
+            'timestamp': [2, 1, 3],
+            'pubdate'  : [1, 2, 3],
+            'publisher': [3, 2, 1],
+            'languages': [3, 2, 1],
+            'comments': [3, 2, 1],
+            '#enum' : [3, 2, 1],
+            '#authors' : [3, 2, 1],
+            '#date': [3, 1, 2],
+            '#rating':[3, 2, 1],
+            '#series':[3, 2, 1],
+            '#tags':[3, 2, 1],
+            '#yesno':[3, 1, 2],
+            '#comments':[3, 2, 1],
+            # TODO: Add an empty book to the db and ensure that empty
+            # fields sort the same as they do in db2
+        }.iteritems():
             x = list(reversed(order))
             self.assertEqual(order, cache.multisort([(field, True)],
                 ids_to_sort=x),
@@ -190,6 +191,37 @@ class ReadingTest(BaseTest):
 
     # }}}
 
+    def test_searching(self): # {{{
+        'Test searching returns the same data for both backends'
+        from calibre.library.database2 import LibraryDatabase2
+        old = LibraryDatabase2(self.library_path)
+        oldvals = {query:set(old.search_getting_ids(query, '')) for query in (
+            # Date tests
+            'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011',
+            '#date:true', 'date:<100daysago', 'date:>9/6/2011',
+            '#date:>9/1/2011', '#date:=2011',
+
+            # Number tests
+            'rating:3', 'rating:>2', 'rating:=2', 'rating:true',
+            'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7',
+            'cover:false', 'cover:true', '#float:>11', '#float:<1k',
+            '#float:10.01', 'series_index:1', 'series_index:<3', 'id:1',
+            'id:>2',
+
+            # TODO: Tests for searching the size column and
+            # cover:true|false
+        )}
+        old = None
+
+        cache = self.init_cache(self.library_path)
+        for query, ans in oldvals.iteritems():
+            nr = cache.search(query, '')
+            self.assertEqual(ans, nr,
+                'Old result: %r != New result: %r for search: %s'%(
+                    ans, nr, query))
+
+    # }}}
+
 def tests():
     return unittest.TestLoader().loadTestsFromTestCase(ReadingTest)