Merge from trunk

This commit is contained in:
Charles Haley 2013-01-21 09:36:42 +01:00
commit 9f220fb79b
9 changed files with 275 additions and 84 deletions

View File

@ -8,13 +8,16 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
description = 'News as provided by The Metro -UK'
#timefmt = ''
__author__ = 'Dave Asbury'
#last update 9/6/12
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
oldest_article = 1
__author__ = 'fleclerc & Dave Asbury'
#last update 20/1/13
#cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
max_articles_per_feed = 12
ignore_duplicate_articles = {'title', 'url'}
encoding = 'UTF-8'
language = 'en_GB'

View File

@ -114,17 +114,8 @@ class OneToOneField(Field):
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
cbm = self.table.book_col_map
if (self.name in {'id', 'uuid', 'title'} or
self.metadata['datatype'] == 'datetime'):
# Values are likely to be unique
for book_id in candidates:
yield cbm.get(book_id, default_value), {book_id}
else:
val_map = defaultdict(set)
for book_id in candidates:
val_map[cbm.get(book_id, default_value)].add(book_id)
for val, book_ids in val_map.iteritems():
yield val, book_ids
for book_id in candidates:
yield cbm.get(book_id, default_value), {book_id}
class CompositeField(OneToOneField):
@ -320,6 +311,12 @@ class IdentifiersField(ManyToManyField):
(self._default_sort_key,))
for id_, cids in ans.iteritems()}
def iter_searchable_values(self, get_metadata, candidates, default_value=()):
    '''
    Generate ``(value, {book_id})`` pairs for searching this field.

    The value for each book id in ``candidates`` is looked up in
    ``self.table.book_col_map``, falling back to ``default_value`` when the
    book has no entry. Books whose value is falsy (for example an empty
    mapping) are skipped entirely. ``get_metadata`` is not used here.
    '''
    lookup = self.table.book_col_map.get
    for book_id in candidates:
        identifiers = lookup(book_id, default_value)
        if not identifiers:
            # Nothing to search for this book
            continue
        yield identifiers, {book_id}
class AuthorsField(ManyToManyField):

View File

@ -13,9 +13,87 @@ from datetime import timedelta
from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now
from calibre.utils.icu import primary_find
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
# TODO: Thread safety of saved searches
CONTAINS_MATCH = 0
EQUALS_MATCH = 1
REGEXP_MATCH = 2
# Utils {{{
def force_to_bool(val):
    '''
    Convert a string to a tri-state boolean; pass anything else through.

    Strings are lower-cased with icu_lower and then interpreted as follows:

      * empty string -> None
      * the translated or English words for yes/checked, or 'true' -> True
      * the translated or English words for no/unchecked, or 'false' -> False
      * anything else is parsed as an integer and converted with bool()

    A string that cannot be interpreted yields None. Non-string values are
    returned unchanged.
    '''
    if isinstance(val, (str, unicode)):
        try:
            val = icu_lower(val)
            if not val:
                val = None
            elif val in (_('yes'), _('checked'), 'true', 'yes'):
                val = True
            elif val in (_('no'), _('unchecked'), 'false', 'no'):
                val = False
            else:
                val = bool(int(val))
        except Exception:
            # Best effort: an uninterpretable string means "no value". Only
            # ordinary errors are swallowed, so KeyboardInterrupt and
            # SystemExit still propagate.
            val = None
    return val
def _matchkind(query):
    '''
    Split a raw query into ``(matchkind, query)``.

    For queries longer than one character a leading marker selects the kind
    of match and is removed from the query:

      * ``\\`` -- contains match (the backslash escapes a literal marker)
      * ``=``  -- equals match
      * ``~``  -- regular expression match

    Without a marker the query is a contains match. The query is lower-cased
    with icu_lower unless it is a regular expression.
    '''
    prefix_kinds = {
        '\\': CONTAINS_MATCH,
        '=': EQUALS_MATCH,
        '~': REGEXP_MATCH,
    }
    kind = CONTAINS_MATCH
    if len(query) > 1 and query[0] in prefix_kinds:
        kind = prefix_kinds[query[0]]
        query = query[1:]
    if kind != REGEXP_MATCH:
        # Case is left alone in regexps because it can be significant,
        # e.g. \S \W \D
        query = icu_lower(query)
    return kind, query
def _match(query, value, matchkind, use_primary_find_in_search=True):
if query.startswith('..'):
query = query[1:]
sq = query[1:]
internal_match_ok = True
else:
internal_match_ok = False
for t in value:
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
t = icu_lower(t)
if (matchkind == EQUALS_MATCH):
if internal_match_ok:
if query == t:
return True
comps = [c.strip() for c in t.split('.') if c.strip()]
for comp in comps:
if sq == comp:
return True
elif query[0] == '.':
if t.startswith(query[1:]):
ql = len(query) - 1
if (len(t) == ql) or (t[ql:ql+1] == '.'):
return True
elif query == t:
return True
elif matchkind == REGEXP_MATCH:
if re.search(query, t, re.I|re.UNICODE):
return True
elif matchkind == CONTAINS_MATCH:
if use_primary_find_in_search:
if primary_find(query, t)[0] != -1:
return True
elif query in t:
return True
except re.error:
pass
return False
# }}}
class DateSearch(object): # {{{
@ -225,14 +303,101 @@ class NumericSearch(object): # {{{
# }}}
class BooleanSearch(object): # {{{

    '''
    Search a yes/no field.

    The accepted queries are the English words and the translated words for
    yes/checked, no/unchecked and empty/blank, plus 'true' and 'false' and
    the underscore-prefixed forms '_yes', '_no' and '_empty'.
    '''

    def __init__(self):
        self.local_no        = icu_lower(_('no'))
        self.local_yes       = icu_lower(_('yes'))
        self.local_unchecked = icu_lower(_('unchecked'))
        self.local_checked   = icu_lower(_('checked'))
        self.local_empty     = icu_lower(_('empty'))
        self.local_blank     = icu_lower(_('blank'))
        self.local_bool_values = {
            self.local_no, self.local_unchecked, '_no', 'false', 'no',
            self.local_yes, self.local_checked, '_yes', 'true', 'yes',
            self.local_empty, self.local_blank, '_empty', 'empty'}

    def __call__(self, query, field_iter, bools_are_tristate):
        '''
        Return the set of book ids whose value matches ``query``.

        :param query: One of the values in ``self.local_bool_values``.
        :param field_iter: Callable returning an iterable of
            ``(value, book_ids)`` pairs. Each value is normalised with
            force_to_bool() and ``book_ids`` must be a set.
        :param bools_are_tristate: If false, unset values are treated like
            False. If true, 'false' and the empty/blank words match only
            unset values, the no/unchecked words match only False, the
            yes/checked words match only True, and 'true' matches any value
            that is set.
        :raises ParseException: If ``query`` is not a recognised word.
        '''
        matches = set()
        if query not in self.local_bool_values:
            raise ParseException(_('Invalid boolean query "{0}"').format(query))

        # Which of the three value states the query selects does not depend
        # on the individual book, so it is decided once, outside the loop.
        yes_words = {self.local_yes, self.local_checked, 'yes', '_yes', 'true'}
        if not bools_are_tristate:
            no_words = {self.local_no, self.local_unchecked, 'no', '_no', 'false'}
            want_none = want_false = query in no_words
        else:
            want_none = query in {
                self.local_empty, self.local_blank, 'empty', '_empty', 'false'}
            want_false = query in {
                self.local_no, self.local_unchecked, 'no', '_no', 'true'}
        want_true = query in yes_words

        for val, book_ids in field_iter():
            val = force_to_bool(val)
            if val is None:
                wanted = want_none
            elif not val:
                wanted = want_false
            else:
                wanted = want_true
            if wanted:
                matches |= book_ids
        return matches
# }}}
class KeyPairSearch(object): # {{{

    '''
    Search a field whose values are key -> value mappings, such as
    identifiers.
    '''

    def __call__(self, query, field_iter, candidates, use_primary_find):
        '''
        Return the set of book ids matching ``query``.

        :param query: Either ``value`` or ``key:value``. Each part may carry
            a match-kind marker understood by _matchkind().
        :param field_iter: Callable returning an iterable of
            ``(mapping, book_ids)`` pairs, where ``book_ids`` is a set.
        :param candidates: The set of book ids being searched; used to
            compute the complement for a ``false`` query.
        :param use_primary_find: Passed to _match() as
            ``use_primary_find_in_search``.
        :raises ParseException: If the query contains more than one colon.

        A value query of ``true`` returns the books that have a truthy
        mapping (or a truthy value for the given key); ``false`` returns all
        other candidates. For any other query a book matches when at least
        one key/value pair satisfies both the key query and the value query,
        an empty part being treated as always satisfied.
        '''
        matches = set()
        if ':' in query:
            q = [part.strip() for part in query.split(':')]
            if len(q) != 2:
                raise ParseException(query, len(query),
                        'Invalid query format for colon-separated search')
            keyq, valq = q
            keyq_mkind, keyq = _matchkind(keyq)
            valq_mkind, valq = _matchkind(valq)
        else:
            # No key part: keyq is empty, so keyq_mkind is never consulted
            keyq = keyq_mkind = ''
            valq_mkind, valq = _matchkind(query)

        if valq in {'true', 'false'}:
            found = set()
            if keyq:
                for val, book_ids in field_iter():
                    if val and val.get(keyq, False):
                        found |= book_ids
            else:
                for val, book_ids in field_iter():
                    if val:
                        found |= book_ids
            return found if valq == 'true' else candidates - found

        for m, book_ids in field_iter():
            for key, val in m.iteritems():
                if (keyq and not _match(keyq, (key,), keyq_mkind,
                                        use_primary_find_in_search=use_primary_find)):
                    continue
                if (valq and not _match(valq, (val,), valq_mkind,
                                        use_primary_find_in_search=use_primary_find)):
                    continue
                # One matching pair is enough for this group of books
                matches |= book_ids
                break

        return matches
# }}}
class Parser(SearchQueryParser):
def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
limit_search_columns, limit_search_columns_to, locations):
bool_search, keypair_search, limit_search_columns, limit_search_columns_to,
locations):
self.dbcache, self.all_book_ids = dbcache, all_book_ids
self.all_search_locations = frozenset(locations)
self.grouped_search_terms = gst
self.date_search, self.num_search = date_search, num_search
self.bool_search, self.keypair_search = bool_search, keypair_search
self.limit_search_columns, self.limit_search_columns_to = (
limit_search_columns, limit_search_columns_to)
super(Parser, self).__init__(locations, optimize=True)
@ -268,7 +433,7 @@ class Parser(SearchQueryParser):
# get metadata key associated with the search term. Eliminates
# dealing with plurals and other aliases
# original_location = location
original_location = location
location = self.field_metadata.search_term_to_field_key(
icu_lower(location.strip()))
# grouped search terms
@ -344,8 +509,51 @@ class Parser(SearchQueryParser):
self.dbcache.fields[location].iter_counts, candidates),
location, dt, candidates)
# take care of boolean special case
if dt == 'bool':
return self.bool_search(icu_lower(query),
partial(self.field_iter, location, candidates),
self.dbcache.pref('bools_are_tristate'))
# special case: colon-separated fields such as identifiers. isbn
# is a special case within the case
if fm.get('is_csp', False):
field_iter = partial(self.field_iter, location, candidates)
upf = prefs['use_primary_find_in_search']
if location == 'identifiers' and original_location == 'isbn':
return self.keypair_search('=isbn:'+query, field_iter,
candidates, upf)
return self.keypair_search(query, field_iter, candidates, upf)
# check for user categories
if len(location) >= 2 and location.startswith('@'):
return self.get_user_category_matches(location[1:], icu_lower(query), candidates)
return matches
def get_user_category_matches(self, location, query, candidates):
    '''
    Return the books that belong to the user category ``location``.

    Queries shorter than two characters match nothing. A leading ``.`` in
    the query also includes sub-categories, i.e. categories whose name
    starts with ``location + '.'``. Every ``(item, category, ign)`` entry of
    a selected user category is resolved with an equality search via
    get_matches(). If the remaining query is ``false`` the result is
    inverted relative to ``candidates``.
    '''
    found = set()
    if len(query) < 2:
        return found
    user_cats = self.dbcache.pref('user_categories')
    # Books already matched are removed from the working set so that later
    # item searches do not look at them again
    remaining = set(candidates)
    include_subcats = query.startswith('.')
    if include_subcats:
        query = query[1:]
    for key in user_cats:
        if key != location and not (
                include_subcats and key.startswith(location + '.')):
            continue
        for item, category, ign in user_cats[key]:
            hits = self.get_matches(category, '=' + item, candidates=remaining)
            remaining -= hits
            found |= hits
    if query == 'false':
        return candidates - found
    return found
class Search(object):
@ -353,6 +561,8 @@ class Search(object):
self.all_search_locations = all_search_locations
self.date_search = DateSearch()
self.num_search = NumericSearch()
self.bool_search = BooleanSearch()
self.keypair_search = KeyPairSearch()
def change_locations(self, newlocs):
self.all_search_locations = newlocs
@ -374,16 +584,22 @@ class Search(object):
if not q:
return all_book_ids
if not isinstance(q, type(u'')):
q = q.decode('utf-8')
# We construct a new parser instance per search as pyparsing is not
# thread safe. On my desktop, constructing a SearchQueryParser instance
# takes 0.000975 seconds and restoring it from a pickle takes
# 0.000974 seconds.
sqp = Parser(
dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
self.date_search, self.num_search, prefs[ 'limit_search_columns' ],
self.date_search, self.num_search, self.bool_search,
self.keypair_search,
prefs[ 'limit_search_columns' ],
prefs[ 'limit_search_columns_to' ], self.all_search_locations)
try:
ret = sqp.parse(query)
ret = sqp.parse(q)
finally:
sqp.dbcache = None
return ret

Binary file not shown.

View File

@ -208,8 +208,19 @@ class ReadingTest(BaseTest):
'#float:10.01', 'series_index:1', 'series_index:<3', 'id:1',
'id:>2',
# Bool tests
'#yesno:true', '#yesno:false', '#yesno:yes', '#yesno:no',
'#yesno:empty',
# Keypair tests
'identifiers:true', 'identifiers:false', 'identifiers:test',
'identifiers:test:false', 'identifiers:test:one',
'identifiers:t:n', 'identifiers:=test:=two', 'identifiers:x:y',
'identifiers:z',
# TODO: Tests for searching the size column and
# cover:true|false
# TODO: Tests for user categories searching
)}
old = None

View File

@ -16,12 +16,12 @@ from calibre.utils.pyparsing import ParseException
from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
from calibre.ebooks.metadata.book.base import SafeFormat
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import tweaks, device_prefs
from calibre.utils.config import tweaks, device_prefs, prefs
from calibre.utils.date import dt_factory, qt_to_dt, as_local_time
from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.library.caches import (_match, CONTAINS_MATCH, EQUALS_MATCH,
REGEXP_MATCH, MetadataBackup, force_to_bool)
from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.library.caches import (MetadataBackup, force_to_bool)
from calibre.library.save_to_disk import find_plugboard
from calibre import strftime, isbytestring
from calibre.constants import filesystem_encoding, DEBUG
@ -1037,6 +1037,7 @@ class OnDeviceSearch(SearchQueryParser): # {{{
}
for x in ('author', 'format'):
q[x+'s'] = q[x]
upf = prefs['use_primary_find_in_search']
for index, row in enumerate(self.model.db):
for locvalue in locations:
accessor = q[locvalue]
@ -1063,7 +1064,7 @@ class OnDeviceSearch(SearchQueryParser): # {{{
vals = accessor(row).split(',')
else:
vals = [accessor(row)]
if _match(query, vals, m):
if _match(query, vals, m, use_primary_find_in_search=upf):
matches.add(index)
break
except ValueError: # Unicode errors

View File

@ -10,8 +10,8 @@ from PyQt4.Qt import (Qt, QAbstractItemModel, QIcon, QVariant, QModelIndex, QSiz
from calibre.gui2 import NONE
from calibre.customize.ui import is_disabled, disable_plugin, enable_plugin
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
REGEXP_MATCH
from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.utils.config_base import prefs
from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import SearchQueryParser
@ -60,13 +60,13 @@ class Matches(QAbstractItemModel):
index = self.createIndex(i, 0)
data = QVariant(True)
self.setData(index, data, Qt.CheckStateRole)
def enable_none(self):
    '''
    Set the Qt.CheckStateRole data of column 0 to QVariant(False) for every
    row in self.matches.
    '''
    row_count = len(self.matches)
    for row in xrange(row_count):
        self.setData(self.createIndex(row, 0), QVariant(False),
                Qt.CheckStateRole)
def enable_invert(self):
for i in xrange(len(self.matches)):
self.toggle_plugin(self.createIndex(i, 0))
@ -243,6 +243,7 @@ class SearchFilter(SearchQueryParser):
'name': lambda x : x.name.lower(),
}
q['formats'] = q['format']
upf = prefs['use_primary_find_in_search']
for sr in self.srs:
for locvalue in locations:
accessor = q[locvalue]
@ -276,7 +277,7 @@ class SearchFilter(SearchQueryParser):
vals = accessor(sr).split(',')
else:
vals = [accessor(sr)]
if _match(query, vals, m):
if _match(query, vals, m, use_primary_find_in_search=upf):
matches.add(sr)
break
except ValueError: # Unicode errors

View File

@ -11,13 +11,13 @@ from operator import attrgetter
from PyQt4.Qt import (Qt, QAbstractItemModel, QModelIndex, QVariant, pyqtSignal)
from calibre.gui2 import NONE
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
REGEXP_MATCH
from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.utils.config_base import prefs
from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import SearchQueryParser
class BooksModel(QAbstractItemModel):
total_changed = pyqtSignal(int)
HEADERS = [_('Title'), _('Author(s)'), _('Format')]
@ -37,8 +37,8 @@ class BooksModel(QAbstractItemModel):
return self.books[row]
else:
return None
def search(self, filter):
def search(self, filter):
self.filter = filter.strip()
if not self.filter:
self.books = self.all_books
@ -50,7 +50,7 @@ class BooksModel(QAbstractItemModel):
self.layoutChanged.emit()
self.sort(self.sort_col, self.sort_order)
self.total_changed.emit(self.rowCount())
def index(self, row, column, parent=QModelIndex()):
return self.createIndex(row, column)
@ -64,7 +64,7 @@ class BooksModel(QAbstractItemModel):
def columnCount(self, *args):
return len(self.HEADERS)
def headerData(self, section, orientation, role):
if role != Qt.DisplayRole:
return NONE
@ -112,7 +112,7 @@ class BooksModel(QAbstractItemModel):
class SearchFilter(SearchQueryParser):
USABLE_LOCATIONS = [
'all',
'author',
@ -161,6 +161,7 @@ class SearchFilter(SearchQueryParser):
}
for x in ('author', 'format'):
q[x+'s'] = q[x]
upf = prefs['use_primary_find_in_search']
for sr in self.srs:
for locvalue in locations:
accessor = q[locvalue]
@ -182,7 +183,7 @@ class SearchFilter(SearchQueryParser):
m = matchkind
vals = [accessor(sr)]
if _match(query, vals, m):
if _match(query, vals, m, use_primary_find_in_search=upf):
matches.add(sr)
break
except ValueError: # Unicode errors

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, itertools, time, traceback, locale
import itertools, time, traceback, locale
from itertools import repeat, izip, imap
from datetime import timedelta
from threading import Thread
@ -16,10 +16,10 @@ from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_s
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException
from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints
from calibre.utils.icu import primary_find
class MetadataBackup(Thread): # {{{
'''
@ -118,7 +118,6 @@ class MetadataBackup(Thread): # {{{
# }}}
### Global utility function for get_match here and in gui2/library.py
# This is a global for performance
pref_use_primary_find_in_search = False
@ -127,47 +126,6 @@ def set_use_primary_find_in_search(toWhat):
global pref_use_primary_find_in_search
pref_use_primary_find_in_search = toWhat
CONTAINS_MATCH = 0
EQUALS_MATCH = 1
REGEXP_MATCH = 2
def _match(query, value, matchkind):
if query.startswith('..'):
query = query[1:]
sq = query[1:]
internal_match_ok = True
else:
internal_match_ok = False
for t in value:
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
t = icu_lower(t)
if (matchkind == EQUALS_MATCH):
if internal_match_ok:
if query == t:
return True
comps = [c.strip() for c in t.split('.') if c.strip()]
for comp in comps:
if sq == comp:
return True
elif query[0] == '.':
if t.startswith(query[1:]):
ql = len(query) - 1
if (len(t) == ql) or (t[ql:ql+1] == '.'):
return True
elif query == t:
return True
elif matchkind == REGEXP_MATCH:
if re.search(query, t, re.I|re.UNICODE):
return True
elif matchkind == CONTAINS_MATCH:
if pref_use_primary_find_in_search:
if primary_find(query, t)[0] != -1:
return True
elif query in t:
return True
except re.error:
pass
return False
def force_to_bool(val):
if isinstance(val, (str, unicode)):
try:
@ -576,7 +534,8 @@ class ResultCache(SearchQueryParser): # {{{
continue
k = parts[:1]
v = parts[1:]
if keyq and not _match(keyq, k, keyq_mkind):
if keyq and not _match(keyq, k, keyq_mkind,
use_primary_find_in_search=pref_use_primary_find_in_search):
continue
if valq:
if valq == 'true':
@ -586,7 +545,8 @@ class ResultCache(SearchQueryParser): # {{{
if v:
add_if_nothing_matches = False
continue
elif not _match(valq, v, valq_mkind):
elif not _match(valq, v, valq_mkind,
use_primary_find_in_search=pref_use_primary_find_in_search):
continue
matches.add(id_)
@ -851,7 +811,8 @@ class ResultCache(SearchQueryParser): # {{{
vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
else:
vals = [item[loc]] ### make into list to make _match happy
if _match(q, vals, matchkind):
if _match(q, vals, matchkind,
use_primary_find_in_search=pref_use_primary_find_in_search):
matches.add(item[0])
continue
current_candidates -= matches