diff --git a/recipes/metro_uk.recipe b/recipes/metro_uk.recipe index 934fbab793..78db75daf8 100644 --- a/recipes/metro_uk.recipe +++ b/recipes/metro_uk.recipe @@ -8,13 +8,16 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Metro UK' description = 'News as provided by The Metro -UK' #timefmt = '' - __author__ = 'Dave Asbury' - #last update 9/6/12 - cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg' - oldest_article = 1 + __author__ = 'fleclerc & Dave Asbury' + #last update 20/1/13 + #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg' + + cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg' remove_empty_feeds = True remove_javascript = True auto_cleanup = True + max_articles_per_feed = 12 + ignore_duplicate_articles = {'title', 'url'} encoding = 'UTF-8' language = 'en_GB' diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index 34a12c9491..e0950fff3b 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -114,17 +114,8 @@ class OneToOneField(Field): def iter_searchable_values(self, get_metadata, candidates, default_value=None): cbm = self.table.book_col_map - if (self.name in {'id', 'uuid', 'title'} or - self.metadata['datatype'] == 'datetime'): - # Values are likely to be unique - for book_id in candidates: - yield cbm.get(book_id, default_value), {book_id} - else: - val_map = defaultdict(set) - for book_id in candidates: - val_map[cbm.get(book_id, default_value)].add(book_id) - for val, book_ids in val_map.iteritems(): - yield val, book_ids + for book_id in candidates: + yield cbm.get(book_id, default_value), {book_id} class CompositeField(OneToOneField): @@ -320,6 +311,12 @@ class IdentifiersField(ManyToManyField): (self._default_sort_key,)) for id_, cids in ans.iteritems()} + def iter_searchable_values(self, get_metadata, candidates, default_value=()): + bcm = self.table.book_col_map + for book_id in candidates: + val = bcm.get(book_id, default_value) + if val: + yield val, {book_id} class AuthorsField(ManyToManyField): diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py index fe9cec79c8..334bc046d8 100644 --- a/src/calibre/db/search.py +++ b/src/calibre/db/search.py @@ -13,9 +13,87 @@ from datetime import timedelta from calibre.utils.config_base import prefs from calibre.utils.date import parse_date, UNDEFINED_DATE, now +from calibre.utils.icu import primary_find from calibre.utils.search_query_parser import SearchQueryParser, ParseException # TODO: Thread safety of saved searches +CONTAINS_MATCH = 0 +EQUALS_MATCH = 1 +REGEXP_MATCH = 2 + +# Utils {{{ + +def force_to_bool(val): + if isinstance(val, (str, unicode)): + try: + val = icu_lower(val) + if not val: + val = None + elif val in [_('yes'), _('checked'), 'true', 'yes']: + val = True + elif val in [_('no'), _('unchecked'), 'false', 'no']: + val = False + else: + val = bool(int(val)) + except: + val = None + return val + +def _matchkind(query): + matchkind = CONTAINS_MATCH + if (len(query) > 1): + if query.startswith('\\'): + query = query[1:] + elif query.startswith('='): + matchkind = EQUALS_MATCH + query = query[1:] + elif query.startswith('~'): + matchkind = REGEXP_MATCH + query = query[1:] + + if matchkind != REGEXP_MATCH: + # leave case in regexps because it can be significant e.g. \S \W \D + query = icu_lower(query) + return matchkind, query + +def _match(query, value, matchkind, use_primary_find_in_search=True): + if query.startswith('..'): + query = query[1:] + sq = query[1:] + internal_match_ok = True + else: + internal_match_ok = False + for t in value: + try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished + t = icu_lower(t) + if (matchkind == EQUALS_MATCH): + if internal_match_ok: + if query == t: + return True + comps = [c.strip() for c in t.split('.') if c.strip()] + for comp in comps: + if sq == comp: + return True + elif query[0] == '.': + if t.startswith(query[1:]): + ql = len(query) - 1 + if (len(t) == ql) or (t[ql:ql+1] == '.'): + return True + elif query == t: + return True + elif matchkind == REGEXP_MATCH: + if re.search(query, t, re.I|re.UNICODE): + return True + elif matchkind == CONTAINS_MATCH: + if use_primary_find_in_search: + if primary_find(query, t)[0] != -1: + return True + elif query in t: + return True + except re.error: + pass + return False +# }}} class DateSearch(object): # {{{ @@ -225,14 +303,101 @@ class NumericSearch(object): # {{{ # }}} +class BooleanSearch(object): # {{{ + + def __init__(self): + self.local_no = icu_lower(_('no')) + self.local_yes = icu_lower(_('yes')) + self.local_unchecked = icu_lower(_('unchecked')) + self.local_checked = icu_lower(_('checked')) + self.local_empty = icu_lower(_('empty')) + self.local_blank = icu_lower(_('blank')) + self.local_bool_values = { + self.local_no, self.local_unchecked, '_no', 'false', 'no', + self.local_yes, self.local_checked, '_yes', 'true', 'yes', + self.local_empty, self.local_blank, '_empty', 'empty'} + + def __call__(self, query, field_iter, bools_are_tristate): + matches = set() + if query not in self.local_bool_values: + raise ParseException(_('Invalid boolean query "{0}"').format(query)) + for val, book_ids in field_iter(): + val = force_to_bool(val) + if not bools_are_tristate: + if val is None or not val: # item is None or set to false + if query in { self.local_no, self.local_unchecked, 'no', '_no', 'false' }: + matches |= book_ids + else: # item is explicitly set to true + if query in { self.local_yes, self.local_checked, 'yes', '_yes', 'true' }: + matches |= book_ids + else: + if val is None: + if query in { self.local_empty, self.local_blank, 'empty', '_empty', 'false' }: + matches |= book_ids + elif not val: # is not None and false + if query in { self.local_no, self.local_unchecked, 'no', '_no', 'true' }: + matches |= book_ids + else: # item is not None and true + if query in { self.local_yes, self.local_checked, 'yes', '_yes', 'true' }: + matches |= book_ids + return matches + +# }}} + +class KeyPairSearch(object): # {{{ + + def __call__(self, query, field_iter, candidates, use_primary_find): + matches = set() + if ':' in query: + q = [q.strip() for q in query.split(':')] + if len(q) != 2: + raise ParseException(query, len(query), + 'Invalid query format for colon-separated search') + keyq, valq = q + keyq_mkind, keyq = _matchkind(keyq) + valq_mkind, valq = _matchkind(valq) + else: + keyq = keyq_mkind = '' + valq_mkind, valq = _matchkind(query) + keyq_mkind + + if valq in {'true', 'false'}: + found = set() + if keyq: + for val, book_ids in field_iter(): + if val and val.get(keyq, False): + found |= book_ids + else: + for val, book_ids in field_iter(): + if val: + found |= book_ids + return found if valq == 'true' else candidates - found + + for m, book_ids in field_iter(): + for key, val in m.iteritems(): + if (keyq and not _match(keyq, (key,), keyq_mkind, + use_primary_find_in_search=use_primary_find)): + continue + if (valq and not _match(valq, (val,), valq_mkind, + use_primary_find_in_search=use_primary_find)): + continue + matches |= book_ids + break + + return matches + +# }}} + class Parser(SearchQueryParser): def __init__(self, dbcache, all_book_ids, gst, date_search, num_search, - limit_search_columns, limit_search_columns_to, locations): + bool_search, keypair_search, limit_search_columns, limit_search_columns_to, + locations): self.dbcache, self.all_book_ids = dbcache, all_book_ids self.all_search_locations = frozenset(locations) self.grouped_search_terms = gst self.date_search, self.num_search = date_search, num_search + self.bool_search, self.keypair_search = bool_search, keypair_search self.limit_search_columns, self.limit_search_columns_to = ( limit_search_columns, limit_search_columns_to) super(Parser, self).__init__(locations, optimize=True) @@ -268,7 +433,7 @@ class Parser(SearchQueryParser): # get metadata key associated with the search term. Eliminates # dealing with plurals and other aliases - # original_location = location + original_location = location location = self.field_metadata.search_term_to_field_key( icu_lower(location.strip())) # grouped search terms @@ -344,8 +509,51 @@ class Parser(SearchQueryParser): self.dbcache.fields[location].iter_counts, candidates), location, dt, candidates) + # take care of boolean special case + if dt == 'bool': + return self.bool_search(icu_lower(query), + partial(self.field_iter, location, candidates), + self.dbcache.pref('bools_are_tristate')) + + # special case: colon-separated fields such as identifiers. isbn + # is a special case within the case + if fm.get('is_csp', False): + field_iter = partial(self.field_iter, location, candidates) + upf = prefs['use_primary_find_in_search'] + if location == 'identifiers' and original_location == 'isbn': + return self.keypair_search('=isbn:'+query, field_iter, + candidates, upf) + return self.keypair_search(query, field_iter, candidates, upf) + + # check for user categories + if len(location) >= 2 and location.startswith('@'): + return self.get_user_category_matches(location[1:], icu_lower(query), candidates) + return matches + def get_user_category_matches(self, location, query, candidates): + matches = set() + if len(query) < 2: + return matches + + user_cats = self.dbcache.pref('user_categories') + c = set(candidates) + + if query.startswith('.'): + check_subcats = True + query = query[1:] + else: + check_subcats = False + + for key in user_cats: + if key == location or (check_subcats and key.startswith(location + '.')): + for (item, category, ign) in user_cats[key]: + s = self.get_matches(category, '=' + item, candidates=c) + c -= s + matches |= s + if query == 'false': + return candidates - matches + return matches class Search(object): @@ -353,6 +561,8 @@ class Search(object): self.all_search_locations = all_search_locations self.date_search = DateSearch() self.num_search = NumericSearch() + self.bool_search = BooleanSearch() + self.keypair_search = KeyPairSearch() def change_locations(self, newlocs): self.all_search_locations = newlocs @@ -374,16 +584,22 @@ class Search(object): if not q: return all_book_ids + if not isinstance(q, type(u'')): + q = q.decode('utf-8') + # We construct a new parser instance per search as pyparsing is not # thread safe. On my desktop, constructing a SearchQueryParser instance # takes 0.000975 seconds and restoring it from a pickle takes # 0.000974 seconds. sqp = Parser( dbcache, all_book_ids, dbcache.pref('grouped_search_terms'), - self.date_search, self.num_search, prefs[ 'limit_search_columns' ], + self.date_search, self.num_search, self.bool_search, + self.keypair_search, + prefs[ 'limit_search_columns' ], prefs[ 'limit_search_columns_to' ], self.all_search_locations) + try: - ret = sqp.parse(query) + ret = sqp.parse(q) finally: sqp.dbcache = None return ret diff --git a/src/calibre/db/tests/metadata.db b/src/calibre/db/tests/metadata.db index 4bd6dfe4f9..94748877b6 100644 Binary files a/src/calibre/db/tests/metadata.db and b/src/calibre/db/tests/metadata.db differ diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 4792f498f8..627a692860 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -208,8 +208,19 @@ class ReadingTest(BaseTest): '#float:10.01', 'series_index:1', 'series_index:<3', 'id:1', 'id:>2', + # Bool tests + '#yesno:true', '#yesno:false', '#yesno:yes', '#yesno:no', + '#yesno:empty', + + # Keypair tests + 'identifiers:true', 'identifiers:false', 'identifiers:test', + 'identifiers:test:false', 'identifiers:test:one', + 'identifiers:t:n', 'identifiers:=test:=two', 'identifiers:x:y', + 'identifiers:z', + # TODO: Tests for searching the size column and # cover:true|false + # TODO: Tests for user categories searching )} old = None diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 891b775448..bbd8566a37 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -16,12 +16,12 @@ from calibre.utils.pyparsing import ParseException from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors from calibre.ebooks.metadata.book.base import SafeFormat from calibre.ptempfile import PersistentTemporaryFile -from calibre.utils.config import tweaks, device_prefs +from calibre.utils.config import tweaks, device_prefs, prefs from calibre.utils.date import dt_factory, qt_to_dt, as_local_time from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import SearchQueryParser -from calibre.library.caches import (_match, CONTAINS_MATCH, EQUALS_MATCH, - REGEXP_MATCH, MetadataBackup, force_to_bool) +from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH +from calibre.library.caches import (MetadataBackup, force_to_bool) from calibre.library.save_to_disk import find_plugboard from calibre import strftime, isbytestring from calibre.constants import filesystem_encoding, DEBUG @@ -1037,6 +1037,7 @@ class OnDeviceSearch(SearchQueryParser): # {{{ } for x in ('author', 'format'): q[x+'s'] = q[x] + upf = prefs['use_primary_find_in_search'] for index, row in enumerate(self.model.db): for locvalue in locations: accessor = q[locvalue] @@ -1063,7 +1064,7 @@ class OnDeviceSearch(SearchQueryParser): # {{{ vals = accessor(row).split(',') else: vals = [accessor(row)] - if _match(query, vals, m): + if _match(query, vals, m, use_primary_find_in_search=upf): matches.add(index) break except ValueError: # Unicode errors diff --git a/src/calibre/gui2/store/config/chooser/models.py b/src/calibre/gui2/store/config/chooser/models.py index 24f6bdfc25..036b45bcaf 100644 --- a/src/calibre/gui2/store/config/chooser/models.py +++ b/src/calibre/gui2/store/config/chooser/models.py @@ -10,8 +10,8 @@ from PyQt4.Qt import (Qt, QAbstractItemModel, QIcon, QVariant, QModelIndex, QSiz from calibre.gui2 import NONE from calibre.customize.ui import is_disabled, disable_plugin, enable_plugin -from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ - REGEXP_MATCH +from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH +from calibre.utils.config_base import prefs from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import SearchQueryParser @@ -60,13 +60,13 @@ class Matches(QAbstractItemModel): index = self.createIndex(i, 0) data = QVariant(True) self.setData(index, data, Qt.CheckStateRole) - + def enable_none(self): for i in xrange(len(self.matches)): index = self.createIndex(i, 0) data = QVariant(False) self.setData(index, data, Qt.CheckStateRole) - + def enable_invert(self): for i in xrange(len(self.matches)): self.toggle_plugin(self.createIndex(i, 0)) @@ -243,6 +243,7 @@ class SearchFilter(SearchQueryParser): 'name': lambda x : x.name.lower(), } q['formats'] = q['format'] + upf = prefs['use_primary_find_in_search'] for sr in self.srs: for locvalue in locations: accessor = q[locvalue] @@ -276,7 +277,7 @@ class SearchFilter(SearchQueryParser): vals = accessor(sr).split(',') else: vals = [accessor(sr)] - if _match(query, vals, m): + if _match(query, vals, m, use_primary_find_in_search=upf): matches.add(sr) break except ValueError: # Unicode errors diff --git a/src/calibre/gui2/store/stores/mobileread/models.py b/src/calibre/gui2/store/stores/mobileread/models.py index 297707e248..60f038c4e2 100644 --- a/src/calibre/gui2/store/stores/mobileread/models.py +++ b/src/calibre/gui2/store/stores/mobileread/models.py @@ -11,13 +11,13 @@ from operator import attrgetter from PyQt4.Qt import (Qt, QAbstractItemModel, QModelIndex, QVariant, pyqtSignal) from calibre.gui2 import NONE -from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ - REGEXP_MATCH +from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH +from calibre.utils.config_base import prefs from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import SearchQueryParser class BooksModel(QAbstractItemModel): - + total_changed = pyqtSignal(int) HEADERS = [_('Title'), _('Author(s)'), _('Format')] @@ -37,8 +37,8 @@ class BooksModel(QAbstractItemModel): return self.books[row] else: return None - - def search(self, filter): + + def search(self, filter): self.filter = filter.strip() if not self.filter: self.books = self.all_books @@ -50,7 +50,7 @@ class BooksModel(QAbstractItemModel): self.layoutChanged.emit() self.sort(self.sort_col, self.sort_order) self.total_changed.emit(self.rowCount()) - + def index(self, row, column, parent=QModelIndex()): return self.createIndex(row, column) @@ -64,7 +64,7 @@ class BooksModel(QAbstractItemModel): def columnCount(self, *args): return len(self.HEADERS) - + def headerData(self, section, orientation, role): if role != Qt.DisplayRole: return NONE @@ -112,7 +112,7 @@ class BooksModel(QAbstractItemModel): class SearchFilter(SearchQueryParser): - + USABLE_LOCATIONS = [ 'all', 'author', @@ -161,6 +161,7 @@ class SearchFilter(SearchQueryParser): } for x in ('author', 'format'): q[x+'s'] = q[x] + upf = prefs['use_primary_find_in_search'] for sr in self.srs: for locvalue in locations: accessor = q[locvalue] @@ -182,7 +183,7 @@ class SearchFilter(SearchQueryParser): m = matchkind vals = [accessor(sr)] - if _match(query, vals, m): + if _match(query, vals, m, use_primary_find_in_search=upf): matches.add(sr) break except ValueError: # Unicode errors diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 507305528d..b453c654df 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, itertools, time, traceback, locale +import itertools, time, traceback, locale from itertools import repeat, izip, imap from datetime import timedelta from threading import Thread @@ -16,10 +16,10 @@ from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_s from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc) +from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre import prints -from calibre.utils.icu import primary_find class MetadataBackup(Thread): # {{{ ''' @@ -118,7 +118,6 @@ class MetadataBackup(Thread): # {{{ # }}} - ### Global utility function for get_match here and in gui2/library.py # This is a global for performance pref_use_primary_find_in_search = False @@ -127,47 +126,6 @@ def set_use_primary_find_in_search(toWhat): global pref_use_primary_find_in_search pref_use_primary_find_in_search = toWhat -CONTAINS_MATCH = 0 -EQUALS_MATCH = 1 -REGEXP_MATCH = 2 -def _match(query, value, matchkind): - if query.startswith('..'): - query = query[1:] - sq = query[1:] - internal_match_ok = True - else: - internal_match_ok = False - for t in value: - try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished - t = icu_lower(t) - if (matchkind == EQUALS_MATCH): - if internal_match_ok: - if query == t: - return True - comps = [c.strip() for c in t.split('.') if c.strip()] - for comp in comps: - if sq == comp: - return True - elif query[0] == '.': - if t.startswith(query[1:]): - ql = len(query) - 1 - if (len(t) == ql) or (t[ql:ql+1] == '.'): - return True - elif query == t: - return True - elif matchkind == REGEXP_MATCH: - if re.search(query, t, re.I|re.UNICODE): - return True - elif matchkind == CONTAINS_MATCH: - if pref_use_primary_find_in_search: - if primary_find(query, t)[0] != -1: - return True - elif query in t: - return True - except re.error: - pass - return False - def force_to_bool(val): if isinstance(val, (str, unicode)): try: @@ -576,7 +534,8 @@ class ResultCache(SearchQueryParser): # {{{ continue k = parts[:1] v = parts[1:] - if keyq and not _match(keyq, k, keyq_mkind): + if keyq and not _match(keyq, k, keyq_mkind, + use_primary_find_in_search=pref_use_primary_find_in_search): continue if valq: if valq == 'true': @@ -586,7 +545,8 @@ class ResultCache(SearchQueryParser): # {{{ if v: add_if_nothing_matches = False continue - elif not _match(valq, v, valq_mkind): + elif not _match(valq, v, valq_mkind, + use_primary_find_in_search=pref_use_primary_find_in_search): continue matches.add(id_) @@ -851,7 +811,8 @@ class ResultCache(SearchQueryParser): # {{{ vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])] else: vals = [item[loc]] ### make into list to make _match happy - if _match(q, vals, matchkind): + if _match(q, vals, matchkind, + use_primary_find_in_search=pref_use_primary_find_in_search): matches.add(item[0]) continue current_candidates -= matches