Merge from trunk

2025-07-09 03:04:10 -04:00 · 2013-01-21 09:36:42 +01:00 · 2013-01-21 09:36:42 +01:00 · 9f220fb79b
commit 9f220fb79b
parent d2866a2208 5000f5c133
9 changed files with 275 additions and 84 deletions
--- a/recipes/metro_uk.recipe
+++ b/recipes/metro_uk.recipe
@ -8,13 +8,16 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title          = u'Metro UK'
    description = 'News as provided by The Metro -UK'
    #timefmt = ''
-    __author__ = 'Dave Asbury'
-    #last update 9/6/12
-    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
-    oldest_article = 1
+    __author__ = 'fleclerc & Dave Asbury'
+    #last update 20/1/13
+    #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
+
+    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
    remove_empty_feeds = True
    remove_javascript     = True
    auto_cleanup = True
+    max_articles_per_feed = 12
+    ignore_duplicate_articles = {'title', 'url'}
    encoding = 'UTF-8'

    language = 'en_GB'
--- a/src/calibre/db/fields.py
+++ b/src/calibre/db/fields.py
@ -114,17 +114,8 @@ class OneToOneField(Field):

    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
        cbm = self.table.book_col_map
-        if (self.name in {'id', 'uuid', 'title'} or
-            self.metadata['datatype'] == 'datetime'):
-            # Values are likely to be unique
-            for book_id in candidates:
-                yield cbm.get(book_id, default_value), {book_id}
-        else:
-            val_map = defaultdict(set)
-            for book_id in candidates:
-                val_map[cbm.get(book_id, default_value)].add(book_id)
-            for val, book_ids in val_map.iteritems():
-                yield val, book_ids
+        for book_id in candidates:
+            yield cbm.get(book_id, default_value), {book_id}

 class CompositeField(OneToOneField):

@ -320,6 +311,12 @@ class IdentifiersField(ManyToManyField):
                        (self._default_sort_key,))
                for id_, cids in ans.iteritems()}

+    def iter_searchable_values(self, get_metadata, candidates, default_value=()):
+        '''
+        Yield ``(value, {book_id})`` for every candidate book whose entry in
+        the table's book_col_map is non-empty. Books that have no entry, or
+        an empty one, are skipped entirely.
+        '''
+        lookup = self.table.book_col_map.get
+        for book_id in candidates:
+            identifiers = lookup(book_id, default_value)
+            if not identifiers:
+                continue
+            yield identifiers, {book_id}

 class AuthorsField(ManyToManyField):

--- a/src/calibre/db/search.py
+++ b/src/calibre/db/search.py
@ -13,9 +13,87 @@ from datetime import timedelta

 from calibre.utils.config_base import prefs
 from calibre.utils.date import parse_date, UNDEFINED_DATE, now
+from calibre.utils.icu import primary_find
 from calibre.utils.search_query_parser import SearchQueryParser, ParseException

 # TODO: Thread safety of saved searches
+# Kinds of text match, as returned by _matchkind() and accepted by _match()
+CONTAINS_MATCH = 0
+EQUALS_MATCH   = 1
+REGEXP_MATCH   = 2
+
+# Utils {{{
+
+def force_to_bool(val):
+    '''
+    Coerce a textual value to True, False or None.
+
+    Strings are lower-cased with icu_lower() and then interpreted: the empty
+    string becomes None, the (localized or English) words for yes/checked/true
+    become True, those for no/unchecked/false become False, and anything else
+    is parsed as an integer and converted with bool(). Text that cannot be
+    interpreted yields None. Non-string values are returned unchanged.
+    '''
+    if isinstance(val, (str, unicode)):
+        try:
+            val = icu_lower(val)
+            if not val:
+                val = None
+            elif val in [_('yes'), _('checked'), 'true', 'yes']:
+                val = True
+            elif val in [_('no'), _('unchecked'), 'false', 'no']:
+                val = False
+            else:
+                val = bool(int(val))
+        except Exception:
+            # Uninterpretable text means "no value". Catch Exception rather
+            # than using a bare except so that KeyboardInterrupt and
+            # SystemExit are not silently swallowed here.
+            val = None
+    return val
+
+def _matchkind(query):
+    '''
+    Split a leading match-type marker off query.
+
+    Returns a ``(matchkind, query)`` tuple. Only queries longer than one
+    character are inspected: a leading backslash is dropped (the kind stays
+    CONTAINS_MATCH), a leading ``=`` selects EQUALS_MATCH and a leading ``~``
+    selects REGEXP_MATCH; in each case the marker is removed from the
+    returned query. Everything except a regular expression is lower-cased
+    with icu_lower().
+    '''
+    kind = CONTAINS_MATCH
+    if len(query) > 1:
+        if query.startswith('\\'):
+            query = query[1:]
+        else:
+            marked = {'=': EQUALS_MATCH, '~': REGEXP_MATCH}.get(query[0])
+            if marked is not None:
+                kind, query = marked, query[1:]
+
+    if kind == REGEXP_MATCH:
+        # leave case in regexps because it can be significant e.g. \S \W \D
+        return kind, query
+    return kind, icu_lower(query)
+
+# Return True if any item of the iterable `value` matches `query` under the
+# given matchkind (CONTAINS_MATCH, EQUALS_MATCH or REGEXP_MATCH), else False.
+# Each item is lower-cased with icu_lower() before comparison; the query is
+# used as passed in.
+# use_primary_find_in_search only affects CONTAINS_MATCH: when true the test
+# is done with primary_find(), otherwise with a plain `in` substring test.
+def _match(query, value, matchkind, use_primary_find_in_search=True):
+    # A leading '..' enables component matching for EQUALS_MATCH: `query`
+    # keeps a single leading dot and `sq` is the query with both dots removed.
+    if query.startswith('..'):
+        query = query[1:]
+        sq = query[1:]
+        internal_match_ok = True
+    else:
+        internal_match_ok = False
+    for t in value:
+        try:     ### ignore regexp exceptions, required because search-ahead tries before typing is finished
+            t = icu_lower(t)
+            if (matchkind == EQUALS_MATCH):
+                if internal_match_ok:
+                    # '..x': hit if the item equals '.x' or if any non-empty,
+                    # stripped, dot-separated component of the item equals 'x'
+                    if query == t:
+                        return True
+                    comps = [c.strip() for c in t.split('.') if c.strip()]
+                    for comp in comps:
+                        if sq == comp:
+                            return True
+                elif query[0] == '.':
+                    # '.x': hit if the item is exactly 'x' or starts with 'x.'
+                    if t.startswith(query[1:]):
+                        ql = len(query) - 1
+                        if (len(t) == ql) or (t[ql:ql+1] == '.'):
+                            return True
+                elif query == t:
+                    return True
+            elif matchkind == REGEXP_MATCH:
+                if re.search(query, t, re.I|re.UNICODE):
+                    return True
+            elif matchkind == CONTAINS_MATCH:
+                if use_primary_find_in_search:
+                    # primary_find() result is a hit when its first element
+                    # is not -1
+                    if primary_find(query, t)[0] != -1:
+                        return True
+                elif query in t:
+                        return True
+        except re.error:
+            # an invalid regular expression simply does not match this item
+            pass
+    return False
+# }}}

 class DateSearch(object): # {{{

@ -225,14 +303,101 @@ class NumericSearch(object): # {{{

 # }}}

+class BooleanSearch(object): # {{{
+    '''
+    Callable that matches books on a boolean field.
+
+    The accepted query words are the localized and English spellings of
+    yes/checked/true, no/unchecked/false and empty/blank, plus the
+    underscore-prefixed forms ``_yes``, ``_no`` and ``_empty``.
+    '''
+
+    def __init__(self):
+        self.local_no        = icu_lower(_('no'))
+        self.local_yes       = icu_lower(_('yes'))
+        self.local_unchecked = icu_lower(_('unchecked'))
+        self.local_checked   = icu_lower(_('checked'))
+        self.local_empty     = icu_lower(_('empty'))
+        self.local_blank     = icu_lower(_('blank'))
+        # Every query word that __call__ accepts
+        self.local_bool_values = {
+            self.local_no, self.local_unchecked, '_no', 'false', 'no',
+            self.local_yes, self.local_checked, '_yes', 'true', 'yes',
+            self.local_empty, self.local_blank, '_empty', 'empty'}
+
+    def __call__(self, query, field_iter, bools_are_tristate):
+        '''
+        Return the set of book ids whose value matches query.
+
+        :param query: One of the words in self.local_bool_values.
+        :param field_iter: Callable returning an iterable of
+            ``(value, book_ids)`` pairs; each value is passed through
+            force_to_bool() before being tested.
+        :param bools_are_tristate: When false, an unset value is treated
+            like False. When true, unset values match the "empty" words and
+            ``false``, while ``true`` matches every value that is set,
+            whether it is yes or no.
+        :raises ParseException: If query is not a recognized word.
+        '''
+        if query not in self.local_bool_values:
+            raise ParseException(_('Invalid boolean query "{0}"').format(query))
+
+        # The query does not change while iterating, so decide once which of
+        # the three value states (unset / false / true) it selects instead of
+        # rebuilding and testing the word sets for every value.
+        no_words = {self.local_no, self.local_unchecked, 'no', '_no'}
+        yes_words = {self.local_yes, self.local_checked, 'yes', '_yes', 'true'}
+        empty_words = {self.local_empty, self.local_blank, 'empty', '_empty',
+                       'false'}
+        want_true = query in yes_words
+        if bools_are_tristate:
+            want_unset = query in empty_words
+            # 'true' means "has any value", which includes an explicit no
+            want_false = query in no_words or query == 'true'
+        else:
+            # unset and false are indistinguishable in two-state mode
+            want_unset = want_false = query in no_words or query == 'false'
+
+        matches = set()
+        for val, book_ids in field_iter():
+            val = force_to_bool(val)
+            if val is None:
+                wanted = want_unset
+            elif not val:
+                wanted = want_false
+            else:
+                wanted = want_true
+            if wanted:
+                matches |= book_ids
+        return matches
+
+# }}}
+
+class KeyPairSearch(object): # {{{
+    '''
+    Callable that matches books on a field whose per-book value is a mapping
+    of keys to values (the values yielded by field_iter are queried with
+    ``.get()`` and ``.iteritems()``).
+    '''
+
+    def __call__(self, query, field_iter, candidates, use_primary_find):
+        '''
+        Return the set of book ids matching query.
+
+        :param query: Either ``value`` or ``key:value``. Each part may carry
+            a match-kind marker understood by _matchkind(). A value of
+            ``true`` or ``false`` tests for presence instead of content.
+        :param field_iter: Callable returning an iterable of
+            ``(mapping, book_ids)`` pairs.
+        :param candidates: Set of book ids being searched; used to compute
+            the complement for ``false`` queries.
+        :param use_primary_find: Passed to _match() as
+            use_primary_find_in_search.
+        :raises ParseException: If query contains more than one colon.
+        '''
+        matches = set()
+        if ':' in query:
+            parts = [part.strip() for part in query.split(':')]
+            if len(parts) != 2:
+                raise ParseException(query, len(query),
+                        'Invalid query format for colon-separated search')
+            keyq, valq = parts
+            keyq_mkind, keyq = _matchkind(keyq)
+            valq_mkind, valq = _matchkind(valq)
+        else:
+            # No key given: only the values are tested
+            keyq = keyq_mkind = ''
+            valq_mkind, valq = _matchkind(query)
+
+        if valq in {'true', 'false'}:
+            # Presence test: collect the books that have the key (or any
+            # entry at all when no key was given); 'false' is the complement.
+            found = set()
+            if keyq:
+                for val, book_ids in field_iter():
+                    if val and val.get(keyq, False):
+                        found |= book_ids
+            else:
+                for val, book_ids in field_iter():
+                    if val:
+                        found |= book_ids
+            return found if valq == 'true' else candidates - found
+
+        for m, book_ids in field_iter():
+            for key, val in m.iteritems():
+                if (keyq and not _match(keyq, (key,), keyq_mkind,
+                                        use_primary_find_in_search=use_primary_find)):
+                    continue
+                if (valq and not _match(valq, (val,), valq_mkind,
+                                        use_primary_find_in_search=use_primary_find)):
+                    continue
+                # One matching entry is enough for these books
+                matches |= book_ids
+                break
+
+        return matches
+
+# }}}
+
 class Parser(SearchQueryParser):

    def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
-                 limit_search_columns, limit_search_columns_to, locations):
+                 bool_search, keypair_search, limit_search_columns, limit_search_columns_to,
+                 locations):
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search, self.num_search = date_search, num_search
+        self.bool_search, self.keypair_search = bool_search, keypair_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        super(Parser, self).__init__(locations, optimize=True)
@ -268,7 +433,7 @@ class Parser(SearchQueryParser):

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
-        # original_location = location
+        original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
@ -344,8 +509,51 @@ class Parser(SearchQueryParser):
                        self.dbcache.fields[location].iter_counts, candidates),
                    location, dt, candidates)

+            # take care of boolean special case
+            if dt == 'bool':
+                return self.bool_search(icu_lower(query),
+                                partial(self.field_iter, location, candidates),
+                                self.dbcache.pref('bools_are_tristate'))
+
+            # special case: colon-separated fields such as identifiers. isbn
+            # is a special case within the case
+            if fm.get('is_csp', False):
+                field_iter = partial(self.field_iter, location, candidates)
+                upf = prefs['use_primary_find_in_search']
+                if location == 'identifiers' and original_location == 'isbn':
+                    return self.keypair_search('=isbn:'+query, field_iter,
+                                        candidates, upf)
+                return self.keypair_search(query, field_iter, candidates, upf)
+
+        # check for user categories
+        if len(location) >= 2 and location.startswith('@'):
+            return self.get_user_category_matches(location[1:], icu_lower(query), candidates)
+
        return matches

+    def get_user_category_matches(self, location, query, candidates):
+        '''
+        Return the book ids matched by the items of a user category.
+
+        Every ``(item, category, ...)`` entry stored under the user category
+        named location is searched for with an exact-match query via
+        get_matches(). A query starting with ``.`` also includes categories
+        whose name starts with ``location + '.'``. A query of ``false``
+        returns the candidates that matched nothing; queries shorter than
+        two characters match nothing.
+        '''
+        found = set()
+        if len(query) < 2:
+            return found
+
+        categories = self.dbcache.pref('user_categories')
+        # Books already matched are removed so they are not searched again
+        remaining = set(candidates)
+
+        include_children = query.startswith('.')
+        if include_children:
+            query = query[1:]
+
+        for name in categories:
+            is_child = include_children and name.startswith(location + '.')
+            if name != location and not is_child:
+                continue
+            for (item, category, ignored) in categories[name]:
+                hits = self.get_matches(category, '=' + item, candidates=remaining)
+                remaining -= hits
+                found |= hits
+
+        if query == 'false':
+            return candidates - found
+        return found

 class Search(object):

@ -353,6 +561,8 @@ class Search(object):
        self.all_search_locations = all_search_locations
        self.date_search = DateSearch()
        self.num_search = NumericSearch()
+        self.bool_search = BooleanSearch()
+        self.keypair_search = KeyPairSearch()

    def change_locations(self, newlocs):
        self.all_search_locations = newlocs
@ -374,16 +584,22 @@ class Search(object):
        if not q:
            return all_book_ids

+        if not isinstance(q, type(u'')):
+            q = q.decode('utf-8')
+
        # We construct a new parser instance per search as pyparsing is not
        # thread safe. On my desktop, constructing a SearchQueryParser instance
        # takes 0.000975 seconds and restoring it from a pickle takes
        # 0.000974 seconds.
        sqp = Parser(
            dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
-            self.date_search, self.num_search, prefs[ 'limit_search_columns' ],
+            self.date_search, self.num_search, self.bool_search,
+            self.keypair_search,
+            prefs[ 'limit_search_columns' ],
            prefs[ 'limit_search_columns_to' ], self.all_search_locations)
+
        try:
-            ret = sqp.parse(query)
+            ret = sqp.parse(q)
        finally:
            sqp.dbcache = None
        return ret
--- a/src/calibre/db/tests/metadata.db
+++ b/src/calibre/db/tests/metadata.db
--- a/src/calibre/db/tests/reading.py
+++ b/src/calibre/db/tests/reading.py
@ -208,8 +208,19 @@ class ReadingTest(BaseTest):
            '#float:10.01', 'series_index:1', 'series_index:<3', 'id:1',
            'id:>2',

+            # Bool tests
+            '#yesno:true', '#yesno:false', '#yesno:yes', '#yesno:no',
+            '#yesno:empty',
+
+            # Keypair tests
+            'identifiers:true', 'identifiers:false', 'identifiers:test',
+            'identifiers:test:false', 'identifiers:test:one',
+            'identifiers:t:n', 'identifiers:=test:=two', 'identifiers:x:y',
+            'identifiers:z',
+
            # TODO: Tests for searching the size column and
            # cover:true|false
+            # TODO: Tests for user categories searching
        )}
        old = None

--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -16,12 +16,12 @@ from calibre.utils.pyparsing import ParseException
 from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
 from calibre.ebooks.metadata.book.base import SafeFormat
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre.utils.config import tweaks, device_prefs
+from calibre.utils.config import tweaks, device_prefs, prefs
 from calibre.utils.date import dt_factory, qt_to_dt, as_local_time
 from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import SearchQueryParser
-from calibre.library.caches import (_match, CONTAINS_MATCH, EQUALS_MATCH,
-    REGEXP_MATCH, MetadataBackup, force_to_bool)
+from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
+from calibre.library.caches import (MetadataBackup, force_to_bool)
 from calibre.library.save_to_disk import find_plugboard
 from calibre import strftime, isbytestring
 from calibre.constants import filesystem_encoding, DEBUG
@ -1037,6 +1037,7 @@ class OnDeviceSearch(SearchQueryParser): # {{{
             }
        for x in ('author', 'format'):
            q[x+'s'] = q[x]
+        upf = prefs['use_primary_find_in_search']
        for index, row in enumerate(self.model.db):
            for locvalue in locations:
                accessor = q[locvalue]
@ -1063,7 +1064,7 @@ class OnDeviceSearch(SearchQueryParser): # {{{
                        vals = accessor(row).split(',')
                    else:
                        vals = [accessor(row)]
-                    if _match(query, vals, m):
+                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(index)
                        break
                except ValueError: # Unicode errors
--- a/src/calibre/gui2/store/config/chooser/models.py
+++ b/src/calibre/gui2/store/config/chooser/models.py
@ -10,8 +10,8 @@ from PyQt4.Qt import (Qt, QAbstractItemModel, QIcon, QVariant, QModelIndex, QSiz

 from calibre.gui2 import NONE
 from calibre.customize.ui import is_disabled, disable_plugin, enable_plugin
-from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
-    REGEXP_MATCH
+from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
+from calibre.utils.config_base import prefs
 from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import SearchQueryParser

@ -60,13 +60,13 @@ class Matches(QAbstractItemModel):
            index = self.createIndex(i, 0)
            data = QVariant(True)
            self.setData(index, data, Qt.CheckStateRole)
-    
+
    def enable_none(self):
        for i in xrange(len(self.matches)):
            index = self.createIndex(i, 0)
            data = QVariant(False)
            self.setData(index, data, Qt.CheckStateRole)
-    
+
    def enable_invert(self):
        for i in xrange(len(self.matches)):
            self.toggle_plugin(self.createIndex(i, 0))
@ -243,6 +243,7 @@ class SearchFilter(SearchQueryParser):
             'name': lambda x : x.name.lower(),
        }
        q['formats'] = q['format']
+        upf = prefs['use_primary_find_in_search']
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
@ -276,7 +277,7 @@ class SearchFilter(SearchQueryParser):
                        vals = accessor(sr).split(',')
                    else:
                        vals = [accessor(sr)]
-                    if _match(query, vals, m):
+                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(sr)
                        break
                except ValueError: # Unicode errors
--- a/src/calibre/gui2/store/stores/mobileread/models.py
+++ b/src/calibre/gui2/store/stores/mobileread/models.py
@ -11,13 +11,13 @@ from operator import attrgetter
 from PyQt4.Qt import (Qt, QAbstractItemModel, QModelIndex, QVariant, pyqtSignal)

 from calibre.gui2 import NONE
-from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
-    REGEXP_MATCH
+from calibre.db.search import _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
+from calibre.utils.config_base import prefs
 from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import SearchQueryParser

 class BooksModel(QAbstractItemModel):
-    
+
    total_changed = pyqtSignal(int)

    HEADERS = [_('Title'), _('Author(s)'), _('Format')]
@ -37,8 +37,8 @@ class BooksModel(QAbstractItemModel):
            return self.books[row]
        else:
            return None
-    
-    def search(self, filter):        
+
+    def search(self, filter):
        self.filter = filter.strip()
        if not self.filter:
            self.books = self.all_books
@ -50,7 +50,7 @@ class BooksModel(QAbstractItemModel):
        self.layoutChanged.emit()
        self.sort(self.sort_col, self.sort_order)
        self.total_changed.emit(self.rowCount())
-    
+
    def index(self, row, column, parent=QModelIndex()):
        return self.createIndex(row, column)

@ -64,7 +64,7 @@ class BooksModel(QAbstractItemModel):

    def columnCount(self, *args):
        return len(self.HEADERS)
-    
+
    def headerData(self, section, orientation, role):
        if role != Qt.DisplayRole:
            return NONE
@ -112,7 +112,7 @@ class BooksModel(QAbstractItemModel):


 class SearchFilter(SearchQueryParser):
-    
+
    USABLE_LOCATIONS = [
        'all',
        'author',
@ -161,6 +161,7 @@ class SearchFilter(SearchQueryParser):
        }
        for x in ('author', 'format'):
            q[x+'s'] = q[x]
+        upf = prefs['use_primary_find_in_search']
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
@ -182,7 +183,7 @@ class SearchFilter(SearchQueryParser):
                        m = matchkind

                    vals = [accessor(sr)]
-                    if _match(query, vals, m):
+                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(sr)
                        break
                except ValueError: # Unicode errors
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -6,7 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, itertools, time, traceback, locale
+import itertools, time, traceback, locale
 from itertools import repeat, izip, imap
 from datetime import timedelta
 from threading import Thread
@ -16,10 +16,10 @@ from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_s
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.utils.pyparsing import ParseException
 from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
+from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre import prints
-from calibre.utils.icu import primary_find

 class MetadataBackup(Thread): # {{{
    '''
@ -118,7 +118,6 @@ class MetadataBackup(Thread): # {{{

 # }}}

-
 ### Global utility function for get_match here and in gui2/library.py
 # This is a global for performance
 pref_use_primary_find_in_search = False
@ -127,47 +126,6 @@ def set_use_primary_find_in_search(toWhat):
    global pref_use_primary_find_in_search
    pref_use_primary_find_in_search = toWhat

-CONTAINS_MATCH = 0
-EQUALS_MATCH   = 1
-REGEXP_MATCH   = 2
-def _match(query, value, matchkind):
-    if query.startswith('..'):
-        query = query[1:]
-        sq = query[1:]
-        internal_match_ok = True
-    else:
-        internal_match_ok = False
-    for t in value:
-        try:     ### ignore regexp exceptions, required because search-ahead tries before typing is finished
-            t = icu_lower(t)
-            if (matchkind == EQUALS_MATCH):
-                if internal_match_ok:
-                    if query == t:
-                        return True
-                    comps = [c.strip() for c in t.split('.') if c.strip()]
-                    for comp in comps:
-                        if sq == comp:
-                            return True
-                elif query[0] == '.':
-                    if t.startswith(query[1:]):
-                        ql = len(query) - 1
-                        if (len(t) == ql) or (t[ql:ql+1] == '.'):
-                            return True
-                elif query == t:
-                    return True
-            elif matchkind == REGEXP_MATCH:
-                if re.search(query, t, re.I|re.UNICODE):
-                    return True
-            elif matchkind == CONTAINS_MATCH:
-                if pref_use_primary_find_in_search:
-                    if primary_find(query, t)[0] != -1:
-                        return True
-                elif query in t:
-                        return True
-        except re.error:
-            pass
-    return False
-
 def force_to_bool(val):
    if isinstance(val, (str, unicode)):
        try:
@ -576,7 +534,8 @@ class ResultCache(SearchQueryParser): # {{{
                    continue
                k = parts[:1]
                v = parts[1:]
-                if keyq and not _match(keyq, k, keyq_mkind):
+                if keyq and not _match(keyq, k, keyq_mkind,
+                                       use_primary_find_in_search=pref_use_primary_find_in_search):
                    continue
                if valq:
                    if valq == 'true':
@ -586,7 +545,8 @@ class ResultCache(SearchQueryParser): # {{{
                        if v:
                            add_if_nothing_matches = False
                            continue
-                    elif not _match(valq, v, valq_mkind):
+                    elif not _match(valq, v, valq_mkind,
+                                    use_primary_find_in_search=pref_use_primary_find_in_search):
                        continue
                matches.add(id_)

@ -851,7 +811,8 @@ class ResultCache(SearchQueryParser): # {{{
                            vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                        else:
                            vals = [item[loc]] ### make into list to make _match happy
-                        if _match(q, vals, matchkind):
+                        if _match(q, vals, matchkind,
+                                  use_primary_find_in_search=pref_use_primary_find_in_search):
                            matches.add(item[0])
                            continue
                current_candidates -= matches