Implement #4830 (exact match searches)

This commit is contained in:
Kovid Goyal 2010-02-12 11:10:13 -07:00
parent a7b5f60f6f
commit 8816449cca
7 changed files with 179 additions and 37 deletions

View File

@ -5,21 +5,30 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.search_ui import Ui_Dialog from calibre.gui2.dialogs.search_ui import Ui_Dialog
from calibre.gui2 import qstring_to_unicode from calibre.gui2 import qstring_to_unicode
from calibre.library.database2 import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
class SearchDialog(QDialog, Ui_Dialog): class SearchDialog(QDialog, Ui_Dialog):
def __init__(self, *args): def __init__(self, *args):
QDialog.__init__(self, *args) QDialog.__init__(self, *args)
self.setupUi(self) self.setupUi(self)
self.mc = ''
def tokens(self, raw): def tokens(self, raw):
phrases = re.findall(r'\s+".*?"\s+', raw) phrases = re.findall(r'\s*".*?"\s*', raw)
for f in phrases: for f in phrases:
raw = raw.replace(f, ' ') raw = raw.replace(f, ' ')
return [t.strip() for t in phrases + raw.split()] phrases = [t.strip('" ') for t in phrases]
return ['"' + self.mc + t + '"' for t in phrases + [r.strip() for r in raw.split()]]
def search_string(self): def search_string(self):
mk = self.matchkind.currentIndex()
if mk == CONTAINS_MATCH:
self.mc = ''
elif mk == EQUALS_MATCH:
self.mc = '='
else:
self.mc = '~'
all, any, phrase, none = map(lambda x: unicode(x.text()), all, any, phrase, none = map(lambda x: unicode(x.text()),
(self.all, self.any, self.phrase, self.none)) (self.all, self.any, self.phrase, self.none))
all, any, none = map(self.tokens, (all, any, none)) all, any, none = map(self.tokens, (all, any, none))

View File

@ -103,8 +103,65 @@
</layout> </layout>
</widget> </widget>
</item> </item>
<item>
<widget class="QGroupBox" name="groupBox" >
<property name="maximumSize" >
<size>
<width>16777215</width>
<height>60</height>
</size>
</property>
<layout class="QHBoxLayout" name="horizontalLayout_5" >
<item> <item>
<widget class="QLabel" name="label_5" > <widget class="QLabel" name="label_5" >
<property name="text" >
<string>What kind of match to use:</string>
</property>
<property name="buddy" >
<cstring>matchkind</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="matchkind">
<item>
<property name="text">
<string>Contains: the word or phrase matches anywhere in the metadata</string>
</property>
</item>
<item>
<property name="text">
<string>Equals: the word or phrase must match an entire metadata field</string>
</property>
</item>
<item>
<property name="text">
<string>Regular expression: the expression must match anywhere in the metadata</string>
</property>
</item>
</widget>
</item>
<item>
<widget class="QLabel" name="label_51" >
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>40</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text" >
<string> </string>
</property>
<property name="buddy" >
<cstring>matchkind</cstring>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QLabel" name="label_6" >
<property name="maximumSize" > <property name="maximumSize" >
<size> <size>
<width>16777215</width> <width>16777215</width>

View File

@ -17,7 +17,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, pyqtSignal, \
from calibre import strftime from calibre import strftime
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
from calibre.library.database2 import FIELD_MAP from calibre.library.database2 import FIELD_MAP, _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \ from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog error_dialog
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
@ -893,7 +893,20 @@ class OnDeviceSearch(SearchQueryParser):
def get_matches(self, location, query): def get_matches(self, location, query):
location = location.lower().strip() location = location.lower().strip()
query = query.lower().strip()
matchkind = CONTAINS_MATCH
if len(query) > 1:
if query.startswith('\\'):
query = query[1:]
elif query.startswith('='):
matchkind = EQUALS_MATCH
query = query[1:]
elif query.startswith('~'):
matchkind = REGEXP_MATCH
query = query[1:]
if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
query = query.lower()
if location not in ('title', 'author', 'tag', 'all', 'format'): if location not in ('title', 'author', 'tag', 'all', 'format'):
return set([]) return set([])
matches = set([]) matches = set([])
@ -904,13 +917,24 @@ class OnDeviceSearch(SearchQueryParser):
'tag':lambda x: ','.join(getattr(x, 'tags')).lower(), 'tag':lambda x: ','.join(getattr(x, 'tags')).lower(),
'format':lambda x: os.path.splitext(x.path)[1].lower() 'format':lambda x: os.path.splitext(x.path)[1].lower()
} }
for i, v in enumerate(locations): for index, row in enumerate(self.model.db):
locations[i] = q[v] for locvalue in locations:
for i, r in enumerate(self.model.db): accessor = q[locvalue]
for loc in locations:
try: try:
if query in loc(r): ### Can't separate authors because comma is used for name sep and author sep
matches.add(i) ### Exact match might not get what you want. For that reason, turn author
### exactmatch searches into contains searches.
if locvalue == 'author' and matchkind == EQUALS_MATCH:
m = CONTAINS_MATCH
else:
m = matchkind
if locvalue == 'tag':
vals = accessor(row).split(',')
else:
vals = [accessor(row)]
if _match(query, vals, m):
matches.add(index)
break break
except ValueError: # Unicode errors except ValueError: # Unicode errors
import traceback import traceback

View File

@ -173,6 +173,8 @@ class TagsModel(QAbstractItemModel):
if len(data[r]) > 0: if len(data[r]) > 0:
self.beginInsertRows(category_index, 0, len(data[r])-1) self.beginInsertRows(category_index, 0, len(data[r])-1)
for tag in data[r]: for tag in data[r]:
if r == 'author':
tag.name = tag.name.replace('|', ',')
tag.state = state_map.get(tag.name, 0) tag.state = state_map.get(tag.name, 0)
t = TagTreeItem(parent=category, data=tag, icon_map=self.icon_map) t = TagTreeItem(parent=category, data=tag, icon_map=self.icon_map)
self.endInsertRows() self.endInsertRows()
@ -278,7 +280,7 @@ class TagsModel(QAbstractItemModel):
category = key if key != 'news' else 'tag' category = key if key != 'news' else 'tag'
if tag.state > 0: if tag.state > 0:
prefix = ' not ' if tag.state == 2 else '' prefix = ' not ' if tag.state == 2 else ''
ans.append('%s%s:"%s"'%(prefix, category, tag.name)) ans.append('%s%s:"=%s"'%(prefix, category, tag.name))
return ans return ans

View File

@ -669,19 +669,19 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if type == 'series': if type == 'series':
series = idx.model().db.series(row) series = idx.model().db.series(row)
if series: if series:
search = ['series:'+series] search = ['series:"'+series+'"']
elif type == 'publisher': elif type == 'publisher':
publisher = idx.model().db.publisher(row) publisher = idx.model().db.publisher(row)
if publisher: if publisher:
search = ['publisher:'+publisher] search = ['publisher:"'+publisher+'"']
elif type == 'tag': elif type == 'tag':
tags = idx.model().db.tags(row) tags = idx.model().db.tags(row)
if tags: if tags:
search = ['tag:'+t for t in tags.split(',')] search = ['tag:"='+t+'"' for t in tags.split(',')]
elif type == 'author': elif type == 'author':
authors = idx.model().db.authors(row) authors = idx.model().db.authors(row)
if authors: if authors:
search = ['author:'+a.strip().replace('|', ',') \ search = ['author:"='+a.strip().replace('|', ',')+'"' \
for a in authors.split(',')] for a in authors.split(',')]
join = ' or ' join = ' or '
if search: if search:

View File

@ -174,6 +174,22 @@ class CoverCache(QThread):
self.load_queue.appendleft(id) self.load_queue.appendleft(id)
self.load_queue_lock.unlock() self.load_queue_lock.unlock()
### Global utility function for get_match here and in gui2/library.py
CONTAINS_MATCH = 0
EQUALS_MATCH = 1
REGEXP_MATCH = 2
def _match(query, value, matchkind):
for t in value:
t = t.lower()
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
if ((matchkind == EQUALS_MATCH and query == t) or
(matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
(matchkind == CONTAINS_MATCH and query in t)):
return True
except re.error:
pass
return False
class ResultCache(SearchQueryParser): class ResultCache(SearchQueryParser):
''' '''
@ -202,10 +218,23 @@ class ResultCache(SearchQueryParser):
matches = set([]) matches = set([])
if query and query.strip(): if query and query.strip():
location = location.lower().strip() location = location.lower().strip()
matchkind = CONTAINS_MATCH
if (len(query) > 1):
if query.startswith('\\'):
query = query[1:]
elif query.startswith('='):
matchkind = EQUALS_MATCH
query = query[1:]
elif query.startswith('~'):
matchkind = REGEXP_MATCH
query = query[1:]
if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
query = query.lower() query = query.lower()
if not isinstance(query, unicode): if not isinstance(query, unicode):
query = query.decode('utf-8') query = query.decode('utf-8')
if location in ('tag', 'author', 'format'): if location in ('tag', 'author', 'format', 'comment'):
location += 's' location += 's'
all = ('title', 'authors', 'publisher', 'tags', 'comments', 'series', 'formats', 'isbn', 'rating', 'cover') all = ('title', 'authors', 'publisher', 'tags', 'comments', 'series', 'formats', 'isbn', 'rating', 'cover')
MAP = {} MAP = {}
@ -219,28 +248,40 @@ class ResultCache(SearchQueryParser):
rating_query = int(query) * 2 rating_query = int(query) * 2
except: except:
rating_query = None rating_query = None
for loc in location:
if loc == MAP['authors']:
q = query.replace(',', '|'); ### DB stores authors with commas changed to bars, so change query
else:
q = query
for item in self._data: for item in self._data:
if item is None: continue if item is None: continue
for loc in location: if not item[loc]:
if query == 'false' and not item[loc]: if query == 'false':
if isinstance(item[loc], basestring): if isinstance(item[loc], basestring):
if item[loc].strip() != '': if item[loc].strip() != '':
continue continue
matches.add(item[0]) matches.add(item[0])
break break
if query == 'true' and item[loc]: continue ### item is empty. No possible matches below
if q == 'true':
if isinstance(item[loc], basestring): if isinstance(item[loc], basestring):
if item[loc].strip() == '': if item[loc].strip() == '':
continue continue
matches.add(item[0]) matches.add(item[0])
break continue
if rating_query and item[loc] and loc == MAP['rating'] and rating_query == int(item[loc]): if rating_query and loc == MAP['rating'] and rating_query == int(item[loc]):
matches.add(item[0]) matches.add(item[0])
break continue
if item[loc] and loc not in EXCLUDE_FIELDS and query in item[loc].lower(): if loc not in EXCLUDE_FIELDS:
if loc == MAP['tags'] or loc == MAP['authors']:
vals = item[loc].split(',') ### check individual tags/authors, not the long string
else:
vals = [item[loc]] ### make into list to make _match happy
if _match(q, vals, matchkind):
matches.add(item[0]) matches.add(item[0])
break continue
return matches return matches
def remove(self, id): def remove(self, id):

View File

@ -195,6 +195,15 @@ are available in the LRF format. Some more examples::
title:"The Ring" or "This book is about a ring" title:"The Ring" or "This book is about a ring"
format:epub publisher:feedbooks.com format:epub publisher:feedbooks.com
Searches are by default 'contains'. An item matches if the search string appears anywhere in the indicated metadata.
Two other kinds of searches are available: equality search and search using regular expressions.
Equality searches are indicated by prefixing the search string with an equals sign (=). For example, the query
``tag:"=science"`` will match "science", but not "science fiction". Regular expression searches are
indicated by prefixing the search string with a tilde (~). Any Python-compatible regular expression can
be used. Regular expression searches are contains searches unless the expression contains anchors.
Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash.
You can build advanced search queries easily using the :guilabel:`Advanced Search Dialog`, accessed by You can build advanced search queries easily using the :guilabel:`Advanced Search Dialog`, accessed by
clicking the button |sbi|. clicking the button |sbi|.