Implement #4830 (exact match searches)

This commit is contained in:
Kovid Goyal 2010-02-12 11:10:13 -07:00
parent a7b5f60f6f
commit 8816449cca
7 changed files with 179 additions and 37 deletions

View File

@ -5,21 +5,30 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.search_ui import Ui_Dialog
from calibre.gui2 import qstring_to_unicode
from calibre.library.database2 import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
class SearchDialog(QDialog, Ui_Dialog):
# Construct the advanced search dialog: initialise the QDialog base with the
# caller's positional arguments, then build the widgets declared in Ui_Dialog.
def __init__(self, *args):
QDialog.__init__(self, *args)
self.setupUi(self)
# Match-kind prefix prepended to every token by tokens(); search_string()
# sets it to '' (contains), '=' (equals) or '~' (regular expression).
self.mc = ''
def tokens(self, raw):
    '''
    Split ``raw`` into search tokens, each wrapped in double quotes and
    prefixed with the current match-kind marker ``self.mc``.

    Double-quoted phrases in ``raw`` are kept together as single tokens and
    are returned first; whatever text remains is split on whitespace.

    :param raw: The text typed into one of the dialog's search boxes
    :return: A list of strings of the form ``"<mc><token>"``
    '''
    phrases = re.findall(r'\s*".*?"\s*', raw)
    for f in phrases:
        # Blank out each phrase so that its words are not returned a second
        # time as individual tokens
        raw = raw.replace(f, ' ')
    phrases = [t.strip('" ') for t in phrases]
    # str.split() with no arguments already discards surrounding whitespace
    return ['"' + self.mc + t + '"' for t in phrases + raw.split()]
def search_string(self):
mk = self.matchkind.currentIndex()
if mk == CONTAINS_MATCH:
self.mc = ''
elif mk == EQUALS_MATCH:
self.mc = '='
else:
self.mc = '~'
all, any, phrase, none = map(lambda x: unicode(x.text()),
(self.all, self.any, self.phrase, self.none))
all, any, none = map(self.tokens, (all, any, none))

View File

@ -104,7 +104,64 @@
</widget>
</item>
<item>
<widget class="QLabel" name="label_5" >
<widget class="QGroupBox" name="groupBox" >
<property name="maximumSize" >
<size>
<width>16777215</width>
<height>60</height>
</size>
</property>
<layout class="QHBoxLayout" name="horizontalLayout_5" >
<item>
<widget class="QLabel" name="label_5" >
<property name="text" >
<string>What kind of match to use:</string>
</property>
<property name="buddy" >
<cstring>matchkind</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="matchkind">
<item>
<property name="text">
<string>Contains: the word or phrase matches anywhere in the metadata</string>
</property>
</item>
<item>
<property name="text">
<string>Equals: the word or phrase must match an entire metadata field</string>
</property>
</item>
<item>
<property name="text">
<string>Regular expression: the expression must match anywhere in the metadata</string>
</property>
</item>
</widget>
</item>
<item>
<widget class="QLabel" name="label_51" >
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>40</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text" >
<string> </string>
</property>
<property name="buddy" >
<cstring>matchkind</cstring>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QLabel" name="label_6" >
<property name="maximumSize" >
<size>
<width>16777215</width>

View File

@ -17,7 +17,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, pyqtSignal, \
from calibre import strftime
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.pyparsing import ParseException
from calibre.library.database2 import FIELD_MAP
from calibre.library.database2 import FIELD_MAP, _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
@ -893,7 +893,20 @@ class OnDeviceSearch(SearchQueryParser):
def get_matches(self, location, query):
location = location.lower().strip()
query = query.lower().strip()
matchkind = CONTAINS_MATCH
if len(query) > 1:
if query.startswith('\\'):
query = query[1:]
elif query.startswith('='):
matchkind = EQUALS_MATCH
query = query[1:]
elif query.startswith('~'):
matchkind = REGEXP_MATCH
query = query[1:]
if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
query = query.lower()
if location not in ('title', 'author', 'tag', 'all', 'format'):
return set([])
matches = set([])
@ -904,13 +917,24 @@ class OnDeviceSearch(SearchQueryParser):
'tag':lambda x: ','.join(getattr(x, 'tags')).lower(),
'format':lambda x: os.path.splitext(x.path)[1].lower()
}
for i, v in enumerate(locations):
locations[i] = q[v]
for i, r in enumerate(self.model.db):
for loc in locations:
for index, row in enumerate(self.model.db):
for locvalue in locations:
accessor = q[locvalue]
try:
if query in loc(r):
matches.add(i)
### Can't separate authors because comma is used for name sep and author sep
### Exact match might not get what you want. For that reason, turn author
### exactmatch searches into contains searches.
if locvalue == 'author' and matchkind == EQUALS_MATCH:
m = CONTAINS_MATCH
else:
m = matchkind
if locvalue == 'tag':
vals = accessor(row).split(',')
else:
vals = [accessor(row)]
if _match(query, vals, m):
matches.add(index)
break
except ValueError: # Unicode errors
import traceback

View File

@ -173,6 +173,8 @@ class TagsModel(QAbstractItemModel):
if len(data[r]) > 0:
self.beginInsertRows(category_index, 0, len(data[r])-1)
for tag in data[r]:
if r == 'author':
tag.name = tag.name.replace('|', ',')
tag.state = state_map.get(tag.name, 0)
t = TagTreeItem(parent=category, data=tag, icon_map=self.icon_map)
self.endInsertRows()
@ -278,7 +280,7 @@ class TagsModel(QAbstractItemModel):
category = key if key != 'news' else 'tag'
if tag.state > 0:
prefix = ' not ' if tag.state == 2 else ''
ans.append('%s%s:"%s"'%(prefix, category, tag.name))
ans.append('%s%s:"=%s"'%(prefix, category, tag.name))
return ans

View File

@ -669,19 +669,19 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if type == 'series':
series = idx.model().db.series(row)
if series:
search = ['series:'+series]
search = ['series:"'+series+'"']
elif type == 'publisher':
publisher = idx.model().db.publisher(row)
if publisher:
search = ['publisher:'+publisher]
search = ['publisher:"'+publisher+'"']
elif type == 'tag':
tags = idx.model().db.tags(row)
if tags:
search = ['tag:'+t for t in tags.split(',')]
search = ['tag:"='+t+'"' for t in tags.split(',')]
elif type == 'author':
authors = idx.model().db.authors(row)
if authors:
search = ['author:'+a.strip().replace('|', ',') \
search = ['author:"='+a.strip().replace('|', ',')+'"' \
for a in authors.split(',')]
join = ' or '
if search:

View File

@ -174,6 +174,22 @@ class CoverCache(QThread):
self.load_queue.appendleft(id)
self.load_queue_lock.unlock()
### Global utility function for get_match here and in gui2/library.py
CONTAINS_MATCH = 0
EQUALS_MATCH = 1
REGEXP_MATCH = 2
def _match(query, value, matchkind):
for t in value:
t = t.lower()
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
if ((matchkind == EQUALS_MATCH and query == t) or
(matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
(matchkind == CONTAINS_MATCH and query in t)):
return True
except re.error:
pass
return False
class ResultCache(SearchQueryParser):
'''
@ -202,10 +218,23 @@ class ResultCache(SearchQueryParser):
matches = set([])
if query and query.strip():
location = location.lower().strip()
query = query.lower()
matchkind = CONTAINS_MATCH
if (len(query) > 1):
if query.startswith('\\'):
query = query[1:]
elif query.startswith('='):
matchkind = EQUALS_MATCH
query = query[1:]
elif query.startswith('~'):
matchkind = REGEXP_MATCH
query = query[1:]
if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
query = query.lower()
if not isinstance(query, unicode):
query = query.decode('utf-8')
if location in ('tag', 'author', 'format'):
if location in ('tag', 'author', 'format', 'comment'):
location += 's'
all = ('title', 'authors', 'publisher', 'tags', 'comments', 'series', 'formats', 'isbn', 'rating', 'cover')
MAP = {}
@ -219,29 +248,41 @@ class ResultCache(SearchQueryParser):
rating_query = int(query) * 2
except:
rating_query = None
for item in self._data:
if item is None: continue
for loc in location:
if query == 'false' and not item[loc]:
if isinstance(item[loc], basestring):
if item[loc].strip() != '':
continue
matches.add(item[0])
break
if query == 'true' and item[loc]:
for loc in location:
if loc == MAP['authors']:
q = query.replace(',', '|'); ### DB stores authors with commas changed to bars, so change query
else:
q = query
for item in self._data:
if item is None: continue
if not item[loc]:
if query == 'false':
if isinstance(item[loc], basestring):
if item[loc].strip() != '':
continue
matches.add(item[0])
break
continue ### item is empty. No possible matches below
if q == 'true':
if isinstance(item[loc], basestring):
if item[loc].strip() == '':
continue
matches.add(item[0])
break
if rating_query and item[loc] and loc == MAP['rating'] and rating_query == int(item[loc]):
continue
if rating_query and loc == MAP['rating'] and rating_query == int(item[loc]):
matches.add(item[0])
break
if item[loc] and loc not in EXCLUDE_FIELDS and query in item[loc].lower():
matches.add(item[0])
break
return matches
continue
if loc not in EXCLUDE_FIELDS:
if loc == MAP['tags'] or loc == MAP['authors']:
vals = item[loc].split(',') ### check individual tags/authors, not the long string
else:
vals = [item[loc]] ### make into list to make _match happy
if _match(q, vals, matchkind):
matches.add(item[0])
continue
return matches
def remove(self, id):
self._data[id] = None

View File

@ -195,6 +195,15 @@ are available in the LRF format. Some more examples::
title:"The Ring" or "This book is about a ring"
format:epub publisher:feedbooks.com
Searches are by default 'contains'. An item matches if the search string appears anywhere in the indicated metadata.
Two other kinds of searches are available: equality search and search using regular expressions.
Equality searches are indicated by prefixing the search string with an equals sign (=). For example, the query
``tag:"=science"`` will match "science", but not "science fiction". Regular expression searches are
indicated by prefixing the search string with a tilde (~). Any Python-compatible regular expression can
be used. Regular expression searches are 'contains' searches unless the expression contains anchors.
Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash.
You can build advanced search queries easily using the :guilabel:`Advanced Search Dialog`, accessed by
clicking the button |sbi|.