Speed up searching a little by using a dedicated function for testing whether a string contains a substring under primary collation (replaces the use of primary_find())

This commit is contained in:
Kovid Goyal 2014-03-08 22:08:31 +05:30
parent 4eaee89487
commit b76cc3e9ab
6 changed files with 53 additions and 6 deletions

View File

@ -16,7 +16,7 @@ from calibre.constants import preferred_encoding
from calibre.db.utils import force_to_bool
from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
from calibre.utils.icu import primary_find, sort_key
from calibre.utils.icu import primary_contains, sort_key
from calibre.utils.localization import lang_map, canonicalize_lang
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
@ -73,7 +73,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True):
return True
elif matchkind == CONTAINS_MATCH:
if use_primary_find_in_search:
if primary_find(query, t)[0] != -1:
if primary_contains(query, t):
return True
elif query in t:
return True

View File

@ -14,13 +14,13 @@ from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt, pyqtSignal, QObject,
QApplication, QListView, QPoint, QModelIndex, QFont, QFontInfo)
from calibre.constants import isosx, get_osx_version
from calibre.utils.icu import sort_key, primary_startswith, primary_find
from calibre.utils.icu import sort_key, primary_startswith, primary_contains
from calibre.gui2 import NONE
from calibre.gui2.widgets import EnComboBox, LineEditECM
from calibre.utils.config import tweaks
def containsq(x, prefix):
return primary_find(prefix, x)[0] != -1
return primary_contains(prefix, x)
class CompleteModel(QAbstractListModel): # {{{

View File

@ -9,7 +9,7 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.tag_editor_ui import Ui_TagEditor
from calibre.gui2 import question_dialog, error_dialog, gprefs
from calibre.constants import islinux
from calibre.utils.icu import sort_key, primary_find
from calibre.utils.icu import sort_key, primary_contains
class TagEditor(QDialog, Ui_TagEditor):
@ -178,7 +178,7 @@ class TagEditor(QDialog, Ui_TagEditor):
q = icu_lower(unicode(filter_value))
for i in xrange(collection.count()): # on every available tag
item = collection.item(i)
item.setHidden(bool(q and primary_find(q, unicode(item.text()))[0] == -1))
item.setHidden(bool(q and not primary_contains(q, unicode(item.text()))))
def accept(self):
self.save_state()

View File

@ -229,6 +229,39 @@ end:
return (PyErr_Occurred()) ? NULL : Py_BuildValue("ii", pos, length);
} // }}}
// Collator.contains {{{
/*
 * contains(pattern, source) -> True if pattern occurs in source according to
 * this collator, False otherwise.
 *
 * args is expected to hold exactly two objects: the pattern and the text to
 * search in. An empty pattern is treated as always found; in that case the
 * source argument is never converted or inspected.
 *
 * Returns NULL if argument parsing failed or a Python error is pending when
 * the function finishes. A failing ICU status is not raised here: it simply
 * leaves the result as False.
 */
static PyObject *
icu_Collator_contains(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *pattern_obj = NULL, *source_obj = NULL;
    UChar *pattern = NULL, *source = NULL;
    int32_t pattern_len = 0, source_len = 0;
    UStringSearch *searcher = NULL;
    UErrorCode err = U_ZERO_ERROR;
    uint8_t matched = 0;

    if (!PyArg_ParseTuple(args, "OO", &pattern_obj, &source_obj)) return NULL;

    /* NOTE(review): python_to_icu presumably returns a malloc'ed UChar buffer
     * (it is released with free() below) and NULL on failure -- confirm. */
    pattern = python_to_icu(pattern_obj, &pattern_len, 1);
    if (pattern == NULL) goto cleanup;
    if (pattern_len == 0) {
        /* The empty pattern matches everything; skip the search entirely. */
        matched = TRUE;
        goto cleanup;
    }

    source = python_to_icu(source_obj, &source_len, 1);
    if (source == NULL) goto cleanup;

    searcher = usearch_openFromCollator(pattern, pattern_len, source, source_len, self->collator, NULL, &err);
    /* Only run the search when the searcher was opened successfully. */
    if (U_SUCCESS(err) && usearch_first(searcher, &err) != USEARCH_DONE) matched = TRUE;

cleanup:
    /* Single exit path: release whatever was acquired before reporting. */
    if (searcher != NULL) usearch_close(searcher);
    if (pattern != NULL) free(pattern);
    if (source != NULL) free(source);
    if (PyErr_Occurred()) return NULL;
    if (matched) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
} // }}}
// Collator.contractions {{{
static PyObject *
icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) {
@ -366,6 +399,10 @@ static PyMethodDef icu_Collator_methods[] = {
"find(pattern, source) -> returns the position and length of the first occurrence of pattern in source. Returns (-1, -1) if not found."
},
{"contains", (PyCFunction)icu_Collator_contains, METH_VARARGS,
"contains(pattern, source) -> return True iff the pattern was found in the source."
},
{"contractions", (PyCFunction)icu_Collator_contractions, METH_VARARGS,
"contractions() -> returns the contractions defined for this collator."
},

View File

@ -203,6 +203,10 @@ find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func=
primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find')
# Substring-containment helpers, generated from the same template as the
# neighbouring find/startswith helpers. `contains` is bound to '_collator' and
# `primary_contains` to '_primary_collator'; both pass func='contains'.
# NOTE(review): func='contains' presumably names the Collator method that the
# generated function calls -- confirm against _make_func/_strcmp_template.
contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains')
primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')
startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')
primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')

View File

@ -102,6 +102,12 @@ class TestICU(unittest.TestCase):
self.assertFalse(icu.startswith('xyz', 'a'))
self.assertTrue(icu.startswith('xxx', ''))
self.assertTrue(icu.primary_startswith('pena', 'peña'))
self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
self.assertTrue(icu.contains('something', 'some other something else'))
self.assertTrue(icu.contains('', 'a'))
self.assertTrue(icu.contains('', ''))
self.assertFalse(icu.contains('xxx', 'xx'))
self.assertTrue(icu.primary_contains('pena', 'peña'))
def test_collation_order(self):
'Testing collation ordering'