diff --git a/src/calibre/gui2/tweak_book/widgets.py b/src/calibre/gui2/tweak_book/widgets.py
index f9bde08f87..cb63cff199 100644
--- a/src/calibre/gui2/tweak_book/widgets.py
+++ b/src/calibre/gui2/tweak_book/widgets.py
@@ -17,6 +17,7 @@ from PyQt4.Qt import (
from calibre import prepare_string_for_xml
from calibre.gui2 import error_dialog, choose_files, choose_save_file
from calibre.gui2.tweak_book import tprefs
+from calibre.utils.matcher import get_char, Matcher
class Dialog(QDialog):
@@ -309,7 +310,8 @@ class Results(QWidget):
positions = sorted(set(positions) - {-1}, reverse=True)
text = prepare_string_for_xml(text)
for p in positions:
- text = '%s%s%s' % (text[:p], self.EMPH, text[p], text[p+1:])
+ ch = get_char(text, p)
+ text = '%s%s%s' % (text[:p], self.EMPH, ch, text[p+len(ch):])
text = QStaticText(text)
text.setTextFormat(Qt.RichText)
return text
@@ -363,7 +365,6 @@ class Results(QWidget):
class QuickOpen(Dialog):
def __init__(self, items, parent=None):
- from calibre.utils.matcher import Matcher
self.matcher = Matcher(items)
self.matches = ()
self.selected_result = None
diff --git a/src/calibre/utils/matcher.c b/src/calibre/utils/matcher.c
index 209a7e390d..c2c2210dad 100644
--- a/src/calibre/utils/matcher.c
+++ b/src/calibre/utils/matcher.c
@@ -155,6 +155,10 @@ static double calc_score_for_char(MatchInfo *m, UChar32 last, UChar32 current, i
}
static void convert_positions(int32_t *positions, int32_t *final_positions, UChar *string, int32_t char_len, int32_t byte_len, double score) {
+#if PY_VERSION_HEX >= 0x03030000
+#error Not implemented for python >= 3.3
+#endif
+
// The positions array stores character positions as byte offsets in string, convert them into character offsets
int32_t i, *end;
@@ -163,7 +167,11 @@ static void convert_positions(int32_t *positions, int32_t *final_positions, UCha
end = final_positions + char_len;
for (i = 0; i < byte_len && final_positions < end; i++) {
if (positions[i] == -1) continue;
+#ifdef Py_UNICODE_WIDE
*final_positions = u_countChar32(string, positions[i]);
+#else
+ *final_positions = positions[i];
+#endif
final_positions += 1;
}
}
diff --git a/src/calibre/utils/matcher.py b/src/calibre/utils/matcher.py
index 7ee20a9501..bf74d3b42d 100644
--- a/src/calibre/utils/matcher.py
+++ b/src/calibre/utils/matcher.py
@@ -139,6 +139,7 @@ class FilesystemMatcher(Matcher):
def __init__(self, basedir, *args, **kwargs):
Matcher.__init__(self, get_items_from_dir(basedir), *args, **kwargs)
+# Python implementation of the scoring algorithm {{{
def calc_score_for_char(ctx, prev, current, distance):
factor = 1.0
ans = ctx.max_score_per_char
@@ -202,11 +203,11 @@ class PyScorer(object):
self.max_score_per_char = (1.0 / len(item) + 1.0 / len(needle)) / 2.0
self.memory = {}
yield process_item(self, item, needle)
+# }}}
class CScorer(object):
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
-
speedup, err = plugins['matcher']
if speedup is None:
raise PluginFailed('Failed to load the matcher plugin with error: %s' % err)
@@ -217,14 +218,6 @@ class CScorer(object):
for score, pos in izip(scores, positions):
yield score, pos
-def test2():
- items = ['.driveinfo.calibre', 'Suspense.xls', 'p/parsed/content.opf', 'ns.html']
- for q in (PyScorer, CScorer):
- print (q)
- m = Matcher(items, scorer=q)
- for item, positions in m('ns').iteritems():
- print ('\tns', item, positions)
-
def test():
items = ['mx\U0001f431nxox']
for q in (PyScorer, CScorer):
@@ -237,7 +230,6 @@ def test():
print (item[p], end=' ')
print ()
-
def test_mem():
from calibre.utils.mem import gc_histogram, diff_hists
m = Matcher(['a'])
@@ -255,6 +247,13 @@ def test_mem():
h2 = gc_histogram()
diff_hists(h1, h2)
+if sys.maxunicode >= 0x10ffff:  # wide build: one string item per code point
+ get_char = lambda string, pos: string[pos]  # the character at pos is simply string[pos]
+else:  # narrow build: code points above U+FFFF are stored as two items (a UTF-16 surrogate pair)
+ def get_char(string, pos):  # return the whole character that starts at index pos in string
+ chs = 2 if ('\ud800' <= string[pos] <= '\udbff') else 1 # lead surrogate at pos => take two items (UTF-16 surrogate pair in python narrow builds); the item after it is not inspected
+ return string[pos:pos+chs]  # callers advance by len() of the result, which may be 1 or 2
+
def main(basedir=None, query=None):
from calibre import prints
from calibre.utils.terminal import ColoredStream
@@ -279,11 +278,12 @@ def main(basedir=None, query=None):
while positions:
pos = positions.pop(0)
if pos == -1:
- break
+ continue
prints(path[p:pos], end='')
+ ch = get_char(path, pos)
with emph:
- prints(path[pos], end='')
- p = pos + 1
+ prints(ch, end='')
+ p = pos + len(ch)
prints(path[p:])
query = None