diff --git a/src/calibre/gui2/tweak_book/widgets.py b/src/calibre/gui2/tweak_book/widgets.py index f9bde08f87..cb63cff199 100644 --- a/src/calibre/gui2/tweak_book/widgets.py +++ b/src/calibre/gui2/tweak_book/widgets.py @@ -17,6 +17,7 @@ from PyQt4.Qt import ( from calibre import prepare_string_for_xml from calibre.gui2 import error_dialog, choose_files, choose_save_file from calibre.gui2.tweak_book import tprefs +from calibre.utils.matcher import get_char, Matcher class Dialog(QDialog): @@ -309,7 +310,8 @@ class Results(QWidget): positions = sorted(set(positions) - {-1}, reverse=True) text = prepare_string_for_xml(text) for p in positions: - text = '%s%s%s' % (text[:p], self.EMPH, text[p], text[p+1:]) + ch = get_char(text, p) + text = '%s%s%s' % (text[:p], self.EMPH, ch, text[p+len(ch):]) text = QStaticText(text) text.setTextFormat(Qt.RichText) return text @@ -363,7 +365,6 @@ class Results(QWidget): class QuickOpen(Dialog): def __init__(self, items, parent=None): - from calibre.utils.matcher import Matcher self.matcher = Matcher(items) self.matches = () self.selected_result = None diff --git a/src/calibre/utils/matcher.c b/src/calibre/utils/matcher.c index 209a7e390d..c2c2210dad 100644 --- a/src/calibre/utils/matcher.c +++ b/src/calibre/utils/matcher.c @@ -155,6 +155,10 @@ static double calc_score_for_char(MatchInfo *m, UChar32 last, UChar32 current, i } static void convert_positions(int32_t *positions, int32_t *final_positions, UChar *string, int32_t char_len, int32_t byte_len, double score) { +#if PY_VERSION_HEX >= 0x03030000 +#error Not implemented for python >= 3.3 +#endif + // The positions array stores character positions as byte offsets in string, convert them into character offsets int32_t i, *end; @@ -163,7 +167,11 @@ static void convert_positions(int32_t *positions, int32_t *final_positions, UCha end = final_positions + char_len; for (i = 0; i < byte_len && final_positions < end; i++) { if (positions[i] == -1) continue; +#ifdef Py_UNICODE_WIDE *final_positions = u_countChar32(string, positions[i]); +#else + *final_positions = positions[i]; +#endif final_positions += 1; } } diff --git a/src/calibre/utils/matcher.py b/src/calibre/utils/matcher.py index 7ee20a9501..bf74d3b42d 100644 --- a/src/calibre/utils/matcher.py +++ b/src/calibre/utils/matcher.py @@ -139,6 +139,7 @@ class FilesystemMatcher(Matcher): def __init__(self, basedir, *args, **kwargs): Matcher.__init__(self, get_items_from_dir(basedir), *args, **kwargs) +# Python implementation of the scoring algorithm {{{ def calc_score_for_char(ctx, prev, current, distance): factor = 1.0 ans = ctx.max_score_per_char @@ -202,11 +203,11 @@ class PyScorer(object): self.max_score_per_char = (1.0 / len(item) + 1.0 / len(needle)) / 2.0 self.memory = {} yield process_item(self, item, needle) +# }}} class CScorer(object): def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3): - speedup, err = plugins['matcher'] if speedup is None: raise PluginFailed('Failed to load the matcher plugin with error: %s' % err) @@ -217,14 +218,6 @@ class CScorer(object): for score, pos in izip(scores, positions): yield score, pos -def test2(): - items = ['.driveinfo.calibre', 'Suspense.xls', 'p/parsed/content.opf', 'ns.html'] - for q in (PyScorer, CScorer): - print (q) - m = Matcher(items, scorer=q) - for item, positions in m('ns').iteritems(): - print ('\tns', item, positions) - def test(): items = ['mx\U0001f431nxox'] for q in (PyScorer, CScorer): @@ -237,7 +230,6 @@ def test(): print (item[p], end=' ') print () - def test_mem(): from calibre.utils.mem import gc_histogram, diff_hists m = Matcher(['a']) @@ -255,6 +247,13 @@ def test_mem(): h2 = gc_histogram() diff_hists(h1, h2) +if sys.maxunicode >= 0x10ffff: + get_char = lambda string, pos: string[pos] +else: + def get_char(string, pos): + chs = 2 if ('\ud800' <= string[pos] <= '\udbff') else 1 # UTF-16 surrogate pair in python narrow builds + return string[pos:pos+chs] + def main(basedir=None, query=None): from calibre import prints from calibre.utils.terminal import ColoredStream @@ -279,11 +278,12 @@ def main(basedir=None, query=None): while positions: pos = positions.pop(0) if pos == -1: - break + continue prints(path[p:pos], end='') + ch = get_char(path, pos) with emph: - prints(path[pos], end='') - p = pos + 1 + prints(ch, end='') + p = pos + len(ch) prints(path[p:]) query = None