From b8e414f18bedb21ff6e5badb19e9d3851d824551 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Mar 2014 21:12:38 +0530 Subject: [PATCH] Revert a part of the previous commit that was left in by mistake and also add a test for handling of positions when the haystack contains non-BMP chars --- src/calibre/utils/matcher.c | 2 -- src/calibre/utils/matcher.py | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/calibre/utils/matcher.c b/src/calibre/utils/matcher.c index bf5dead056..209a7e390d 100644 --- a/src/calibre/utils/matcher.c +++ b/src/calibre/utils/matcher.c @@ -160,8 +160,6 @@ static void convert_positions(int32_t *positions, int32_t *final_positions, UCha if (score == 0.0) { for (i = 0; i < char_len; i++) final_positions[i] = -1; return; } - if (char_len == byte_len) { memcpy(final_positions, positions, sizeof(*positions) * char_len); return; } - end = final_positions + char_len; for (i = 0; i < byte_len && final_positions < end; i++) { if (positions[i] == -1) continue; diff --git a/src/calibre/utils/matcher.py b/src/calibre/utils/matcher.py index a07aa75875..de7ed95f4b 100644 --- a/src/calibre/utils/matcher.py +++ b/src/calibre/utils/matcher.py @@ -217,14 +217,17 @@ def test2(): print ('\tns', item, positions) def test(): - items = ['m1mn34o/mno', 'xxx/XXX', 'mxnxox'] + items = ['mx\U0001f431nxox'] for q in (PyScorer, CScorer): print (q) m = Matcher(items, scorer=q) for item, positions in m('MNO').iteritems(): print ('\tMNO', item, positions) - for item, positions in m('xxx').iteritems(): - print ('\txxx', item, positions) + if -1 not in positions: + for p in positions: + print (item[p], end=' ') + print () + def test_mem(): from calibre.utils.mem import gc_histogram, diff_hists @@ -277,4 +280,5 @@ def main(basedir=None, query=None): if __name__ == '__main__': # main(basedir='/t', query='ns') + # test() main()