mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle positions correctly when matching on non-BMP chars on narrow Python builds
This commit is contained in:
parent
b4e2b9e93f
commit
bdbc6ccfaa
@ -17,6 +17,7 @@ from PyQt4.Qt import (
|
||||
from calibre import prepare_string_for_xml
|
||||
from calibre.gui2 import error_dialog, choose_files, choose_save_file
|
||||
from calibre.gui2.tweak_book import tprefs
|
||||
from calibre.utils.matcher import get_char, Matcher
|
||||
|
||||
class Dialog(QDialog):
|
||||
|
||||
@ -309,7 +310,8 @@ class Results(QWidget):
|
||||
positions = sorted(set(positions) - {-1}, reverse=True)
|
||||
text = prepare_string_for_xml(text)
|
||||
for p in positions:
|
||||
text = '%s<span style="%s">%s</span>%s' % (text[:p], self.EMPH, text[p], text[p+1:])
|
||||
ch = get_char(text, p)
|
||||
text = '%s<span style="%s">%s</span>%s' % (text[:p], self.EMPH, ch, text[p+len(ch):])
|
||||
text = QStaticText(text)
|
||||
text.setTextFormat(Qt.RichText)
|
||||
return text
|
||||
@ -363,7 +365,6 @@ class Results(QWidget):
|
||||
class QuickOpen(Dialog):
|
||||
|
||||
def __init__(self, items, parent=None):
|
||||
from calibre.utils.matcher import Matcher
|
||||
self.matcher = Matcher(items)
|
||||
self.matches = ()
|
||||
self.selected_result = None
|
||||
|
@ -155,6 +155,10 @@ static double calc_score_for_char(MatchInfo *m, UChar32 last, UChar32 current, i
|
||||
}
|
||||
|
||||
static void convert_positions(int32_t *positions, int32_t *final_positions, UChar *string, int32_t char_len, int32_t byte_len, double score) {
|
||||
#if PY_VERSION_HEX >= 0x03030000
|
||||
#error Not implemented for python >= 3.3
|
||||
#endif
|
||||
|
||||
// The positions array stores character positions as byte offsets in string, convert them into character offsets
|
||||
int32_t i, *end;
|
||||
|
||||
@ -163,7 +167,11 @@ static void convert_positions(int32_t *positions, int32_t *final_positions, UCha
|
||||
end = final_positions + char_len;
|
||||
for (i = 0; i < byte_len && final_positions < end; i++) {
|
||||
if (positions[i] == -1) continue;
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
*final_positions = u_countChar32(string, positions[i]);
|
||||
#else
|
||||
*final_positions = positions[i];
|
||||
#endif
|
||||
final_positions += 1;
|
||||
}
|
||||
}
|
||||
|
@ -139,6 +139,7 @@ class FilesystemMatcher(Matcher):
|
||||
def __init__(self, basedir, *args, **kwargs):
    """Build a matcher over the items that get_items_from_dir() returns for basedir.

    Any further positional and keyword arguments are handed unchanged to
    Matcher.__init__.
    """
    items = get_items_from_dir(basedir)
    Matcher.__init__(self, items, *args, **kwargs)
|
||||
|
||||
# Python implementation of the scoring algorithm {{{
|
||||
def calc_score_for_char(ctx, prev, current, distance):
|
||||
factor = 1.0
|
||||
ans = ctx.max_score_per_char
|
||||
@ -202,11 +203,11 @@ class PyScorer(object):
|
||||
self.max_score_per_char = (1.0 / len(item) + 1.0 / len(needle)) / 2.0
|
||||
self.memory = {}
|
||||
yield process_item(self, item, needle)
|
||||
# }}}
|
||||
|
||||
class CScorer(object):
|
||||
|
||||
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
|
||||
|
||||
speedup, err = plugins['matcher']
|
||||
if speedup is None:
|
||||
raise PluginFailed('Failed to load the matcher plugin with error: %s' % err)
|
||||
@ -217,14 +218,6 @@ class CScorer(object):
|
||||
for score, pos in izip(scores, positions):
|
||||
yield score, pos
|
||||
|
||||
def test2():
    """Run the query 'ns' through each scorer class and print what the matcher returns."""
    candidates = ['.driveinfo.calibre', 'Suspense.xls', 'p/parsed/content.opf', 'ns.html']
    for scorer_class in (PyScorer, CScorer):
        # Show which scorer implementation the following lines belong to
        print (scorer_class)
        matcher = Matcher(candidates, scorer=scorer_class)
        results = matcher('ns')
        for item, positions in results.iteritems():
            print ('\tns', item, positions)
|
||||
|
||||
def test():
|
||||
items = ['mx\U0001f431nxox']
|
||||
for q in (PyScorer, CScorer):
|
||||
@ -237,7 +230,6 @@ def test():
|
||||
print (item[p], end=' ')
|
||||
print ()
|
||||
|
||||
|
||||
def test_mem():
|
||||
from calibre.utils.mem import gc_histogram, diff_hists
|
||||
m = Matcher(['a'])
|
||||
@ -255,6 +247,13 @@ def test_mem():
|
||||
h2 = gc_histogram()
|
||||
diff_hists(h1, h2)
|
||||
|
||||
if sys.maxunicode >= 0x10ffff:
|
||||
get_char = lambda string, pos: string[pos]
|
||||
else:
|
||||
def get_char(string, pos):
|
||||
chs = 2 if ('\ud800' <= string[pos] <= '\udbff') else 1 # UTF-16 surrogate pair in python narrow builds
|
||||
return string[pos:pos+chs]
|
||||
|
||||
def main(basedir=None, query=None):
|
||||
from calibre import prints
|
||||
from calibre.utils.terminal import ColoredStream
|
||||
@ -279,11 +278,12 @@ def main(basedir=None, query=None):
|
||||
while positions:
|
||||
pos = positions.pop(0)
|
||||
if pos == -1:
|
||||
break
|
||||
continue
|
||||
prints(path[p:pos], end='')
|
||||
ch = get_char(path, pos)
|
||||
with emph:
|
||||
prints(path[pos], end='')
|
||||
p = pos + 1
|
||||
prints(ch, end='')
|
||||
p = pos + len(ch)
|
||||
prints(path[p:])
|
||||
query = None
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user