mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Correctly handle match positions for non-BMP characters on narrow Python builds
This commit is contained in:
parent
b4e2b9e93f
commit
bdbc6ccfaa
@ -17,6 +17,7 @@ from PyQt4.Qt import (
|
|||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
from calibre.gui2 import error_dialog, choose_files, choose_save_file
|
from calibre.gui2 import error_dialog, choose_files, choose_save_file
|
||||||
from calibre.gui2.tweak_book import tprefs
|
from calibre.gui2.tweak_book import tprefs
|
||||||
|
from calibre.utils.matcher import get_char, Matcher
|
||||||
|
|
||||||
class Dialog(QDialog):
|
class Dialog(QDialog):
|
||||||
|
|
||||||
@ -309,7 +310,8 @@ class Results(QWidget):
|
|||||||
positions = sorted(set(positions) - {-1}, reverse=True)
|
positions = sorted(set(positions) - {-1}, reverse=True)
|
||||||
text = prepare_string_for_xml(text)
|
text = prepare_string_for_xml(text)
|
||||||
for p in positions:
|
for p in positions:
|
||||||
text = '%s<span style="%s">%s</span>%s' % (text[:p], self.EMPH, text[p], text[p+1:])
|
ch = get_char(text, p)
|
||||||
|
text = '%s<span style="%s">%s</span>%s' % (text[:p], self.EMPH, ch, text[p+len(ch):])
|
||||||
text = QStaticText(text)
|
text = QStaticText(text)
|
||||||
text.setTextFormat(Qt.RichText)
|
text.setTextFormat(Qt.RichText)
|
||||||
return text
|
return text
|
||||||
@ -363,7 +365,6 @@ class Results(QWidget):
|
|||||||
class QuickOpen(Dialog):
|
class QuickOpen(Dialog):
|
||||||
|
|
||||||
def __init__(self, items, parent=None):
|
def __init__(self, items, parent=None):
|
||||||
from calibre.utils.matcher import Matcher
|
|
||||||
self.matcher = Matcher(items)
|
self.matcher = Matcher(items)
|
||||||
self.matches = ()
|
self.matches = ()
|
||||||
self.selected_result = None
|
self.selected_result = None
|
||||||
|
@ -155,6 +155,10 @@ static double calc_score_for_char(MatchInfo *m, UChar32 last, UChar32 current, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void convert_positions(int32_t *positions, int32_t *final_positions, UChar *string, int32_t char_len, int32_t byte_len, double score) {
|
static void convert_positions(int32_t *positions, int32_t *final_positions, UChar *string, int32_t char_len, int32_t byte_len, double score) {
|
||||||
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
|
#error Not implemented for python >= 3.3
|
||||||
|
#endif
|
||||||
|
|
||||||
// The positions array stores character positions as byte offsets in string, convert them into character offsets
|
// The positions array stores character positions as byte offsets in string, convert them into character offsets
|
||||||
int32_t i, *end;
|
int32_t i, *end;
|
||||||
|
|
||||||
@ -163,7 +167,11 @@ static void convert_positions(int32_t *positions, int32_t *final_positions, UCha
|
|||||||
end = final_positions + char_len;
|
end = final_positions + char_len;
|
||||||
for (i = 0; i < byte_len && final_positions < end; i++) {
|
for (i = 0; i < byte_len && final_positions < end; i++) {
|
||||||
if (positions[i] == -1) continue;
|
if (positions[i] == -1) continue;
|
||||||
|
#ifdef Py_UNICODE_WIDE
|
||||||
*final_positions = u_countChar32(string, positions[i]);
|
*final_positions = u_countChar32(string, positions[i]);
|
||||||
|
#else
|
||||||
|
*final_positions = positions[i];
|
||||||
|
#endif
|
||||||
final_positions += 1;
|
final_positions += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -139,6 +139,7 @@ class FilesystemMatcher(Matcher):
|
|||||||
def __init__(self, basedir, *args, **kwargs):
|
def __init__(self, basedir, *args, **kwargs):
|
||||||
Matcher.__init__(self, get_items_from_dir(basedir), *args, **kwargs)
|
Matcher.__init__(self, get_items_from_dir(basedir), *args, **kwargs)
|
||||||
|
|
||||||
|
# Python implementation of the scoring algorithm {{{
|
||||||
def calc_score_for_char(ctx, prev, current, distance):
|
def calc_score_for_char(ctx, prev, current, distance):
|
||||||
factor = 1.0
|
factor = 1.0
|
||||||
ans = ctx.max_score_per_char
|
ans = ctx.max_score_per_char
|
||||||
@ -202,11 +203,11 @@ class PyScorer(object):
|
|||||||
self.max_score_per_char = (1.0 / len(item) + 1.0 / len(needle)) / 2.0
|
self.max_score_per_char = (1.0 / len(item) + 1.0 / len(needle)) / 2.0
|
||||||
self.memory = {}
|
self.memory = {}
|
||||||
yield process_item(self, item, needle)
|
yield process_item(self, item, needle)
|
||||||
|
# }}}
|
||||||
|
|
||||||
class CScorer(object):
|
class CScorer(object):
|
||||||
|
|
||||||
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
|
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
|
||||||
|
|
||||||
speedup, err = plugins['matcher']
|
speedup, err = plugins['matcher']
|
||||||
if speedup is None:
|
if speedup is None:
|
||||||
raise PluginFailed('Failed to load the matcher plugin with error: %s' % err)
|
raise PluginFailed('Failed to load the matcher plugin with error: %s' % err)
|
||||||
@ -217,14 +218,6 @@ class CScorer(object):
|
|||||||
for score, pos in izip(scores, positions):
|
for score, pos in izip(scores, positions):
|
||||||
yield score, pos
|
yield score, pos
|
||||||
|
|
||||||
def test2():
|
|
||||||
items = ['.driveinfo.calibre', 'Suspense.xls', 'p/parsed/content.opf', 'ns.html']
|
|
||||||
for q in (PyScorer, CScorer):
|
|
||||||
print (q)
|
|
||||||
m = Matcher(items, scorer=q)
|
|
||||||
for item, positions in m('ns').iteritems():
|
|
||||||
print ('\tns', item, positions)
|
|
||||||
|
|
||||||
def test():
|
def test():
|
||||||
items = ['mx\U0001f431nxox']
|
items = ['mx\U0001f431nxox']
|
||||||
for q in (PyScorer, CScorer):
|
for q in (PyScorer, CScorer):
|
||||||
@ -237,7 +230,6 @@ def test():
|
|||||||
print (item[p], end=' ')
|
print (item[p], end=' ')
|
||||||
print ()
|
print ()
|
||||||
|
|
||||||
|
|
||||||
def test_mem():
|
def test_mem():
|
||||||
from calibre.utils.mem import gc_histogram, diff_hists
|
from calibre.utils.mem import gc_histogram, diff_hists
|
||||||
m = Matcher(['a'])
|
m = Matcher(['a'])
|
||||||
@ -255,6 +247,13 @@ def test_mem():
|
|||||||
h2 = gc_histogram()
|
h2 = gc_histogram()
|
||||||
diff_hists(h1, h2)
|
diff_hists(h1, h2)
|
||||||
|
|
||||||
|
if sys.maxunicode >= 0x10ffff:
    def get_char(string, pos):
        """Return the character of ``string`` that starts at index ``pos``.

        On wide builds every code point occupies exactly one index, so plain
        indexing is sufficient.

        :param string: The unicode string to read from.
        :param pos: Index of the character to fetch.
        :return: A string of length one.
        :raises IndexError: If ``pos`` is out of range.
        """
        return string[pos]
else:
    def get_char(string, pos):
        """Return the character of ``string`` that starts at index ``pos``.

        On narrow builds a non-BMP character is stored as a UTF-16 surrogate
        pair spanning two indices. When ``pos`` points at the high surrogate
        of a well-formed pair, both code units are returned, so callers can
        advance by ``len(result)``.

        :param string: The unicode string to read from.
        :param pos: Index of the first code unit of the character.
        :return: A string of length one, or two for a surrogate pair.
        :raises IndexError: If ``pos`` is out of range.
        """
        ch = string[pos]
        if '\ud800' <= ch <= '\udbff':  # high (lead) surrogate
            nxt = pos + 1
            # nxt == 0 means pos was -1, i.e. the last code unit: there is
            # no following unit and string[0] must not be consulted.
            if nxt != 0 and nxt < len(string) and '\udc00' <= string[nxt] <= '\udfff':
                return ch + string[nxt]
        # BMP character, or a lone surrogate that has no matching partner
        return ch
|
||||||
|
|
||||||
def main(basedir=None, query=None):
|
def main(basedir=None, query=None):
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.utils.terminal import ColoredStream
|
from calibre.utils.terminal import ColoredStream
|
||||||
@ -279,11 +278,12 @@ def main(basedir=None, query=None):
|
|||||||
while positions:
|
while positions:
|
||||||
pos = positions.pop(0)
|
pos = positions.pop(0)
|
||||||
if pos == -1:
|
if pos == -1:
|
||||||
break
|
continue
|
||||||
prints(path[p:pos], end='')
|
prints(path[p:pos], end='')
|
||||||
|
ch = get_char(path, pos)
|
||||||
with emph:
|
with emph:
|
||||||
prints(path[pos], end='')
|
prints(ch, end='')
|
||||||
p = pos + 1
|
p = pos + len(ch)
|
||||||
prints(path[p:])
|
prints(path[p:])
|
||||||
query = None
|
query = None
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user