pep8 and don't fail matcher memleak test if used memory for 10 rounds is zero

This commit is contained in:
Kovid Goyal 2017-02-02 20:30:32 +05:30
parent 8c6433d843
commit dab1bd61e9

View File

@ -1,7 +1,6 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
@ -52,6 +51,8 @@ class Worker(Thread):
self.results.put((False, as_unicode(e)))
# import traceback
# traceback.print_exc()
wlock = Lock()
workers = []
@ -82,7 +83,14 @@ def default_scorer(*args, **kwargs):
class Matcher(object):
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3, scorer=None):
def __init__(
self,
items,
level1=DEFAULT_LEVEL1,
level2=DEFAULT_LEVEL2,
level3=DEFAULT_LEVEL3,
scorer=None
):
with wlock:
if not workers:
requests, results = Queue(), Queue()
@ -94,7 +102,9 @@ class Matcher(object):
tasks = split(items, len(workers))
self.task_maps = [{j: i for j, (i, _) in enumerate(task)} for task in tasks]
scorer = scorer or default_scorer
self.scorers = [scorer(tuple(map(itemgetter(1), task_items))) for task_items in tasks]
self.scorers = [
scorer(tuple(map(itemgetter(1), task_items))) for task_items in tasks
]
self.sort_keys = None
def __call__(self, query, limit=None):
@ -103,7 +113,10 @@ class Matcher(object):
for i, scorer in enumerate(self.scorers):
workers[0].requests.put((i, scorer, query))
if self.sort_keys is None:
self.sort_keys = {i:primary_sort_key(x) for i, x in enumerate(self.items)}
self.sort_keys = {
i: primary_sort_key(x)
for i, x in enumerate(self.items)
}
num = len(self.task_maps)
scores, positions = {}, {}
error = None
@ -122,7 +135,8 @@ class Matcher(object):
if error is not None:
raise Exception('Failed to score items: %s' % error)
items = sorted(((-scores[i], item, positions[i]) for i, item in enumerate(self.items)),
items = sorted(((-scores[i], item, positions[i])
for i, item in enumerate(self.items)),
key=itemgetter(0))
if limit is not None:
del items[limit:]
@ -148,6 +162,7 @@ class FilesystemMatcher(Matcher):
def __init__(self, basedir, *args, **kwargs):
Matcher.__init__(self, get_items_from_dir(basedir), *args, **kwargs)
# Python implementation of the scoring algorithm {{{
@ -157,7 +172,9 @@ def calc_score_for_char(ctx, prev, current, distance):
if prev in ctx.level1:
factor = 0.9
elif prev in ctx.level2 or (icu_lower(prev) == prev and icu_upper(current) == current):
elif prev in ctx.level2 or (
icu_lower(prev) == prev and icu_upper(current) == current
):
factor = 0.8
elif prev in ctx.level3:
factor = 0.7
@ -189,7 +206,9 @@ def process_item(ctx, haystack, needle):
pos += hidx
distance = pos - last_idx
score_for_char = ctx.max_score_per_char if distance <= 1 else calc_score_for_char(ctx, haystack[pos-1], haystack[pos], distance)
score_for_char = ctx.max_score_per_char if distance <= 1 else calc_score_for_char(
ctx, haystack[pos - 1], haystack[pos], distance
)
hidx = pos + 1
push((hidx, i, last_idx, score, list(positions)))
last_idx = positions[i] = pos
@ -204,9 +223,17 @@ def process_item(ctx, haystack, needle):
class PyScorer(object):
__slots__ = ('level1', 'level2', 'level3', 'max_score_per_char', 'items', 'memory')
__slots__ = (
'level1', 'level2', 'level3', 'max_score_per_char', 'items', 'memory'
)
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
def __init__(
self,
items,
level1=DEFAULT_LEVEL1,
level2=DEFAULT_LEVEL2,
level3=DEFAULT_LEVEL3
):
self.level1, self.level2, self.level3 = level1, level2, level3
self.max_score_per_char = 0
self.items = items
@ -216,16 +243,30 @@ class PyScorer(object):
self.max_score_per_char = (1.0 / len(item) + 1.0 / len(needle)) / 2.0
self.memory = {}
yield process_item(self, item, needle)
# }}}
class CScorer(object):
def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
def __init__(
self,
items,
level1=DEFAULT_LEVEL1,
level2=DEFAULT_LEVEL2,
level3=DEFAULT_LEVEL3
):
speedup, err = plugins['matcher']
if speedup is None:
raise PluginFailed('Failed to load the matcher plugin with error: %s' % err)
self.m = speedup.Matcher(items, primary_collator().capsule, unicode(level1), unicode(level2), unicode(level3))
raise PluginFailed(
'Failed to load the matcher plugin with error: %s' % err
)
self.m = speedup.Matcher(
items,
primary_collator().capsule,
unicode(level1), unicode(level2), unicode(level3)
)
def __call__(self, query):
scores, positions = self.m.calculate_scores(query)
@ -245,8 +286,14 @@ def test(return_tests=False):
m('a')
def doit(c):
m = Matcher([c+'im/one.gif', c+'im/two.gif', c+'text/one.html',], scorer=CScorer)
m = Matcher([
c + 'im/one.gif',
c + 'im/two.gif',
c + 'text/one.html',
],
scorer=CScorer)
m('one')
start = memory()
for i in xrange(10):
doit(str(i))
@ -257,14 +304,16 @@ def test(return_tests=False):
doit(str(i))
gc.collect()
used100 = memory() - start
if used100 > 0 and used10 >= 0:
if used100 > 0 and used10 > 0:
self.assertLessEqual(used100, 2 * used10)
def test_non_bmp(self):
raw = '_\U0001f431-'
m = Matcher([raw], scorer=CScorer)
positions = next(m(raw).itervalues())
self.assertEqual(positions, (0, 1, (2 if sys.maxunicode >= 0x10ffff else 3)))
self.assertEqual(
positions, (0, 1, (2 if sys.maxunicode >= 0x10ffff else 3))
)
if return_tests:
return unittest.TestLoader().loadTestsFromTestCase(Test)
@ -277,11 +326,14 @@ def test(return_tests=False):
TestRunner(verbosity=4)
if sys.maxunicode >= 0x10ffff:
get_char = lambda string, pos: string[pos]
else:
def get_char(string, pos):
chs = 2 if ('\ud800' <= string[pos] <= '\udbff') else 1 # UTF-16 surrogate pair in python narrow builds
chs = 2 if ('\ud800' <= string[pos] <= '\udbff'
) else 1 # UTF-16 surrogate pair in python narrow builds
return string[pos:pos + chs]
@ -290,7 +342,8 @@ def main(basedir=None, query=None):
from calibre.utils.terminal import ColoredStream
if basedir is None:
try:
basedir = raw_input('Enter directory to scan [%s]: ' % os.getcwdu()).decode(sys.stdin.encoding).strip() or os.getcwdu()
basedir = raw_input('Enter directory to scan [%s]: ' % os.getcwdu()
).decode(sys.stdin.encoding).strip() or os.getcwdu()
except (EOFError, KeyboardInterrupt):
return
m = FilesystemMatcher(basedir)
@ -318,6 +371,7 @@ def main(basedir=None, query=None):
prints(path[p:])
query = None
if __name__ == '__main__':
# main(basedir='/t', query='ns')
# test()