mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
...
This commit is contained in:
parent
3e1a43e86a
commit
d1859b0f78
@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json')
|
||||
msprefs.defaults['txt_comments'] = False
|
||||
msprefs.defaults['ignore_fields'] = []
|
||||
msprefs.defaults['max_tags'] = 10
|
||||
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
|
||||
|
||||
def create_log(ostream=None):
|
||||
log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
|
||||
|
@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.utils.date import utc_tz
|
||||
from calibre.utils.html2text import html2text
|
||||
|
||||
# How long to wait for more results after first result is found
|
||||
WAIT_AFTER_FIRST_RESULT = 30 # seconds
|
||||
|
||||
# Download worker {{{
|
||||
class Worker(Thread):
|
||||
|
||||
def __init__(self, plugin, kwargs, abort):
|
||||
@ -47,99 +45,9 @@ def is_worker_alive(workers):
|
||||
return True
|
||||
return False
|
||||
|
||||
def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
    '''
    Run every identify plugin in its own worker thread and return the merged
    results.

    :param log: Callable used for diagnostic output; it is also passed on to
        merge_identify_results().
    :param abort: Event-like object handed to every worker. It is set here
        when we stop waiting for further results.
    :param title: Passed unchanged to every plugin.
    :param authors: Passed unchanged to every plugin.
    :param identifiers: Passed unchanged to every plugin. The default object
        is never mutated in this function.
    :param timeout: Passed unchanged to every plugin.
    :return: The value returned by merge_identify_results(), after optional
        conversion of comments to text, resetting of ignored fields and
        trimming of tags.
    '''
    start_time = time.time()
    plugins = list(metadata_plugins['identify'])

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
    log('The log (if any) from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    # One independent list per plugin. dict.fromkeys(plugins, []) would make
    # every key refer to the same list object.
    results = dict((p, []) for p in plugins)

    def get_results():
        # Drain the result queue of every worker. Returns True if at least
        # one result was collected.
        found = False
        for w in workers:
            while True:
                try:
                    result = w.rq.get_nowait()
                except Empty:
                    break
                results[w.plugin].append(result)
                found = True
        return found

    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        # Give up only once the wait period after the first result has
        # actually elapsed.
        if (first_result_at is not None and
                time.time() - first_result_at > WAIT_AFTER_FIRST_RESULT):
            log('Not waiting any longer for more results')
            abort.set()
            break

    # Pick up anything that arrived between the last poll and loop exit
    get_results()

    # The keygen only accepts the query itself, not e.g. the timeout
    sort_kwargs = dict((k, v) for k, v in kwargs.items()
            if k in ('title', 'authors', 'identifiers'))

    # Use a distinct loop variable so that the results map is not rebound:
    # it is still needed by merge_identify_results() below.
    for plugin, presults in results.items():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
        plog = plugin.buf.getvalue().strip()
        if plog:
            log('\n'+'*'*35, plugin.name, '*'*35)
            log('Found %d results'%len(presults))
            log(plog)
            log('\n'+'*'*80)

        for i, result in enumerate(presults):
            result.relevance_in_source = i
            result.has_cached_cover_url = \
                plugin.get_cached_cover_url(result.identifiers) is not None
            result.identify_plugin = plugin

    log('The identify phase took %.2f seconds'%(time.time() - start_time))
    log('Merging results from different sources and finding earliest',
            'publication dates')
    start_time = time.time()
    results = merge_identify_results(results, log)
    log('We have %d merged results, merging took: %.2f seconds' %
            (len(results), time.time() - start_time))

    if msprefs['txt_comments']:
        for r in results:
            # NOTE(review): results are tagged with `identify_plugin` above,
            # but `plugin` is read here -- confirm that merged results
            # really carry a `plugin` attribute.
            if r.plugin.has_html_comments and r.comments:
                r.comments = html2text(r.comments)

    dummy = Metadata(_('Unknown'))
    max_tags = msprefs['max_tags']
    ignore_fields = msprefs['ignore_fields']
    for r in results:
        for f in ignore_fields:
            # Reset ignored fields to the value of an empty Metadata object
            setattr(r, f, getattr(dummy, f))
        # Trim tags for every result, even when no fields are ignored
        r.tags = r.tags[:max_tags]

    return results
|
||||
# }}}
|
||||
|
||||
# Merge results from different sources {{{
|
||||
|
||||
class ISBNMerge(object):
|
||||
|
||||
@ -298,6 +206,102 @@ def merge_identify_results(result_map, log):
|
||||
|
||||
return isbn_merge.finalize()
|
||||
|
||||
# }}}
|
||||
|
||||
def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
    '''
    Run every identify plugin in its own worker thread and return the merged
    results.

    Once the first result has arrived, we wait at most
    msprefs['wait_after_first_identify_result'] seconds for the remaining
    workers before setting abort.

    :param log: Callable used for diagnostic output; it is also passed on to
        merge_identify_results().
    :param abort: Event-like object handed to every worker. It is set here
        when we stop waiting for further results.
    :param title: Passed unchanged to every plugin.
    :param authors: Passed unchanged to every plugin.
    :param identifiers: Passed unchanged to every plugin. The default object
        is never mutated in this function.
    :param timeout: Passed unchanged to every plugin.
    :return: The value returned by merge_identify_results(), after optional
        conversion of comments to text, resetting of ignored fields and
        trimming of tags.
    '''
    start_time = time.time()
    plugins = list(metadata_plugins['identify'])

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
    log('The log (if any) from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    # One independent list per plugin. dict.fromkeys(plugins, []) would make
    # every key refer to the same list object.
    results = dict((p, []) for p in plugins)

    def get_results():
        # Drain the result queue of every worker. Returns True if at least
        # one result was collected.
        found = False
        for w in workers:
            while True:
                try:
                    result = w.rq.get_nowait()
                except Empty:
                    break
                results[w.plugin].append(result)
                found = True
        return found

    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        # Give up only once the configured wait period after the first
        # result has actually elapsed.
        if (first_result_at is not None and
                time.time() - first_result_at > wait_time):
            log('Not waiting any longer for more results')
            abort.set()
            break

    # Pick up anything that arrived between the last poll and loop exit
    get_results()

    # The keygen only accepts the query itself, not e.g. the timeout
    sort_kwargs = dict((k, v) for k, v in kwargs.items()
            if k in ('title', 'authors', 'identifiers'))

    # Use a distinct loop variable so that the results map is not rebound:
    # it is still needed by merge_identify_results() below.
    for plugin, presults in results.items():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
        plog = plugin.buf.getvalue().strip()
        if plog:
            log('\n'+'*'*35, plugin.name, '*'*35)
            log('Found %d results'%len(presults))
            log(plog)
            log('\n'+'*'*80)

        for i, result in enumerate(presults):
            result.relevance_in_source = i
            result.has_cached_cover_url = \
                plugin.get_cached_cover_url(result.identifiers) is not None
            result.identify_plugin = plugin

    log('The identify phase took %.2f seconds'%(time.time() - start_time))
    log('Merging results from different sources and finding earliest',
            'publication dates')
    start_time = time.time()
    results = merge_identify_results(results, log)
    log('We have %d merged results, merging took: %.2f seconds' %
            (len(results), time.time() - start_time))

    if msprefs['txt_comments']:
        for r in results:
            # NOTE(review): results are tagged with `identify_plugin` above,
            # but `plugin` is read here -- confirm that merged results
            # really carry a `plugin` attribute.
            if r.plugin.has_html_comments and r.comments:
                r.comments = html2text(r.comments)

    dummy = Metadata(_('Unknown'))
    max_tags = msprefs['max_tags']
    ignore_fields = msprefs['ignore_fields']
    for r in results:
        for f in ignore_fields:
            # Reset ignored fields to the value of an empty Metadata object
            setattr(r, f, getattr(dummy, f))
        # Trim tags for every result, even when no fields are ignored
        r.tags = r.tags[:max_tags]

    return results
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user