mirror of https://github.com/kovidgoyal/calibre.git
This commit is contained in:
parent 3e1a43e86a
commit d1859b0f78
@@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 10
+msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds

 def create_log(ostream=None):
     log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
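This hunk adds a new default, wait_after_first_identify_result, alongside the existing metadata-source preferences. Since msprefs is a JSONConfig store, the value can be read and overridden like a dict entry. A minimal sketch, assuming the usual calibre.utils.config.JSONConfig behaviour; only the preference name and its 30-second default come from this hunk, the rest is illustrative:

# Minimal sketch of how the new default is expected to be used.
from calibre.utils.config import JSONConfig

msprefs = JSONConfig('metadata_sources.json')
msprefs.defaults['wait_after_first_identify_result'] = 30  # seconds

timeout = msprefs['wait_after_first_identify_result']  # -> 30 until overridden
msprefs['wait_after_first_identify_result'] = 60       # persisted to the JSON file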
@@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz
 from calibre.utils.html2text import html2text

-# How long to wait for more results after first result is found
-WAIT_AFTER_FIRST_RESULT = 30 # seconds
-
+# Download worker {{{
 class Worker(Thread):

     def __init__(self, plugin, kwargs, abort):
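The Worker referenced in the context lines runs a single metadata source plugin on its own thread and exposes a result queue (rq, polled with get_nowait() further down) plus a per-plugin log buffer. Its real body is not part of this diff; the following is only a rough sketch of that shape, with the search call and queue handling assumed:

# Rough sketch only -- not the calibre implementation. Worker(plugin, kwargs,
# abort) and the rq attribute are taken from this diff; everything else,
# including plugin.search(), is a hypothetical stand-in.
from Queue import Queue          # Python 2 module name, matching this file
from threading import Thread

class Worker(Thread):

    def __init__(self, plugin, kwargs, abort):
        Thread.__init__(self)
        self.daemon = True
        self.plugin, self.kwargs, self.abort = plugin, kwargs, abort
        self.rq = Queue()        # identify() drains this with get_nowait()

    def run(self):
        # Ask the source plugin for candidate metadata and hand each result
        # over as soon as it arrives, stopping early if the caller aborts.
        for result in self.plugin.search(**self.kwargs):
            if self.abort.is_set():
                break
            self.rq.put(result)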
@@ -47,99 +45,9 @@ def is_worker_alive(workers):
             return True
     return False

-
-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
-    start_time = time.time()
-    plugins = list(metadata_plugins['identify'])
-
-    kwargs = {
-        'title': title,
-        'authors': authors,
-        'identifiers': identifiers,
-        'timeout': timeout,
-    }
-
-    log('Running identify query with parameters:')
-    log(kwargs)
-    log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
-
-    workers = [Worker(p, kwargs, abort) for p in plugins]
-    for w in workers:
-        w.start()
-
-    first_result_at = None
-    results = dict.fromkeys(plugins, [])
-
-    def get_results():
-        found = False
-        for w in workers:
-            try:
-                result = w.rq.get_nowait()
-            except Empty:
-                pass
-            else:
-                results[w.plugin].append(result)
-                found = True
-        return found
-
-    while True:
-        time.sleep(0.2)
-
-        if get_results() and first_result_at is None:
-            first_result_at = time.time()
-
-        if not is_worker_alive(workers):
-            break
-
-        if (first_result_at is not None and time.time() - first_result_at <
-                WAIT_AFTER_FIRST_RESULT):
-            log('Not waiting any longer for more results')
-            abort.set()
-            break
-
-    get_results()
-    sort_kwargs = dict(kwargs)
-    for k in list(sort_kwargs.iterkeys()):
-        if k not in ('title', 'authors', 'identifiers'):
-            sort_kwargs.pop(k)
-
-    for plugin, results in results.iteritems():
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
-        plog = plugin.buf.getvalue().strip()
-        if plog:
-            log('\n'+'*'*35, plugin.name, '*'*35)
-            log('Found %d results'%len(results))
-            log(plog)
-            log('\n'+'*'*80)
-
-        for i, result in enumerate(results):
-            result.relevance_in_source = i
-            result.has_cached_cover_url = \
-                    plugin.get_cached_cover_url(result.identifiers) is not None
-            result.identify_plugin = plugin
-
-    log('The identify phase took %.2f seconds'%(time.time() - start_time))
-    log('Merging results from different sources and finding earliest',
-            'publication dates')
-    start_time = time.time()
-    results = merge_identify_results(results, log)
-    log('We have %d merged results, merging took: %.2f seconds' %
-            (len(results), time.time() - start_time))
-
-    if msprefs['txt_comments']:
-        for r in results:
-            if r.plugin.has_html_comments and r.comments:
-                r.comments = html2text(r.comments)
-
-    dummy = Metadata(_('Unknown'))
-    max_tags = msprefs['max_tags']
-    for f in msprefs['ignore_fields']:
-        for r in results:
-            setattr(r, f, getattr(dummy, f))
-            r.tags = r.tags[:max_tags]
-
-    return results
-
+# }}}
+
+# Merge results from different sources {{{
+
 class ISBNMerge(object):

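Inside the removed (and, in the next hunk, re-added) identify(), the get_results() helper drains each worker's queue without blocking by calling get_nowait() and treating Queue.Empty as "nothing new yet". A stand-alone sketch of that pattern, with every name other than get_nowait()/Empty illustrative:

# Stand-alone sketch of the non-blocking drain used by get_results().
from Queue import Queue, Empty   # Python 2 module name, matching this file

def drain(queues, collected):
    # Move everything currently available from each queue into `collected`.
    # Returns True if at least one new item was picked up on this poll.
    found = False
    for q in queues:
        while True:
            try:
                item = q.get_nowait()   # raises Empty instead of blocking
            except Empty:
                break
            collected.append(item)
            found = True
    return found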
@@ -298,6 +206,102 @@ def merge_identify_results(result_map, log):

     return isbn_merge.finalize()

+# }}}
+
+def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+    start_time = time.time()
+    plugins = list(metadata_plugins['identify'])
+
+    kwargs = {
+        'title': title,
+        'authors': authors,
+        'identifiers': identifiers,
+        'timeout': timeout,
+    }
+
+    log('Running identify query with parameters:')
+    log(kwargs)
+    log('Using plugins:', ', '.join([p.name for p in plugins]))
+    log('The log (if any) from individual plugins is below')
+
+    workers = [Worker(p, kwargs, abort) for p in plugins]
+    for w in workers:
+        w.start()
+
+    first_result_at = None
+    results = dict.fromkeys(plugins, [])
+
+    def get_results():
+        found = False
+        for w in workers:
+            try:
+                result = w.rq.get_nowait()
+            except Empty:
+                pass
+            else:
+                results[w.plugin].append(result)
+                found = True
+        return found
+
+    wait_time = msprefs['wait_after_first_identify_result']
+    while True:
+        time.sleep(0.2)
+
+        if get_results() and first_result_at is None:
+            first_result_at = time.time()
+
+        if not is_worker_alive(workers):
+            break
+
+        if (first_result_at is not None and time.time() - first_result_at <
+                wait_time):
+            log('Not waiting any longer for more results')
+            abort.set()
+            break
+
+    get_results()
+    sort_kwargs = dict(kwargs)
+    for k in list(sort_kwargs.iterkeys()):
+        if k not in ('title', 'authors', 'identifiers'):
+            sort_kwargs.pop(k)
+
+    for plugin, results in results.iteritems():
+        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+        plog = plugin.buf.getvalue().strip()
+        if plog:
+            log('\n'+'*'*35, plugin.name, '*'*35)
+            log('Found %d results'%len(results))
+            log(plog)
+            log('\n'+'*'*80)
+
+        for i, result in enumerate(results):
+            result.relevance_in_source = i
+            result.has_cached_cover_url = \
+                    plugin.get_cached_cover_url(result.identifiers) is not None
+            result.identify_plugin = plugin
+
+    log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('Merging results from different sources and finding earliest',
+            'publication dates')
+    start_time = time.time()
+    results = merge_identify_results(results, log)
+    log('We have %d merged results, merging took: %.2f seconds' %
+            (len(results), time.time() - start_time))
+
+    if msprefs['txt_comments']:
+        for r in results:
+            if r.plugin.has_html_comments and r.comments:
+                r.comments = html2text(r.comments)
+
+    dummy = Metadata(_('Unknown'))
+    max_tags = msprefs['max_tags']
+    for f in msprefs['ignore_fields']:
+        for r in results:
+            setattr(r, f, getattr(dummy, f))
+            r.tags = r.tags[:max_tags]
+
+    return results
+
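With the merge code above it in the file, identify() is the entry point that ties the worker threads, the polling loop and merge_identify_results() together. A hypothetical driver, where only the function signatures come from this diff and the module paths plus the sample query are assumptions (the real callers live elsewhere in calibre):

# Hypothetical driver for identify().
from threading import Event

from calibre.ebooks.metadata.sources.base import create_log
from calibre.ebooks.metadata.sources.identify import identify

abort = Event()        # workers and the polling loop stop once this is set
log = create_log()     # thread-safe log shared by all source plugins

results = identify(log, abort, title='A Sample Title',
        authors=['Some Author'], timeout=30)
for mi in results:
    print('%s -- %s' % (mi.title, ', '.join(mi.authors or [])))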