...

2025-08-11 09:13:57 -04:00 · 2011-04-04 11:14:12 -06:00 · 2011-04-04 11:14:12 -06:00 · d1859b0f78
commit d1859b0f78
parent 3e1a43e86a
2 changed files with 100 additions and 95 deletions
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 10
+msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds

 def create_log(ostream=None):
    log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz
 from calibre.utils.html2text import html2text

-# How long to wait for more results after first result is found
-WAIT_AFTER_FIRST_RESULT = 30 # seconds
-
+# Download worker {{{
 class Worker(Thread):

    def __init__(self, plugin, kwargs, abort):
@ -47,99 +45,9 @@ def is_worker_alive(workers):
            return True
    return False

-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
-    start_time = time.time()
-    plugins = list(metadata_plugins['identify'])
-
-    kwargs = {
-            'title': title,
-            'authors': authors,
-            'identifiers': identifiers,
-            'timeout': timeout,
-    }
-
-    log('Running identify query with parameters:')
-    log(kwargs)
-    log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
-
-    workers = [Worker(p, kwargs, abort) for p in plugins]
-    for w in workers:
-        w.start()
-
-    first_result_at = None
-    results = dict.fromkeys(plugins, [])
-
-    def get_results():
-        found = False
-        for w in workers:
-            try:
-                result = w.rq.get_nowait()
-            except Empty:
-                pass
-            else:
-                results[w.plugin].append(result)
-                found = True
-        return found
-
-    while True:
-        time.sleep(0.2)
-
-        if get_results() and first_result_at is None:
-            first_result_at = time.time()
-
-        if not is_worker_alive(workers):
-            break
-
-        if (first_result_at is not None and time.time() - first_result_at <
-                WAIT_AFTER_FIRST_RESULT):
-            log('Not waiting any longer for more results')
-            abort.set()
-            break
-
-    get_results()
-    sort_kwargs = dict(kwargs)
-    for k in list(sort_kwargs.iterkeys()):
-        if k not in ('title', 'authors', 'identifiers'):
-            sort_kwargs.pop(k)
-
-    for plugin, results in results.iteritems():
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
-        plog = plugin.buf.getvalue().strip()
-        if plog:
-            log('\n'+'*'*35, plugin.name, '*'*35)
-            log('Found %d results'%len(results))
-            log(plog)
-            log('\n'+'*'*80)
-
-        for i, result in enumerate(results):
-            result.relevance_in_source = i
-            result.has_cached_cover_url = \
-                plugin.get_cached_cover_url(result.identifiers) is not None
-            result.identify_plugin = plugin
-
-    log('The identify phase took %.2f seconds'%(time.time() - start_time))
-    log('Merging results from different sources and finding earliest',
-            'publication dates')
-    start_time = time.time()
-    results = merge_identify_results(results, log)
-    log('We have %d merged results, merging took: %.2f seconds' %
-            (len(results), time.time() - start_time))
-
-    if msprefs['txt_comments']:
-        for r in results:
-            if r.plugin.has_html_comments and r.comments:
-                r.comments = html2text(r.comments)
-
-    dummy = Metadata(_('Unknown'))
-    max_tags = msprefs['max_tags']
-    for f in msprefs['ignore_fields']:
-        for r in results:
-            setattr(r, f, getattr(dummy, f))
-            r.tags = r.tags[:max_tags]
-
-    return results
+# }}}

+# Merge results from different sources {{{

 class ISBNMerge(object):

@ -298,6 +206,102 @@ def merge_identify_results(result_map, log):

    return isbn_merge.finalize()

+# }}}
+
+def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+    start_time = time.time()
+    plugins = list(metadata_plugins['identify'])
+
+    kwargs = {
+            'title': title,
+            'authors': authors,
+            'identifiers': identifiers,
+            'timeout': timeout,
+    }
+
+    log('Running identify query with parameters:')
+    log(kwargs)
+    log('Using plugins:', ', '.join([p.name for p in plugins]))
+    log('The log (if any) from individual plugins is below')
+
+    workers = [Worker(p, kwargs, abort) for p in plugins]
+    for w in workers:
+        w.start()
+
+    first_result_at = None
+    results = dict.fromkeys(plugins, [])
+
+    def get_results():
+        found = False
+        for w in workers:
+            try:
+                result = w.rq.get_nowait()
+            except Empty:
+                pass
+            else:
+                results[w.plugin].append(result)
+                found = True
+        return found
+
+    wait_time = msprefs['wait_after_first_identify_result']
+    while True:
+        time.sleep(0.2)
+
+        if get_results() and first_result_at is None:
+            first_result_at = time.time()
+
+        if not is_worker_alive(workers):
+            break
+
+        if (first_result_at is not None and time.time() - first_result_at <
+                wait_time):
+            log('Not waiting any longer for more results')
+            abort.set()
+            break
+
+    get_results()
+    sort_kwargs = dict(kwargs)
+    for k in list(sort_kwargs.iterkeys()):
+        if k not in ('title', 'authors', 'identifiers'):
+            sort_kwargs.pop(k)
+
+    for plugin, results in results.iteritems():
+        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+        plog = plugin.buf.getvalue().strip()
+        if plog:
+            log('\n'+'*'*35, plugin.name, '*'*35)
+            log('Found %d results'%len(results))
+            log(plog)
+            log('\n'+'*'*80)
+
+        for i, result in enumerate(results):
+            result.relevance_in_source = i
+            result.has_cached_cover_url = \
+                plugin.get_cached_cover_url(result.identifiers) is not None
+            result.identify_plugin = plugin
+
+    log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('Merging results from different sources and finding earliest',
+            'publication dates')
+    start_time = time.time()
+    results = merge_identify_results(results, log)
+    log('We have %d merged results, merging took: %.2f seconds' %
+            (len(results), time.time() - start_time))
+
+    if msprefs['txt_comments']:
+        for r in results:
+            if r.plugin.has_html_comments and r.comments:
+                r.comments = html2text(r.comments)
+
+    dummy = Metadata(_('Unknown'))
+    max_tags = msprefs['max_tags']
+    for f in msprefs['ignore_fields']:
+        for r in results:
+            setattr(r, f, getattr(dummy, f))
+            r.tags = r.tags[:max_tags]
+
+    return results
+