More work on metadata identify

2025-07-09 03:04:10 -04:00 · 2011-04-04 16:47:08 -06:00 · 2011-04-04 16:47:08 -06:00 · 608cf75dc0
commit 608cf75dc0
parent 461c128bc2
4 changed files with 132 additions and 8 deletions
--- a/8
+++ b/8
@ -1,6 +1,9 @@
 calibre supports installation from source, only on Linux. 
-On Windows and OS X use the provided installers and use
-the facilities of the calibre-debug command to hack on the calibre source. 
+
+Note that you *do not* need to install from source to hack on
+the calibre source code. To get started with calibre development,
+use a normal calibre install and follow the instructions at
+http://calibre-ebook.com/user_manual/develop.html

 On Linux, there are two kinds of installation from source possible.
 Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
    sudo python setup.py develop

 Use the -h flag for help on the develop command.
+
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):

 # }}}

+def get_cached_cover_urls(mi):
+    from calibre.customize.ui import metadata_plugins
+    plugins = list(metadata_plugins['identify'])
+    for p in plugins:
+        url = p.get_cached_cover_url(mi.identifiers)
+        if url:
+            yield (p, url)
+
+
 class Source(Plugin):

    type = _('Metadata source')
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):

    return results

+if __name__ == '__main__': # tests {{{
+    # To run these test use: calibre-debug -e
+    # src/calibre/ebooks/metadata/sources/identify.py
+    from calibre.ebooks.metadata.sources.test import (test_identify,
+            title_test, authors_test)
+    test_identify(
+        [

+            ( # An e-book ISBN not on Amazon, one of the authors is
+              # unknown to Amazon
+                {'identifiers':{'isbn': '9780307459671'},
+                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
+                [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
+                    exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]

+            ),
+
+            (  # This isbn not on amazon
+                {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
+                    'authors':['Lutz']},
+                [title_test('Learning Python, 3rd Edition',
+                    exact=True), authors_test(['Mark Lutz'])
+                 ]
+
+            ),
+
+            ( # Sophisticated comment formatting
+                {'identifiers':{'isbn': '9781416580829'}},
+                [title_test('Angels & Demons - Movie Tie-In: A Novel',
+                    exact=True), authors_test(['Dan Brown'])]
+            ),
+
+            ( # No specific problems
+                {'identifiers':{'isbn': '0743273567'}},
+                [title_test('The great gatsby', exact=True),
+                    authors_test(['F. Scott Fitzgerald'])]
+            ),
+
+            (  # A newer book
+                {'identifiers':{'isbn': '9780316044981'}},
+                [title_test('The Heroes', exact=True),
+                    authors_test(['Joe Abercrombie'])]
+
+            ),
+
+        ])
+# }}}

--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -14,7 +14,8 @@ from threading import Event
 from calibre.customize.ui import metadata_plugins
 from calibre import prints, sanitize_file_name2
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
+        get_cached_cover_urls)

 def isbn_test(isbn):
    isbn_ = check_isbn(isbn)
@ -45,8 +46,75 @@ def authors_test(authors):

    return test

+def init_test(tdir_name):
+    tdir = tempfile.gettempdir()
+    lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
+    log = create_log(open(lf, 'wb'))
+    abort = Event()
+    return tdir, lf, log, abort

-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
+    '''
+    :param tests: List of 2-tuples. Each two tuple is of the form (args,
+                  test_funcs). args is a dict of keyword arguments to pass to
+                  the identify method. test_funcs are callables that accept a
+                  Metadata object and return True iff the object passes the
+                  test.
+    '''
+    from calibre.ebooks.metadata.sources.identify import identify
+
+    tdir, lf, log, abort = init_test('Full Identify')
+
+    times = []
+
+    for kwargs, test_funcs in tests:
+        prints('Running test with:', kwargs)
+        args = (log, abort)
+        start_time = time.time()
+        results = identify(*args, **kwargs)
+        total_time = time.time() - start_time
+        times.append(total_time)
+        if not results:
+            prints('identify failed to find any results')
+            break
+
+        prints('Found', len(results), 'matches:', end=' ')
+        prints('Smaller relevance means better match')
+
+        for i, mi in enumerate(results):
+            prints('*'*30, 'Relevance:', i, '*'*30)
+            prints(mi)
+            prints('\nCached cover URLs    :',
+                    [x[0].name for x in get_cached_cover_urls(mi)])
+            prints('*'*75, '\n\n')
+
+        possibles = []
+        for mi in results:
+            test_failed = False
+            for tfunc in test_funcs:
+                if not tfunc(mi):
+                    test_failed = True
+                    break
+            if not test_failed:
+                possibles.append(mi)
+
+        if not possibles:
+            prints('ERROR: No results that passed all tests were found')
+            prints('Log saved to', lf)
+            raise SystemExit(1)
+
+        if results[0] is not possibles[0]:
+            prints('Most relevant result failed the tests')
+            raise SystemExit(1)
+
+    prints('Average time per query', sum(times)/len(times))
+
+    if os.stat(lf).st_size > 10:
+        prints('There were some errors/warnings, see log', lf)
+
+# }}}
+
+def test_identify_plugin(name, tests): # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
            break
    prints('Testing the identify function of', plugin.name)

-    tdir = tempfile.gettempdir()
-    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
-    log = create_log(open(lf, 'wb'))
-    abort = Event()
+    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
+# }}}