More work on metadata identify

2025-07-09 03:04:10 -04:00 · 2011-04-04 16:47:08 -06:00 · 2011-04-04 16:47:08 -06:00 · 608cf75dc0
commit 608cf75dc0
parent 461c128bc2
4 changed files with 132 additions and 8 deletions
--- a/8
+++ b/8
@ -1,6 +1,9 @@
 calibre supports installation from source only on Linux.
-On Windows and OS X use the provided installers and use
+
-the facilities of the calibre-debug command to hack on the calibre source. 
+Note that you *do not* need to install from source to hack on
 the calibre source code. To get started with calibre development,
 use a normal calibre install and follow the instructions at
 http://calibre-ebook.com/user_manual/develop.html
 On Linux, there are two kinds of installation from source possible.
 Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
    sudo python setup.py develop
 Use the -h flag for help on the develop command.
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
 # }}}
 def get_cached_cover_urls(mi):
    '''
    Yield a ``(plugin, url)`` pair for each plugin obtained from
    ``metadata_plugins['identify']`` that returns a cached cover URL for
    the identifiers of ``mi``.

    :param mi: Object whose ``identifiers`` attribute is passed to every
               plugin's ``get_cached_cover_url`` method.
    '''
    from calibre.customize.ui import metadata_plugins
    # NOTE(review): metadata_plugins is subscripted here rather than called;
    # confirm that this matches its definition in calibre.customize.ui.
    for plugin in list(metadata_plugins['identify']):
        cached_url = plugin.get_cached_cover_url(mi.identifiers)
        if not cached_url:
            # Nothing cached by this plugin, move on to the next one
            continue
        yield (plugin, cached_url)
 class Source(Plugin):
    type = _('Metadata source')
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
    return results
 if __name__ == '__main__': # tests {{{
    # To run these tests use: calibre-debug -e
    # src/calibre/ebooks/metadata/sources/identify.py
    from calibre.ebooks.metadata.sources.test import (test_identify,
            title_test, authors_test)

    # Every case is a 2-tuple: the keyword arguments handed to identify and
    # the list of checks that a result has to pass.
    identify_cases = [
        # An e-book ISBN not on Amazon, one of the authors is unknown to
        # Amazon
        (
            {
                'identifiers':{'isbn': '9780307459671'},
                'title':'Invisible Gorilla',
                'authors':['Christopher Chabris'],
            },
            [
                title_test(
                    'The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
                    exact=True),
                authors_test(['Christopher Chabris', 'Daniel Simons']),
            ],
        ),

        # This isbn not on amazon
        (
            {
                'identifiers':{'isbn': '8324616489'},
                'title':'Learning Python',
                'authors':['Lutz'],
            },
            [
                title_test('Learning Python, 3rd Edition', exact=True),
                authors_test(['Mark Lutz']),
            ],
        ),

        # Sophisticated comment formatting
        (
            {'identifiers':{'isbn': '9781416580829'}},
            [
                title_test('Angels & Demons - Movie Tie-In: A Novel',
                    exact=True),
                authors_test(['Dan Brown']),
            ],
        ),

        # No specific problems
        (
            {'identifiers':{'isbn': '0743273567'}},
            [
                title_test('The great gatsby', exact=True),
                authors_test(['F. Scott Fitzgerald']),
            ],
        ),

        # A newer book
        (
            {'identifiers':{'isbn': '9780316044981'}},
            [
                title_test('The Heroes', exact=True),
                authors_test(['Joe Abercrombie']),
            ],
        ),
    ]

    test_identify(identify_cases)
 # }}}
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -14,7 +14,8 @@ from threading import Event
 from calibre.customize.ui import metadata_plugins
 from calibre import prints, sanitize_file_name2
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
        get_cached_cover_urls)
 def isbn_test(isbn):
    isbn_ = check_isbn(isbn)
@ -45,8 +46,75 @@ def authors_test(authors):
    return test
 def init_test(tdir_name):
    '''
    Set up the objects shared by the identify test runners.

    :param tdir_name: Name used to build the log file name; spaces are
                      removed and ``_identify_test.txt`` is appended.
    :return: The 4-tuple ``(tdir, lf, log, abort)``: the temporary
             directory, the log file path, the object returned by
             ``create_log`` for that file, and a new ``Event``.
    '''
    tdir = tempfile.gettempdir()
    log_file_name = tdir_name.replace(' ', '') + '_identify_test.txt'
    lf = os.path.join(tdir, log_file_name)
    # The file is opened for binary writing and handed to create_log still
    # open; it is not closed here.
    log_stream = open(lf, 'wb')
    log = create_log(log_stream)
    abort = Event()
    return tdir, lf, log, abort
-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
    '''
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    from calibre.ebooks.metadata.sources.identify import identify
    tdir, lf, log, abort = init_test('Full Identify')
    times = []
    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        args = (log, abort)
        start_time = time.time()
        results = identify(*args, **kwargs)
        total_time = time.time() - start_time
        times.append(total_time)
        if not results:
            prints('identify failed to find any results')
            break
        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')
        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URLs    :',
                    [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*'*75, '\n\n')
        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)
        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)
        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)
    prints('Average time per query', sum(times)/len(times))
    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
 # }}}
 def test_identify_plugin(name, tests): # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
            break
    prints('Testing the identify function of', plugin.name)
-    tdir = tempfile.gettempdir()
+    tdir, lf, log, abort = init_test(plugin.name)
    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
    log = create_log(open(lf, 'wb'))
    abort = Event()
    prints('Log saved to', lf)
    times = []
@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
 # }}}