More work on metadata identify

This commit is contained in:
Kovid Goyal 2011-04-04 16:47:08 -06:00
parent 461c128bc2
commit 608cf75dc0
4 changed files with 132 additions and 8 deletions

View File

@ -1,6 +1,9 @@
calibre supports installation from source, only on Linux.
On Windows and OS X use the provided installers and use
the facilities of the calibre-debug command to hack on the calibre source.
Note that you *do not* need to install from source to hack on
the calibre source code. To get started with calibre development,
use a normal calibre install and follow the instructions at
http://calibre-ebook.com/user_manual/develop.html
On Linux, there are two kinds of installation from source possible.
Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
sudo python setup.py develop
Use the -h flag for help on the develop command.

View File

@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
# }}}
def get_cached_cover_urls(mi):
from calibre.customize.ui import metadata_plugins
plugins = list(metadata_plugins['identify'])
for p in plugins:
url = p.get_cached_cover_url(mi.identifiers)
if url:
yield (p, url)
class Source(Plugin):
type = _('Metadata source')

View File

@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
return results
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e
# src/calibre/ebooks/metadata/sources/identify.py
from calibre.ebooks.metadata.sources.test import (test_identify,
title_test, authors_test)
test_identify(
[
( # An e-book ISBN not on Amazon, one of the authors is
# unknown to Amazon
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
),
( # This isbn not on amazon
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
'authors':['Lutz']},
[title_test('Learning Python, 3rd Edition',
exact=True), authors_test(['Mark Lutz'])
]
),
( # Sophisticated comment formatting
{'identifiers':{'isbn': '9781416580829'}},
[title_test('Angels & Demons - Movie Tie-In: A Novel',
exact=True), authors_test(['Dan Brown'])]
),
( # No specific problems
{'identifiers':{'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True),
authors_test(['F. Scott Fitzgerald'])]
),
( # A newer book
{'identifiers':{'isbn': '9780316044981'}},
[title_test('The Heroes', exact=True),
authors_test(['Joe Abercrombie'])]
),
])
# }}}

View File

@ -14,7 +14,8 @@ from threading import Event
from calibre.customize.ui import metadata_plugins
from calibre import prints, sanitize_file_name2
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import create_log
from calibre.ebooks.metadata.sources.base import (create_log,
get_cached_cover_urls)
def isbn_test(isbn):
isbn_ = check_isbn(isbn)
@ -45,8 +46,75 @@ def authors_test(authors):
return test
def init_test(tdir_name):
tdir = tempfile.gettempdir()
lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
log = create_log(open(lf, 'wb'))
abort = Event()
return tdir, lf, log, abort
def test_identify_plugin(name, tests):
def test_identify(tests): # {{{
'''
:param tests: List of 2-tuples. Each two tuple is of the form (args,
test_funcs). args is a dict of keyword arguments to pass to
the identify method. test_funcs are callables that accept a
Metadata object and return True iff the object passes the
test.
'''
from calibre.ebooks.metadata.sources.identify import identify
tdir, lf, log, abort = init_test('Full Identify')
times = []
for kwargs, test_funcs in tests:
prints('Running test with:', kwargs)
args = (log, abort)
start_time = time.time()
results = identify(*args, **kwargs)
total_time = time.time() - start_time
times.append(total_time)
if not results:
prints('identify failed to find any results')
break
prints('Found', len(results), 'matches:', end=' ')
prints('Smaller relevance means better match')
for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30)
prints(mi)
prints('\nCached cover URLs :',
[x[0].name for x in get_cached_cover_urls(mi)])
prints('*'*75, '\n\n')
possibles = []
for mi in results:
test_failed = False
for tfunc in test_funcs:
if not tfunc(mi):
test_failed = True
break
if not test_failed:
possibles.append(mi)
if not possibles:
prints('ERROR: No results that passed all tests were found')
prints('Log saved to', lf)
raise SystemExit(1)
if results[0] is not possibles[0]:
prints('Most relevant result failed the tests')
raise SystemExit(1)
prints('Average time per query', sum(times)/len(times))
if os.stat(lf).st_size > 10:
prints('There were some errors/warnings, see log', lf)
# }}}
def test_identify_plugin(name, tests): # {{{
'''
:param name: Plugin name
:param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
break
prints('Testing the identify function of', plugin.name)
tdir = tempfile.gettempdir()
lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
log = create_log(open(lf, 'wb'))
abort = Event()
tdir, lf, log, abort = init_test(plugin.name)
prints('Log saved to', lf)
times = []
@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
if os.stat(lf).st_size > 10:
prints('There were some errors/warnings, see log', lf)
# }}}