More work on metadata identify

This commit is contained in:
Kovid Goyal 2011-04-04 16:47:08 -06:00
parent 461c128bc2
commit 608cf75dc0
4 changed files with 132 additions and 8 deletions

View File

@ -1,6 +1,9 @@
calibre supports installation from source, only on Linux. calibre supports installation from source, only on Linux.
On Windows and OS X use the provided installers and use
the facilities of the calibre-debug command to hack on the calibre source. Note that you *do not* need to install from source to hack on
the calibre source code. To get started with calibre development,
use a normal calibre install and follow the instructions at
http://calibre-ebook.com/user_manual/develop.html
On Linux, there are two kinds of installation from source possible. On Linux, there are two kinds of installation from source possible.
Note that both kinds require lots of dependencies as well as a Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
sudo python setup.py develop sudo python setup.py develop
Use the -h flag for help on the develop command. Use the -h flag for help on the develop command.

View File

@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
# }}} # }}}
def get_cached_cover_urls(mi):
from calibre.customize.ui import metadata_plugins
plugins = list(metadata_plugins['identify'])
for p in plugins:
url = p.get_cached_cover_url(mi.identifiers)
if url:
yield (p, url)
class Source(Plugin): class Source(Plugin):
type = _('Metadata source') type = _('Metadata source')

View File

@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
return results return results
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e
# src/calibre/ebooks/metadata/sources/identify.py
from calibre.ebooks.metadata.sources.test import (test_identify,
title_test, authors_test)
test_identify(
[
( # An e-book ISBN not on Amazon, one of the authors is
# unknown to Amazon
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
),
( # This isbn not on amazon
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
'authors':['Lutz']},
[title_test('Learning Python, 3rd Edition',
exact=True), authors_test(['Mark Lutz'])
]
),
( # Sophisticated comment formatting
{'identifiers':{'isbn': '9781416580829'}},
[title_test('Angels & Demons - Movie Tie-In: A Novel',
exact=True), authors_test(['Dan Brown'])]
),
( # No specific problems
{'identifiers':{'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True),
authors_test(['F. Scott Fitzgerald'])]
),
( # A newer book
{'identifiers':{'isbn': '9780316044981'}},
[title_test('The Heroes', exact=True),
authors_test(['Joe Abercrombie'])]
),
])
# }}}

View File

@ -14,7 +14,8 @@ from threading import Event
from calibre.customize.ui import metadata_plugins from calibre.customize.ui import metadata_plugins
from calibre import prints, sanitize_file_name2 from calibre import prints, sanitize_file_name2
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import create_log from calibre.ebooks.metadata.sources.base import (create_log,
get_cached_cover_urls)
def isbn_test(isbn): def isbn_test(isbn):
isbn_ = check_isbn(isbn) isbn_ = check_isbn(isbn)
@ -45,8 +46,75 @@ def authors_test(authors):
return test return test
def init_test(tdir_name):
tdir = tempfile.gettempdir()
lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
log = create_log(open(lf, 'wb'))
abort = Event()
return tdir, lf, log, abort
def test_identify_plugin(name, tests): def test_identify(tests): # {{{
'''
:param tests: List of 2-tuples. Each two tuple is of the form (args,
test_funcs). args is a dict of keyword arguments to pass to
the identify method. test_funcs are callables that accept a
Metadata object and return True iff the object passes the
test.
'''
from calibre.ebooks.metadata.sources.identify import identify
tdir, lf, log, abort = init_test('Full Identify')
times = []
for kwargs, test_funcs in tests:
prints('Running test with:', kwargs)
args = (log, abort)
start_time = time.time()
results = identify(*args, **kwargs)
total_time = time.time() - start_time
times.append(total_time)
if not results:
prints('identify failed to find any results')
break
prints('Found', len(results), 'matches:', end=' ')
prints('Smaller relevance means better match')
for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30)
prints(mi)
prints('\nCached cover URLs :',
[x[0].name for x in get_cached_cover_urls(mi)])
prints('*'*75, '\n\n')
possibles = []
for mi in results:
test_failed = False
for tfunc in test_funcs:
if not tfunc(mi):
test_failed = True
break
if not test_failed:
possibles.append(mi)
if not possibles:
prints('ERROR: No results that passed all tests were found')
prints('Log saved to', lf)
raise SystemExit(1)
if results[0] is not possibles[0]:
prints('Most relevant result failed the tests')
raise SystemExit(1)
prints('Average time per query', sum(times)/len(times))
if os.stat(lf).st_size > 10:
prints('There were some errors/warnings, see log', lf)
# }}}
def test_identify_plugin(name, tests): # {{{
''' '''
:param name: Plugin name :param name: Plugin name
:param tests: List of 2-tuples. Each two tuple is of the form (args, :param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
break break
prints('Testing the identify function of', plugin.name) prints('Testing the identify function of', plugin.name)
tdir = tempfile.gettempdir() tdir, lf, log, abort = init_test(plugin.name)
lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
log = create_log(open(lf, 'wb'))
abort = Event()
prints('Log saved to', lf) prints('Log saved to', lf)
times = [] times = []
@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
if os.stat(lf).st_size > 10: if os.stat(lf).st_size > 10:
prints('There were some errors/warnings, see log', lf) prints('There were some errors/warnings, see log', lf)
# }}}