mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on metadata identify
This commit is contained in:
parent
461c128bc2
commit
608cf75dc0
8
INSTALL
8
INSTALL
@ -1,6 +1,9 @@
|
||||
calibre supports installation from source, only on Linux.
|
||||
On Windows and OS X use the provided installers and use
|
||||
the facilities of the calibre-debug command to hack on the calibre source.
|
||||
|
||||
Note that you *do not* need to install from source to hack on
|
||||
the calibre source code. To get started with calibre development,
|
||||
use a normal calibre install and follow the instructions at
|
||||
http://calibre-ebook.com/user_manual/develop.html
|
||||
|
||||
On Linux, there are two kinds of installation from source possible.
|
||||
Note that both kinds require lots of dependencies as well as a
|
||||
@ -45,3 +48,4 @@ This type of install can be run with the command::
|
||||
sudo python setup.py develop
|
||||
|
||||
Use the -h flag for help on the develop command.
|
||||
|
||||
|
@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
|
||||
|
||||
# }}}
|
||||
|
||||
def get_cached_cover_urls(mi):
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
plugins = list(metadata_plugins['identify'])
|
||||
for p in plugins:
|
||||
url = p.get_cached_cover_url(mi.identifiers)
|
||||
if url:
|
||||
yield (p, url)
|
||||
|
||||
|
||||
class Source(Plugin):
|
||||
|
||||
type = _('Metadata source')
|
||||
|
@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
|
||||
|
||||
return results
|
||||
|
||||
if __name__ == '__main__': # tests {{{
|
||||
# To run these test use: calibre-debug -e
|
||||
# src/calibre/ebooks/metadata/sources/identify.py
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify,
|
||||
title_test, authors_test)
|
||||
test_identify(
|
||||
[
|
||||
|
||||
( # An e-book ISBN not on Amazon, one of the authors is
|
||||
# unknown to Amazon
|
||||
{'identifiers':{'isbn': '9780307459671'},
|
||||
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
|
||||
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
|
||||
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
|
||||
|
||||
),
|
||||
|
||||
( # This isbn not on amazon
|
||||
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
|
||||
'authors':['Lutz']},
|
||||
[title_test('Learning Python, 3rd Edition',
|
||||
exact=True), authors_test(['Mark Lutz'])
|
||||
]
|
||||
|
||||
),
|
||||
|
||||
( # Sophisticated comment formatting
|
||||
{'identifiers':{'isbn': '9781416580829'}},
|
||||
[title_test('Angels & Demons - Movie Tie-In: A Novel',
|
||||
exact=True), authors_test(['Dan Brown'])]
|
||||
),
|
||||
|
||||
( # No specific problems
|
||||
{'identifiers':{'isbn': '0743273567'}},
|
||||
[title_test('The great gatsby', exact=True),
|
||||
authors_test(['F. Scott Fitzgerald'])]
|
||||
),
|
||||
|
||||
( # A newer book
|
||||
{'identifiers':{'isbn': '9780316044981'}},
|
||||
[title_test('The Heroes', exact=True),
|
||||
authors_test(['Joe Abercrombie'])]
|
||||
|
||||
),
|
||||
|
||||
])
|
||||
# }}}
|
||||
|
||||
|
@ -14,7 +14,8 @@ from threading import Event
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
from calibre import prints, sanitize_file_name2
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import create_log
|
||||
from calibre.ebooks.metadata.sources.base import (create_log,
|
||||
get_cached_cover_urls)
|
||||
|
||||
def isbn_test(isbn):
|
||||
isbn_ = check_isbn(isbn)
|
||||
@ -45,8 +46,75 @@ def authors_test(authors):
|
||||
|
||||
return test
|
||||
|
||||
def init_test(tdir_name):
|
||||
tdir = tempfile.gettempdir()
|
||||
lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
|
||||
log = create_log(open(lf, 'wb'))
|
||||
abort = Event()
|
||||
return tdir, lf, log, abort
|
||||
|
||||
def test_identify_plugin(name, tests):
|
||||
def test_identify(tests): # {{{
|
||||
'''
|
||||
:param tests: List of 2-tuples. Each two tuple is of the form (args,
|
||||
test_funcs). args is a dict of keyword arguments to pass to
|
||||
the identify method. test_funcs are callables that accept a
|
||||
Metadata object and return True iff the object passes the
|
||||
test.
|
||||
'''
|
||||
from calibre.ebooks.metadata.sources.identify import identify
|
||||
|
||||
tdir, lf, log, abort = init_test('Full Identify')
|
||||
|
||||
times = []
|
||||
|
||||
for kwargs, test_funcs in tests:
|
||||
prints('Running test with:', kwargs)
|
||||
args = (log, abort)
|
||||
start_time = time.time()
|
||||
results = identify(*args, **kwargs)
|
||||
total_time = time.time() - start_time
|
||||
times.append(total_time)
|
||||
if not results:
|
||||
prints('identify failed to find any results')
|
||||
break
|
||||
|
||||
prints('Found', len(results), 'matches:', end=' ')
|
||||
prints('Smaller relevance means better match')
|
||||
|
||||
for i, mi in enumerate(results):
|
||||
prints('*'*30, 'Relevance:', i, '*'*30)
|
||||
prints(mi)
|
||||
prints('\nCached cover URLs :',
|
||||
[x[0].name for x in get_cached_cover_urls(mi)])
|
||||
prints('*'*75, '\n\n')
|
||||
|
||||
possibles = []
|
||||
for mi in results:
|
||||
test_failed = False
|
||||
for tfunc in test_funcs:
|
||||
if not tfunc(mi):
|
||||
test_failed = True
|
||||
break
|
||||
if not test_failed:
|
||||
possibles.append(mi)
|
||||
|
||||
if not possibles:
|
||||
prints('ERROR: No results that passed all tests were found')
|
||||
prints('Log saved to', lf)
|
||||
raise SystemExit(1)
|
||||
|
||||
if results[0] is not possibles[0]:
|
||||
prints('Most relevant result failed the tests')
|
||||
raise SystemExit(1)
|
||||
|
||||
prints('Average time per query', sum(times)/len(times))
|
||||
|
||||
if os.stat(lf).st_size > 10:
|
||||
prints('There were some errors/warnings, see log', lf)
|
||||
|
||||
# }}}
|
||||
|
||||
def test_identify_plugin(name, tests): # {{{
|
||||
'''
|
||||
:param name: Plugin name
|
||||
:param tests: List of 2-tuples. Each two tuple is of the form (args,
|
||||
@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
|
||||
break
|
||||
prints('Testing the identify function of', plugin.name)
|
||||
|
||||
tdir = tempfile.gettempdir()
|
||||
lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
|
||||
log = create_log(open(lf, 'wb'))
|
||||
abort = Event()
|
||||
tdir, lf, log, abort = init_test(plugin.name)
|
||||
prints('Log saved to', lf)
|
||||
|
||||
times = []
|
||||
@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
|
||||
|
||||
if os.stat(lf).st_size > 10:
|
||||
prints('There were some errors/warnings, see log', lf)
|
||||
# }}}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user