mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on metadata identify
This commit is contained in:
parent
461c128bc2
commit
608cf75dc0
8
INSTALL
8
INSTALL
@ -1,6 +1,9 @@
|
|||||||
calibre supports installation from source, only on Linux.
|
calibre supports installation from source, only on Linux.
|
||||||
On Windows and OS X use the provided installers and use
|
|
||||||
the facilities of the calibre-debug command to hack on the calibre source.
|
Note that you *do not* need to install from source to hack on
|
||||||
|
the calibre source code. To get started with calibre development,
|
||||||
|
use a normal calibre install and follow the instructions at
|
||||||
|
http://calibre-ebook.com/user_manual/develop.html
|
||||||
|
|
||||||
On Linux, there are two kinds of installation from source possible.
|
On Linux, there are two kinds of installation from source possible.
|
||||||
Note that both kinds require lots of dependencies as well as a
|
Note that both kinds require lots of dependencies as well as a
|
||||||
@ -45,3 +48,4 @@ This type of install can be run with the command::
|
|||||||
sudo python setup.py develop
|
sudo python setup.py develop
|
||||||
|
|
||||||
Use the -h flag for help on the develop command.
|
Use the -h flag for help on the develop command.
|
||||||
|
|
||||||
|
@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
def get_cached_cover_urls(mi):
    '''
    Yield a ``(plugin, url)`` 2-tuple for every identify plugin that returns
    a truthy value from ``get_cached_cover_url`` for this book.

    :param mi: Object whose ``identifiers`` attribute is handed unchanged to
               each plugin's ``get_cached_cover_url`` method.
    '''
    # Imported inside the function, exactly as the original did.
    from calibre.customize.ui import metadata_plugins
    # NOTE(review): metadata_plugins is understood to be a function taking an
    # iterable of capability names, so it must be called rather than
    # subscripted (the original ``metadata_plugins['identify']`` would raise
    # TypeError on a function) -- confirm against calibre.customize.ui.
    plugins = list(metadata_plugins(['identify']))
    for plugin in plugins:
        url = plugin.get_cached_cover_url(mi.identifiers)
        if url:
            yield (plugin, url)
|
||||||
|
|
||||||
|
|
||||||
class Source(Plugin):
|
class Source(Plugin):
|
||||||
|
|
||||||
type = _('Metadata source')
|
type = _('Metadata source')
|
||||||
|
@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
if __name__ == '__main__': # tests {{{
    # To run these tests use: calibre-debug -e
    # src/calibre/ebooks/metadata/sources/identify.py
    from calibre.ebooks.metadata.sources.test import (test_identify,
            title_test, authors_test)

    # Each entry is (keyword arguments for identify, checks that a matching
    # result has to satisfy).
    identify_tests = [

        (   # An e-book ISBN not on Amazon, one of the authors is
            # unknown to Amazon
            {
                'identifiers': {'isbn': '9780307459671'},
                'title': 'Invisible Gorilla',
                'authors': ['Christopher Chabris'],
            },
            [
                title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
                    exact=True),
                authors_test(['Christopher Chabris', 'Daniel Simons']),
            ]
        ),

        (   # This isbn not on amazon
            {
                'identifiers': {'isbn': '8324616489'},
                'title': 'Learning Python',
                'authors': ['Lutz'],
            },
            [
                title_test('Learning Python, 3rd Edition', exact=True),
                authors_test(['Mark Lutz']),
            ]
        ),

        (   # Sophisticated comment formatting
            {'identifiers': {'isbn': '9781416580829'}},
            [
                title_test('Angels & Demons - Movie Tie-In: A Novel',
                    exact=True),
                authors_test(['Dan Brown']),
            ]
        ),

        (   # No specific problems
            {'identifiers': {'isbn': '0743273567'}},
            [
                title_test('The great gatsby', exact=True),
                authors_test(['F. Scott Fitzgerald']),
            ]
        ),

        (   # A newer book
            {'identifiers': {'isbn': '9780316044981'}},
            [
                title_test('The Heroes', exact=True),
                authors_test(['Joe Abercrombie']),
            ]
        ),

    ]

    test_identify(identify_tests)
# }}}
|
||||||
|
|
||||||
|
@ -14,7 +14,8 @@ from threading import Event
|
|||||||
from calibre.customize.ui import metadata_plugins
|
from calibre.customize.ui import metadata_plugins
|
||||||
from calibre import prints, sanitize_file_name2
|
from calibre import prints, sanitize_file_name2
|
||||||
from calibre.ebooks.metadata import check_isbn
|
from calibre.ebooks.metadata import check_isbn
|
||||||
from calibre.ebooks.metadata.sources.base import create_log
|
from calibre.ebooks.metadata.sources.base import (create_log,
|
||||||
|
get_cached_cover_urls)
|
||||||
|
|
||||||
def isbn_test(isbn):
|
def isbn_test(isbn):
|
||||||
isbn_ = check_isbn(isbn)
|
isbn_ = check_isbn(isbn)
|
||||||
@ -45,8 +46,75 @@ def authors_test(authors):
|
|||||||
|
|
||||||
return test
|
return test
|
||||||
|
|
||||||
|
def init_test(tdir_name):
    '''
    Create the log file, log object and abort event shared by the identify
    tests.

    :param tdir_name: Label used to build the log file name; spaces are
                      removed and ``_identify_test.txt`` is appended.
    :return: 4-tuple ``(tdir, lf, log, abort)``: the system temporary
             directory, the path of the log file inside it, the object
             returned by ``create_log`` for that file, and a new
             ``threading.Event``.
    '''
    temp_root = tempfile.gettempdir()
    log_name = tdir_name.replace(' ', '') + '_identify_test.txt'
    log_path = os.path.join(temp_root, log_name)
    # The stream is not closed here: it is handed to create_log, and the
    # resulting log object is returned to the caller.
    log_stream = open(log_path, 'wb')
    return temp_root, log_path, create_log(log_stream), Event()
|
||||||
|
|
||||||
def test_identify_plugin(name, tests):
|
def test_identify(tests): # {{{
    '''
    Run the identify function over a list of queries, print every result
    and check the results against the supplied test functions.

    :param tests: List of 2-tuples. Each 2-tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.

    Raises SystemExit(1) when no result of a query passes all of its test
    functions, or when the first (most relevant) result is not the first one
    that passes.
    '''
    from calibre.ebooks.metadata.sources.identify import identify

    tdir, lf, log, abort = init_test('Full Identify')

    times = []

    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        start_time = time.time()
        results = identify(log, abort, **kwargs)
        times.append(time.time() - start_time)
        if not results:
            # NOTE(review): this stops the run without a failure exit code;
            # kept as in the original -- confirm that is intended.
            prints('identify failed to find any results')
            break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URLs :',
                    [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*'*75, '\n\n')

        # Results for which every test function returns a true value;
        # all() stops at the first failing test, like the original loop.
        possibles = [mi for mi in results
                if all(tfunc(mi) for tfunc in test_funcs)]

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        # The result list is ordered by relevance, so the first result must
        # itself be the first one that passes.
        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

    # Guard against an empty test list, which would otherwise raise
    # ZeroDivisionError here.
    if times:
        prints('Average time per query', sum(times)/len(times))

    # A log file larger than 10 bytes is reported as containing
    # errors/warnings.
    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
# }}}
|
||||||
|
|
||||||
|
def test_identify_plugin(name, tests): # {{{
|
||||||
'''
|
'''
|
||||||
:param name: Plugin name
|
:param name: Plugin name
|
||||||
:param tests: List of 2-tuples. Each two tuple is of the form (args,
|
:param tests: List of 2-tuples. Each two tuple is of the form (args,
|
||||||
@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
|
|||||||
break
|
break
|
||||||
prints('Testing the identify function of', plugin.name)
|
prints('Testing the identify function of', plugin.name)
|
||||||
|
|
||||||
tdir = tempfile.gettempdir()
|
tdir, lf, log, abort = init_test(plugin.name)
|
||||||
lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
|
|
||||||
log = create_log(open(lf, 'wb'))
|
|
||||||
abort = Event()
|
|
||||||
prints('Log saved to', lf)
|
prints('Log saved to', lf)
|
||||||
|
|
||||||
times = []
|
times = []
|
||||||
@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
|
|||||||
|
|
||||||
if os.stat(lf).st_size > 10:
|
if os.stat(lf).st_size > 10:
|
||||||
prints('There were some errors/warnings, see log', lf)
|
prints('There were some errors/warnings, see log', lf)
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user