mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Add cover downloading to the new fetch-ebook-metadata
This commit is contained in:
parent
2828ba5276
commit
6773cf71af
@ -279,7 +279,7 @@ class Worker(Thread): # Get details {{{
|
||||
|
||||
class Amazon(Source):
|
||||
|
||||
name = 'Amazon'
|
||||
name = 'Amazon Metadata'
|
||||
description = _('Downloads metadata from Amazon')
|
||||
|
||||
capabilities = frozenset(['identify', 'cover'])
|
||||
@ -493,9 +493,10 @@ class Amazon(Source):
|
||||
if abort.is_set():
|
||||
return
|
||||
br = self.browser
|
||||
log('Downloading cover from:', cached_url)
|
||||
try:
|
||||
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
||||
result_queue.put(cdata)
|
||||
result_queue.put((self, cdata))
|
||||
except:
|
||||
log.exception('Failed to download cover from:', cached_url)
|
||||
# }}}
|
||||
|
@ -22,6 +22,12 @@ msprefs.defaults['txt_comments'] = False
|
||||
msprefs.defaults['ignore_fields'] = []
|
||||
msprefs.defaults['max_tags'] = 20
|
||||
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
|
||||
msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
|
||||
|
||||
# Google covers are often poor quality (scans/errors) but they have high
|
||||
# resolution, so they trump covers from better sources. So make sure they
|
||||
# are only used if no other covers are found.
|
||||
msprefs.defaults['cover_priorities'] = {'Google':2}
|
||||
|
||||
def create_log(ostream=None):
|
||||
log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
|
||||
@ -340,7 +346,8 @@ class Source(Plugin):
|
||||
title=None, authors=None, identifiers={}, timeout=30):
|
||||
'''
|
||||
Download a cover and put it into result_queue. The parameters all have
|
||||
the same meaning as for :meth:`identify`.
|
||||
the same meaning as for :meth:`identify`. Put (self, cover_data) into
|
||||
result_queue.
|
||||
|
||||
This method should use cached cover URLs for efficiency whenever
|
||||
possible. When cached data is not present, most plugins simply call
|
||||
|
@ -13,10 +13,13 @@ from threading import Event
|
||||
|
||||
from calibre import prints
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
from calibre.ebooks.metadata import string_to_authors
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.ebooks.metadata.sources.base import create_log
|
||||
from calibre.ebooks.metadata.sources.identify import identify
|
||||
from calibre.ebooks.metadata.sources.covers import download_cover
|
||||
|
||||
|
||||
def option_parser():
|
||||
parser = OptionParser(textwrap.dedent(
|
||||
@ -33,6 +36,8 @@ def option_parser():
|
||||
parser.add_option('-v', '--verbose', default=False, action='store_true',
|
||||
help='Print the log to the console (stderr)')
|
||||
parser.add_option('-o', '--opf', help='Output the metadata in OPF format')
|
||||
parser.add_option('-c', '--cover',
|
||||
help='Specify a filename. The cover, if available, will be saved to it')
|
||||
parser.add_option('-d', '--timeout', default='30',
|
||||
help='Timeout in seconds. Default is 30')
|
||||
|
||||
@ -57,14 +62,26 @@ def main(args=sys.argv):
|
||||
results = identify(log, abort, title=opts.title, authors=authors,
|
||||
identifiers=identifiers, timeout=int(opts.timeout))
|
||||
|
||||
log = buf.getvalue()
|
||||
|
||||
if not results:
|
||||
print (log, file=sys.stderr)
|
||||
prints('No results found', file=sys.stderr)
|
||||
raise SystemExit(1)
|
||||
|
||||
result = results[0]
|
||||
|
||||
cf = None
|
||||
if opts.cover and results:
|
||||
cover = download_cover(log, title=opts.title, authors=authors,
|
||||
identifiers=result.identifiers, timeout=int(opts.timeout))
|
||||
if cover is None:
|
||||
prints('No cover found', file=sys.stderr)
|
||||
else:
|
||||
save_cover_data_to(cover[-1], opts.cover)
|
||||
result.cover = cf = opts.cover
|
||||
|
||||
|
||||
log = buf.getvalue()
|
||||
|
||||
|
||||
result = (metadata_to_opf(result) if opts.opf else
|
||||
unicode(result).encode('utf-8'))
|
||||
|
||||
@ -72,6 +89,8 @@ def main(args=sys.argv):
|
||||
print (log, file=sys.stderr)
|
||||
|
||||
print (result)
|
||||
if not opts.opf:
|
||||
prints('Cover :', cf)
|
||||
|
||||
return 0
|
||||
|
||||
|
178
src/calibre/ebooks/metadata/sources/covers.py
Normal file
178
src/calibre/ebooks/metadata/sources/covers.py
Normal file
@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import time
|
||||
from Queue import Queue, Empty
|
||||
from threading import Thread, Event
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
from calibre.ebooks.metadata.sources.base import msprefs, create_log
|
||||
from calibre.utils.magick.draw import Image, save_cover_data_to
|
||||
|
||||
class Worker(Thread):
    '''
    Daemon thread that asks a single metadata plugin to download a cover.

    The plugin is handed a private log (backed by :attr:`buf`), the shared
    result queue ``rq`` and the shared ``abort`` event. :attr:`time_spent`
    is ``None`` until :meth:`run` has finished, after which it holds the
    elapsed wall-clock time in seconds.
    '''

    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
        Thread.__init__(self)
        self.daemon = True

        # What to look for
        self.title = title
        self.authors = authors
        self.identifiers = identifiers

        # How to look for it and where to put what is found
        self.plugin = plugin
        self.abort = abort
        self.timeout = timeout
        self.rq = rq

        # Every worker logs into its own buffer so that the caller can
        # print each plugin's output as one contiguous chunk afterwards
        self.buf = BytesIO()
        self.log = create_log(self.buf)

        self.time_spent = None

    def run(self):
        '''
        Call the plugin's ``download_cover()`` unless abort is already set.

        Any failure in the plugin is written to this worker's log instead
        of propagating out of the thread.
        '''
        began = time.time()
        already_aborted = self.abort.is_set()
        if not already_aborted:
            search_terms = dict(title=self.title, authors=self.authors,
                    identifiers=self.identifiers, timeout=self.timeout)
            try:
                self.plugin.download_cover(self.log, self.rq, self.abort,
                        **search_terms)
            except:
                self.log.exception('Failed to download cover from',
                        self.plugin.name)
        self.time_spent = time.time() - began
|
||||
|
||||
def is_worker_alive(workers):
    ''' Return True if at least one thread in workers is still running. '''
    return any(worker.is_alive() for worker in workers)
|
||||
|
||||
def process_result(log, result):
    '''
    Validate and normalize one raw cover as put on the queue by a plugin.

    :param log: Log object, any failure is recorded via ``log.exception``
    :param result: A ``(plugin, raw_image_data)`` tuple

    :return: ``(plugin, width, height, fmt, data)`` or ``None`` if the
             image could not be loaded/saved or either of its dimensions
             is smaller than 50. ``width``, ``height`` and ``fmt`` are read
             from the image after trimming and before it is re-saved.
    '''
    plugin, raw = result
    try:
        img = Image()
        img.load(raw)
        # NOTE(review): 10 is presumably the fuzz used when trimming
        # borders -- confirm against calibre.utils.magick
        img.trim(10)
        (width, height), fmt = img.size, img.format

        if min(width, height) < 50:
            raise ValueError('Image too small')
        # NOTE(review): the path is not written to since return_data=True,
        # presumably only its extension matters -- confirm
        cover = save_cover_data_to(img, '/cover.jpg', return_data=True)
    except:
        log.exception('Invalid cover from', plugin.name)
        return None
    return (plugin, width, height, fmt, cover)
|
||||
|
||||
def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Run the cover download, putting results into the queue :param:`results`.

    One :class:`Worker` thread is started for every plugin returned by
    ``metadata_plugins(['cover'])``. Covers are validated with
    :func:`process_result` as they arrive. Collection stops when no worker
    is alive any more, when :param:`abort` is set, or when more than
    ``msprefs['wait_after_first_cover_result']`` seconds have passed since
    the first valid cover arrived (in which case :param:`abort` is set by
    this function). A summary for every plugin is then written to
    :param:`log`.

    Each result is a tuple of the form:

        (plugin, width, height, fmt, bytes)

    :param log: Log object that receives the per-plugin summaries
    :param results: Queue that receives the valid covers
    :param abort: Event used to ask the plugins to stop
    :param identifiers: Passed through to the plugins unchanged; this
                        function itself never modifies it
    :param timeout: Passed through to the plugins unchanged
    '''
    plugins = list(metadata_plugins(['cover']))

    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
            in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    wait_time = msprefs['wait_after_first_cover_result']
    found_results = {}

    def store(raw):
        # Validate a raw plugin result and record it. Returns True if the
        # result was a usable cover.
        result = process_result(log, raw)
        if result is None:
            return False
        results.put(result)
        found_results[result[0]] = result
        return True

    while True:
        time.sleep(0.1)
        try:
            raw = rq.get_nowait()
        except Empty:
            pass
        else:
            # Start the countdown when the *first* valid cover arrives.
            # This test used to be inverted ("is not None"), so the
            # timestamp was never set and the wait_time check below could
            # never trigger.
            if store(raw) and first_result_at is None:
                first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if first_result_at is not None and time.time() - first_result_at > wait_time:
            log('Not waiting for any more results')
            abort.set()

        if abort.is_set():
            break

    # Pick up anything that was queued but not yet processed when the loop
    # above exited
    while True:
        try:
            raw = rq.get_nowait()
        except Empty:
            break
        store(raw)

    for w in workers:
        wlog = w.buf.getvalue().strip()
        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
        log('Request extra headers:', w.plugin.browser.addheaders)
        if w.plugin in found_results:
            result = found_results[w.plugin]
            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
        else:
            log('Failed to download valid cover')
        if w.time_spent is None:
            log('Download aborted')
        else:
            log('Took', w.time_spent, 'seconds')
        if wlog:
            log(wlog)
        log('\n'+'*'*80)
|
||||
|
||||
|
||||
def download_cover(log,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Synchronous cover download: runs :func:`run_download` to completion and
    picks the "best" of the covers it produced.

    Covers are ranked first by ``msprefs['cover_priorities']`` (keyed on
    the plugin name, plugins that are not listed count as 1, lower wins)
    and then by area, larger wins.

    :return: A tuple ``(plugin, width, height, fmt, data)`` or ``None`` if
             no cover was found.
    '''
    abort = Event()
    rq = Queue()

    run_download(log, rq, abort, title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)

    # run_download() has returned, so just empty the queue
    covers = []
    while True:
        try:
            covers.append(rq.get_nowait())
        except Empty:
            break

    priorities = msprefs['cover_priorities']

    def rank(cover):
        plugin, width, height = cover[:3]
        # Reciprocal of the area so that a bigger cover gets a smaller key
        return (priorities.get(plugin.name, 1), 1/(width*height))

    if not covers:
        return None
    # min() returns the first of several equally ranked covers, just like
    # a stable sort followed by taking element 0
    return min(covers, key=rank)
|
||||
|
||||
|
||||
|
||||
|
@ -145,15 +145,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
||||
log.exception('Failed to parse rating')
|
||||
|
||||
# Cover
|
||||
mi.has_google_cover = len(extra.xpath(
|
||||
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
|
||||
mi.has_google_cover = None
|
||||
for x in extra.xpath(
|
||||
'//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
|
||||
mi.has_google_cover = x.get('href')
|
||||
break
|
||||
|
||||
return mi
|
||||
# }}}
|
||||
|
||||
class GoogleBooks(Source):
|
||||
|
||||
name = 'Google Books'
|
||||
name = 'Google'
|
||||
description = _('Downloads metadata from Google Books')
|
||||
|
||||
capabilities = frozenset(['identify', 'cover'])
|
||||
@ -213,7 +216,7 @@ class GoogleBooks(Source):
|
||||
results.sort(key=self.identify_results_keygen(
|
||||
title=title, authors=authors, identifiers=identifiers))
|
||||
for mi in results:
|
||||
cached_url = self.cover_url_from_identifiers(mi.identifiers)
|
||||
cached_url = self.get_cached_cover_url(mi.identifiers)
|
||||
if cached_url is not None:
|
||||
break
|
||||
if cached_url is None:
|
||||
@ -223,9 +226,10 @@ class GoogleBooks(Source):
|
||||
if abort.is_set():
|
||||
return
|
||||
br = self.browser
|
||||
log('Downloading cover from:', cached_url)
|
||||
try:
|
||||
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
||||
result_queue.put(cdata)
|
||||
result_queue.put((self, cdata))
|
||||
except:
|
||||
log.exception('Failed to download cover from:', cached_url)
|
||||
|
||||
@ -254,9 +258,9 @@ class GoogleBooks(Source):
|
||||
goog = ans.identifiers['google']
|
||||
for isbn in getattr(ans, 'all_isbns', []):
|
||||
self.cache_isbn_to_identifier(isbn, goog)
|
||||
if ans.has_google_cover:
|
||||
self.cache_identifier_to_cover_url(goog,
|
||||
self.GOOGLE_COVER%goog)
|
||||
if ans.has_google_cover:
|
||||
self.cache_identifier_to_cover_url(goog,
|
||||
self.GOOGLE_COVER%goog)
|
||||
self.clean_downloaded_metadata(ans)
|
||||
result_queue.put(ans)
|
||||
except:
|
||||
|
@ -26,7 +26,7 @@ class OpenLibrary(Source):
|
||||
br = self.browser
|
||||
try:
|
||||
ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
|
||||
result_queue.put(ans)
|
||||
result_queue.put((self, ans))
|
||||
except Exception as e:
|
||||
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
|
||||
log.error('No cover for ISBN: %r found'%isbn)
|
||||
|
Loading…
x
Reference in New Issue
Block a user