mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add cover downloading to the new fetch-ebook-metadata
This commit is contained in:
parent
2828ba5276
commit
6773cf71af
@ -279,7 +279,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
|
|
||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon'
|
name = 'Amazon Metadata'
|
||||||
description = _('Downloads metadata from Amazon')
|
description = _('Downloads metadata from Amazon')
|
||||||
|
|
||||||
capabilities = frozenset(['identify', 'cover'])
|
capabilities = frozenset(['identify', 'cover'])
|
||||||
@ -493,9 +493,10 @@ class Amazon(Source):
|
|||||||
if abort.is_set():
|
if abort.is_set():
|
||||||
return
|
return
|
||||||
br = self.browser
|
br = self.browser
|
||||||
|
log('Downloading cover from:', cached_url)
|
||||||
try:
|
try:
|
||||||
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
||||||
result_queue.put(cdata)
|
result_queue.put((self, cdata))
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to download cover from:', cached_url)
|
log.exception('Failed to download cover from:', cached_url)
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -22,6 +22,12 @@ msprefs.defaults['txt_comments'] = False
|
|||||||
msprefs.defaults['ignore_fields'] = []
|
msprefs.defaults['ignore_fields'] = []
|
||||||
msprefs.defaults['max_tags'] = 20
|
msprefs.defaults['max_tags'] = 20
|
||||||
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
|
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
|
||||||
|
msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
|
||||||
|
|
||||||
|
# Google covers are often poor quality (scans/errors) but they have high
|
||||||
|
# resolution, so they trump covers from better sources. So make sure they
|
||||||
|
# are only used if no other covers are found.
|
||||||
|
msprefs.defaults['cover_priorities'] = {'Google':2}
|
||||||
|
|
||||||
def create_log(ostream=None):
|
def create_log(ostream=None):
|
||||||
log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
|
log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
|
||||||
@ -340,7 +346,8 @@ class Source(Plugin):
|
|||||||
title=None, authors=None, identifiers={}, timeout=30):
|
title=None, authors=None, identifiers={}, timeout=30):
|
||||||
'''
|
'''
|
||||||
Download a cover and put it into result_queue. The parameters all have
|
Download a cover and put it into result_queue. The parameters all have
|
||||||
the same meaning as for :meth:`identify`.
|
the same meaning as for :meth:`identify`. Put (self, cover_data) into
|
||||||
|
result_queue.
|
||||||
|
|
||||||
This method should use cached cover URLs for efficiency whenever
|
This method should use cached cover URLs for efficiency whenever
|
||||||
possible. When cached data is not present, most plugins simply call
|
possible. When cached data is not present, most plugins simply call
|
||||||
|
@ -13,10 +13,13 @@ from threading import Event
|
|||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.utils.magick.draw import save_cover_data_to
|
||||||
from calibre.ebooks.metadata import string_to_authors
|
from calibre.ebooks.metadata import string_to_authors
|
||||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||||
from calibre.ebooks.metadata.sources.base import create_log
|
from calibre.ebooks.metadata.sources.base import create_log
|
||||||
from calibre.ebooks.metadata.sources.identify import identify
|
from calibre.ebooks.metadata.sources.identify import identify
|
||||||
|
from calibre.ebooks.metadata.sources.covers import download_cover
|
||||||
|
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = OptionParser(textwrap.dedent(
|
parser = OptionParser(textwrap.dedent(
|
||||||
@ -33,6 +36,8 @@ def option_parser():
|
|||||||
parser.add_option('-v', '--verbose', default=False, action='store_true',
|
parser.add_option('-v', '--verbose', default=False, action='store_true',
|
||||||
help='Print the log to the console (stderr)')
|
help='Print the log to the console (stderr)')
|
||||||
parser.add_option('-o', '--opf', help='Output the metadata in OPF format')
|
parser.add_option('-o', '--opf', help='Output the metadata in OPF format')
|
||||||
|
parser.add_option('-c', '--cover',
|
||||||
|
help='Specify a filename. The cover, if available, will be saved to it')
|
||||||
parser.add_option('-d', '--timeout', default='30',
|
parser.add_option('-d', '--timeout', default='30',
|
||||||
help='Timeout in seconds. Default is 30')
|
help='Timeout in seconds. Default is 30')
|
||||||
|
|
||||||
@ -57,14 +62,26 @@ def main(args=sys.argv):
|
|||||||
results = identify(log, abort, title=opts.title, authors=authors,
|
results = identify(log, abort, title=opts.title, authors=authors,
|
||||||
identifiers=identifiers, timeout=int(opts.timeout))
|
identifiers=identifiers, timeout=int(opts.timeout))
|
||||||
|
|
||||||
log = buf.getvalue()
|
|
||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
print (log, file=sys.stderr)
|
print (log, file=sys.stderr)
|
||||||
prints('No results found', file=sys.stderr)
|
prints('No results found', file=sys.stderr)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
result = results[0]
|
result = results[0]
|
||||||
|
|
||||||
|
cf = None
|
||||||
|
if opts.cover and results:
|
||||||
|
cover = download_cover(log, title=opts.title, authors=authors,
|
||||||
|
identifiers=result.identifiers, timeout=int(opts.timeout))
|
||||||
|
if cover is None:
|
||||||
|
prints('No cover found', file=sys.stderr)
|
||||||
|
else:
|
||||||
|
save_cover_data_to(cover[-1], opts.cover)
|
||||||
|
result.cover = cf = opts.cover
|
||||||
|
|
||||||
|
|
||||||
|
log = buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
result = (metadata_to_opf(result) if opts.opf else
|
result = (metadata_to_opf(result) if opts.opf else
|
||||||
unicode(result).encode('utf-8'))
|
unicode(result).encode('utf-8'))
|
||||||
|
|
||||||
@ -72,6 +89,8 @@ def main(args=sys.argv):
|
|||||||
print (log, file=sys.stderr)
|
print (log, file=sys.stderr)
|
||||||
|
|
||||||
print (result)
|
print (result)
|
||||||
|
if not opts.opf:
|
||||||
|
prints('Cover :', cf)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
178
src/calibre/ebooks/metadata/sources/covers.py
Normal file
178
src/calibre/ebooks/metadata/sources/covers.py
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import time
|
||||||
|
from Queue import Queue, Empty
|
||||||
|
from threading import Thread, Event
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
from calibre.customize.ui import metadata_plugins
|
||||||
|
from calibre.ebooks.metadata.sources.base import msprefs, create_log
|
||||||
|
from calibre.utils.magick.draw import Image, save_cover_data_to
|
||||||
|
|
||||||
|
class Worker(Thread):
    '''
    Daemon thread that asks a single metadata source plugin to download a
    cover.

    The plugin is handed the shared queue ``rq`` and is expected to put its
    output there itself. Everything the plugin logs goes into the in-memory
    buffer ``self.buf`` (via the log returned by :func:`create_log`), so that
    the caller can print it in one piece once the download is over.

    ``self.time_spent`` is None until :meth:`run` has finished; afterwards it
    holds the number of seconds :meth:`run` took.
    '''

    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
        Thread.__init__(self)
        # Never keep the interpreter alive just because a download hangs
        self.daemon = True

        self.plugin = plugin
        self.abort = abort
        self.buf = BytesIO()
        self.log = create_log(self.buf)
        self.title, self.authors, self.identifiers = (title, authors,
                identifiers)
        self.timeout, self.rq = timeout, rq
        self.time_spent = None

    def run(self):
        '''
        Call the plugin's ``download_cover`` unless the abort event is already
        set. A failure inside the plugin is logged to this worker's private
        log and never propagates out of the thread.
        '''
        start_time = time.time()
        try:
            if not self.abort.is_set():
                self.plugin.download_cover(self.log, self.rq, self.abort,
                        title=self.title, authors=self.authors,
                        identifiers=self.identifiers, timeout=self.timeout)
        except Exception:
            # One misbehaving plugin must not prevent the others from
            # delivering their covers, so only log the failure.
            self.log.exception('Failed to download cover from',
                    self.plugin.name)
        finally:
            # Recorded even on failure: a value of None must only ever mean
            # "this worker has not finished".
            self.time_spent = time.time() - start_time
|
||||||
|
|
||||||
|
def is_worker_alive(workers):
    '''
    Return True if at least one of the threads in ``workers`` is still
    running, False otherwise (including when ``workers`` is empty).
    '''
    return any(w.is_alive() for w in workers)
|
||||||
|
|
||||||
|
def process_result(log, result):
    '''
    Validate and normalize one raw cover delivered by a plugin.

    :param log: Log object. ``log.exception`` is called when the cover is
        rejected.
    :param result: A ``(plugin, data)`` tuple, where ``data`` is the raw image
        data downloaded by ``plugin``.

    :return: ``(plugin, width, height, fmt, data)`` on success. ``width`` and
        ``height`` are measured after trimming, ``fmt`` is the format reported
        for the downloaded image and ``data`` is whatever
        ``save_cover_data_to`` returned for the trimmed image. None is
        returned if the image cannot be processed or if it is smaller than 50
        pixels in either dimension.
    '''
    plugin, data = result
    try:
        im = Image()
        im.load(data)
        # NOTE(review): presumably strips uniform borders, with 10 being the
        # fuzz factor -- confirm against calibre.utils.magick
        im.trim(10)
        width, height = im.size
        fmt = im.format

        if width < 50 or height < 50:
            raise ValueError('Image too small')
        # NOTE(review): the path is never written to since return_data=True;
        # presumably only its extension matters (selects JPEG) -- confirm
        data = save_cover_data_to(im, '/cover.jpg', return_data=True)
    except Exception:
        # Corrupt data, an unsupported format and a too small image are all
        # handled the same way: log the reason and drop the cover.
        log.exception('Invalid cover from', plugin.name)
        return None
    return (plugin, width, height, fmt, data)
|
||||||
|
|
||||||
|
def _publish_next_cover(log, rq, results, found_results):
    '''
    Take one raw cover off ``rq`` without blocking and validate it with
    :func:`process_result`. A valid cover is put into ``results`` and recorded
    in ``found_results`` under its plugin.

    Returns True if a valid cover was published, False if the cover was
    rejected. Raises ``Empty`` if ``rq`` has nothing pending.
    '''
    result = process_result(log, rq.get_nowait())
    if result is None:
        return False
    results.put(result)
    found_results[result[0]] = result
    return True


def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Run the cover download, putting results into the queue :param:`results`.

    Each result is a tuple of the form:

        (plugin, width, height, fmt, bytes)

    One worker thread is started per plugin that has the ``cover``
    capability. This function returns when all workers have finished, when
    :param:`abort` is set, or when more than
    ``msprefs['wait_after_first_cover_result']`` seconds have passed since the
    first valid cover arrived (in which case :param:`abort` is set by this
    function). Before returning, a per plugin summary is written to
    :param:`log`.
    '''
    # identifiers={} is safe as a default here: it is never mutated, only
    # passed on to the plugins.
    plugins = list(metadata_plugins(['cover']))

    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
            in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    wait_time = msprefs['wait_after_first_cover_result']
    found_results = {}

    while True:
        time.sleep(0.1)
        try:
            if (_publish_next_cover(log, rq, results, found_results) and
                    first_result_at is None):
                # Start the grace period at the *first* valid cover. (This
                # used to test ``is not None``, which could never be true, so
                # the grace period never started.)
                first_result_at = time.time()
        except Empty:
            pass

        if not is_worker_alive(workers):
            break

        if first_result_at is not None and time.time() - first_result_at > wait_time:
            log('Not waiting for any more results')
            abort.set()

        if abort.is_set():
            break

    # Covers may have been queued between the last poll and leaving the loop
    while True:
        try:
            _publish_next_cover(log, rq, results, found_results)
        except Empty:
            break

    for w in workers:
        wlog = w.buf.getvalue().strip()
        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
        log('Request extra headers:', w.plugin.browser.addheaders)
        if w.plugin in found_results:
            result = found_results[w.plugin]
            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
        else:
            log('Failed to download valid cover')
        # time_spent is only filled in once the worker has finished running
        if w.time_spent is None:
            log('Download aborted')
        else:
            log('Took', w.time_spent, 'seconds')
        if wlog:
            log(wlog)
        log('\n'+'*'*80)
|
||||||
|
|
||||||
|
|
||||||
|
def download_cover(log,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Blocking cover download: runs all cover plugins via :func:`run_download`
    and picks a single winner from whatever they delivered.

    Covers are ranked first by the priority of their plugin, as given by
    ``msprefs['cover_priorities']`` (a lower number wins, plugins not listed
    there get 1), and then by area, larger covers winning.

    :return: The winning cover as a tuple
        ``(plugin, width, height, fmt, data)`` or None if no cover was found.
    '''
    collected = Queue()
    run_download(log, collected, Event(), title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)

    # run_download() has returned, so just empty the queue into a list
    covers = []
    try:
        while True:
            covers.append(collected.get_nowait())
    except Empty:
        pass

    priorities = msprefs['cover_priorities']

    def rank(cover):
        source, width, height = cover[0], cover[1], cover[2]
        # The reciprocal of the area makes larger covers sort first
        return (priorities.get(source.name, 1), 1/(width*height))

    if not covers:
        return None
    # min() returns the first of several equally ranked covers, which is the
    # same one a stable sort would put at index 0
    return min(covers, key=rank)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -145,15 +145,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
log.exception('Failed to parse rating')
|
log.exception('Failed to parse rating')
|
||||||
|
|
||||||
# Cover
|
# Cover
|
||||||
mi.has_google_cover = len(extra.xpath(
|
mi.has_google_cover = None
|
||||||
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
|
for x in extra.xpath(
|
||||||
|
'//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
|
||||||
|
mi.has_google_cover = x.get('href')
|
||||||
|
break
|
||||||
|
|
||||||
return mi
|
return mi
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class GoogleBooks(Source):
|
class GoogleBooks(Source):
|
||||||
|
|
||||||
name = 'Google Books'
|
name = 'Google'
|
||||||
description = _('Downloads metadata from Google Books')
|
description = _('Downloads metadata from Google Books')
|
||||||
|
|
||||||
capabilities = frozenset(['identify', 'cover'])
|
capabilities = frozenset(['identify', 'cover'])
|
||||||
@ -213,7 +216,7 @@ class GoogleBooks(Source):
|
|||||||
results.sort(key=self.identify_results_keygen(
|
results.sort(key=self.identify_results_keygen(
|
||||||
title=title, authors=authors, identifiers=identifiers))
|
title=title, authors=authors, identifiers=identifiers))
|
||||||
for mi in results:
|
for mi in results:
|
||||||
cached_url = self.cover_url_from_identifiers(mi.identifiers)
|
cached_url = self.get_cached_cover_url(mi.identifiers)
|
||||||
if cached_url is not None:
|
if cached_url is not None:
|
||||||
break
|
break
|
||||||
if cached_url is None:
|
if cached_url is None:
|
||||||
@ -223,9 +226,10 @@ class GoogleBooks(Source):
|
|||||||
if abort.is_set():
|
if abort.is_set():
|
||||||
return
|
return
|
||||||
br = self.browser
|
br = self.browser
|
||||||
|
log('Downloading cover from:', cached_url)
|
||||||
try:
|
try:
|
||||||
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
||||||
result_queue.put(cdata)
|
result_queue.put((self, cdata))
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to download cover from:', cached_url)
|
log.exception('Failed to download cover from:', cached_url)
|
||||||
|
|
||||||
@ -254,9 +258,9 @@ class GoogleBooks(Source):
|
|||||||
goog = ans.identifiers['google']
|
goog = ans.identifiers['google']
|
||||||
for isbn in getattr(ans, 'all_isbns', []):
|
for isbn in getattr(ans, 'all_isbns', []):
|
||||||
self.cache_isbn_to_identifier(isbn, goog)
|
self.cache_isbn_to_identifier(isbn, goog)
|
||||||
if ans.has_google_cover:
|
if ans.has_google_cover:
|
||||||
self.cache_identifier_to_cover_url(goog,
|
self.cache_identifier_to_cover_url(goog,
|
||||||
self.GOOGLE_COVER%goog)
|
self.GOOGLE_COVER%goog)
|
||||||
self.clean_downloaded_metadata(ans)
|
self.clean_downloaded_metadata(ans)
|
||||||
result_queue.put(ans)
|
result_queue.put(ans)
|
||||||
except:
|
except:
|
||||||
|
@ -26,7 +26,7 @@ class OpenLibrary(Source):
|
|||||||
br = self.browser
|
br = self.browser
|
||||||
try:
|
try:
|
||||||
ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
|
ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
|
||||||
result_queue.put(ans)
|
result_queue.put((self, ans))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
|
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
|
||||||
log.error('No cover for ISBN: %r found'%isbn)
|
log.error('No cover for ISBN: %r found'%isbn)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user