Cover download API for new plugins

This commit is contained in:
Kovid Goyal 2011-03-23 14:34:58 -06:00
parent 0e6557fe43
commit 90a53c6dcf
3 changed files with 100 additions and 3 deletions

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import socket, time, re
from urllib import urlencode
from threading import Thread
from Queue import Queue, Empty
from lxml.html import soupparser, tostring
@ -276,7 +277,7 @@ class Amazon(Source):
name = 'Amazon'
description = _('Downloads metadata from Amazon')
capabilities = frozenset(['identify'])
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
@ -444,6 +445,43 @@ class Amazon(Source):
return None
# }}}
def download_cover(self, log, result_queue, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover for the book and put the raw data into result_queue.

    The cover URL is first looked up with :meth:`get_cached_cover_url`
    using the supplied identifiers. If nothing is cached, :meth:`identify`
    is run, its results are ordered with :meth:`identify_results_keygen`
    and the first result that yields a cached cover URL is used.

    :param log: Logger, used via its ``info`` and ``exception`` methods
    :param result_queue: Queue onto which the downloaded data is put
    :param abort: Event like object, checked with ``is_set()`` after
                  identify and again before the download starts
    :param title: Passed through to identify and the results keygen
    :param authors: Passed through to identify and the results keygen
    :param identifiers: Mapping of identifiers. It is only read here, never
                        modified, so the shared ``{}`` default is harmless.
    :param timeout: Timeout passed both to identify and to the cover
                    download request

    Nothing is put into result_queue if no cover URL is found, if abort is
    set, or if the download fails (the failure is logged).
    '''
    cached_url = self.get_cached_cover_url(identifiers)
    if cached_url is None:
        log.info('No cached cover found, running identify')
        rq = Queue()
        # Forward the caller's timeout so that the identify phase is held
        # to the same limit as the cover download itself
        self.identify(log, rq, abort, title=title, authors=authors,
                identifiers=identifiers, timeout=timeout)
        if abort.is_set():
            return
        # identify has returned, so drain whatever it queued
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break
        results.sort(key=self.identify_results_keygen(
            title=title, authors=authors, identifiers=identifiers))
        for mi in results:
            cached_url = self.get_cached_cover_url(mi.identifiers)
            if cached_url is not None:
                break
    if cached_url is None:
        log.info('No cover found')
        return

    if abort.is_set():
        return
    br = self.browser
    try:
        cdata = br.open_novisit(cached_url, timeout=timeout).read()
        result_queue.put(cdata)
    except Exception:
        # Best effort: a failed download is logged, not raised. Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit propagate.
        log.exception('Failed to download cover from:', cached_url)
# }}}
if __name__ == '__main__': # tests {{{
# To run these tests use: calibre-debug -e
# src/calibre/ebooks/metadata/sources/amazon.py

View File

@ -93,8 +93,12 @@ class Source(Plugin):
supported_platforms = ['windows', 'osx', 'linux']
#: Set of capabilities supported by this plugin.
#: Useful capabilities are: 'identify', 'cover'
capabilities = frozenset()
#: List of metadata fields that can potentially be downloaded by this plugin
#: during the identify phase
touched_fields = frozenset()
def __init__(self, *args, **kwargs):
@ -240,7 +244,7 @@ class Source(Plugin):
return keygen
def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5):
identifiers={}, timeout=30):
'''
Identify a book by its title/author/isbn/etc.
@ -280,5 +284,17 @@ class Source(Plugin):
'''
return None
def download_cover(self, log, result_queue, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Fetch a cover image and place it on result_queue. All parameters carry
    the same meaning as they do for :meth:`identify`.

    For efficiency, implementations should rely on cached cover URLs
    wherever they can. If no cached data is available, the usual approach
    is to call identify and work from the results it produces.

    This default implementation does nothing.
    '''
    return None
# }}}

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import time
from urllib import urlencode
from functools import partial
from Queue import Queue, Empty
from lxml import etree
@ -139,7 +140,7 @@ class GoogleBooks(Source):
name = 'Google Books'
description = _('Downloads metadata from Google Books')
capabilities = frozenset(['identify'])
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn',
'identifier:google']) # language currently disabled
@ -183,6 +184,48 @@ class GoogleBooks(Source):
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
goog)
def download_cover(self, log, result_queue, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover for the book and put the raw data into result_queue.

    The cover URL is first derived from the supplied identifiers with
    :meth:`cover_url_from_identifiers`. If that yields nothing,
    :meth:`identify` is run, its results are ordered with
    :meth:`identify_results_keygen` and the first result whose identifiers
    yield a cover URL is used. The downloaded data is only queued if
    :meth:`is_cover_image_valid` accepts it.

    :param log: Logger, used via its ``info``, ``error`` and ``exception``
                methods
    :param result_queue: Queue onto which the downloaded data is put
    :param abort: Event like object, checked with ``is_set()`` after
                  identify and again before the download starts
    :param title: Passed through to identify and the results keygen
    :param authors: Passed through to identify and the results keygen
    :param identifiers: Mapping of identifiers. It is only read here, never
                        modified, so the shared ``{}`` default is harmless.
    :param timeout: Timeout passed both to identify and to the cover
                    download request

    Nothing is put into result_queue if no cover URL is found, if abort is
    set, if the image is rejected, or if the download fails (the failure
    is logged).
    '''
    cached_url = self.cover_url_from_identifiers(identifiers)
    if cached_url is None:
        log.info('No cached cover found, running identify')
        rq = Queue()
        # Forward the caller's timeout so that the identify phase is held
        # to the same limit as the cover download itself
        self.identify(log, rq, abort, title=title, authors=authors,
                identifiers=identifiers, timeout=timeout)
        if abort.is_set():
            return
        # identify has returned, so drain whatever it queued
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break
        results.sort(key=self.identify_results_keygen(
            title=title, authors=authors, identifiers=identifiers))
        for mi in results:
            cached_url = self.cover_url_from_identifiers(mi.identifiers)
            if cached_url is not None:
                break
    if cached_url is None:
        log.info('No cover found')
        return

    if abort.is_set():
        return
    br = self.browser
    try:
        cdata = br.open_novisit(cached_url, timeout=timeout).read()
        if self.is_cover_image_valid(cdata):
            result_queue.put(cdata)
        else:
            log.error('No cover found for %r'%identifiers)
    except Exception:
        # Best effort: a failed download is logged, not raised. Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit propagate.
        log.exception('Failed to download cover from:', cached_url)
# }}}
def is_cover_image_valid(self, raw):
# When no cover is present, returns a PNG saying image not available
# Try for example google identifier llNqPwAACAAJ