Cover download API for new plugins

This commit is contained in:
Kovid Goyal 2011-03-23 14:34:58 -06:00
parent 0e6557fe43
commit 90a53c6dcf
3 changed files with 100 additions and 3 deletions

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import socket, time, re import socket, time, re
from urllib import urlencode from urllib import urlencode
from threading import Thread from threading import Thread
from Queue import Queue, Empty
from lxml.html import soupparser, tostring from lxml.html import soupparser, tostring
@ -276,7 +277,7 @@ class Amazon(Source):
name = 'Amazon' name = 'Amazon'
description = _('Downloads metadata from Amazon') description = _('Downloads metadata from Amazon')
capabilities = frozenset(['identify']) capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:amazon', touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate']) 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
@ -444,6 +445,43 @@ class Amazon(Source):
return None return None
# }}} # }}}
def download_cover(self, log, result_queue, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover for the book described by title/authors/identifiers
    and put the raw image data into result_queue.

    Prefers a cached Amazon cover URL. When none is cached, runs a full
    :meth:`identify` and probes the identifiers of the returned results,
    best match first, for a cached URL.

    :param log: logger with info()/exception() methods
    :param result_queue: queue that receives the raw cover bytes
    :param abort: event-like object; checked between the slow steps
    :param timeout: network timeout in seconds for the image download
    '''
    cached_url = self.get_cached_cover_url(identifiers)
    if cached_url is None:
        log.info('No cached cover found, running identify')
        rq = Queue()
        self.identify(log, rq, abort, title=title, authors=authors,
                identifiers=identifiers)
        if abort.is_set():
            return
        # Drain everything identify() produced
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break
        # Most relevant results first, then take the first cached URL
        results.sort(key=self.identify_results_keygen(
            title=title, authors=authors, identifiers=identifiers))
        for mi in results:
            cached_url = self.get_cached_cover_url(mi.identifiers)
            if cached_url is not None:
                break
    if cached_url is None:
        # Original message was truncated ('No cover found for'); include
        # the identifiers it was meant to mention
        log.info('No cover found for %r' % identifiers)
        return
    if abort.is_set():
        return
    br = self.browser
    try:
        cdata = br.open_novisit(cached_url, timeout=timeout).read()
        result_queue.put(cdata)
    except Exception:
        # Was a bare except:, which would also swallow SystemExit and
        # KeyboardInterrupt
        log.exception('Failed to download cover from:', cached_url)
# }}}
if __name__ == '__main__': # tests {{{ if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e # To run these test use: calibre-debug -e
# src/calibre/ebooks/metadata/sources/amazon.py # src/calibre/ebooks/metadata/sources/amazon.py

View File

@ -93,8 +93,12 @@ class Source(Plugin):
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
#: Set of capabilites supported by this plugin.
#: Useful capabilities are: 'identify', 'cover'
capabilities = frozenset() capabilities = frozenset()
#: List of metadata fields that can potentially be download by this plugin
#: during the identify phase
touched_fields = frozenset() touched_fields = frozenset()
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -240,7 +244,7 @@ class Source(Plugin):
return keygen return keygen
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5): identifiers={}, timeout=30):
''' '''
Identify a book by its title/author/isbn/etc. Identify a book by its title/author/isbn/etc.
@ -280,5 +284,17 @@ class Source(Plugin):
''' '''
return None return None
def download_cover(self, log, result_queue, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover image and put it into result_queue. All parameters
    carry the same meaning as for :meth:`identify`.

    The base implementation does nothing; plugins that declare the
    'cover' capability override it. Implementations should prefer cached
    cover URLs for efficiency, and when no cached data exists they
    typically run identify and work from its results.
    '''
    return None
# }}} # }}}

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import time import time
from urllib import urlencode from urllib import urlencode
from functools import partial from functools import partial
from Queue import Queue, Empty
from lxml import etree from lxml import etree
@ -139,7 +140,7 @@ class GoogleBooks(Source):
name = 'Google Books' name = 'Google Books'
description = _('Downloads metadata from Google Books') description = _('Downloads metadata from Google Books')
capabilities = frozenset(['identify']) capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn', 'comments', 'publisher', 'identifier:isbn',
'identifier:google']) # language currently disabled 'identifier:google']) # language currently disabled
@ -183,6 +184,48 @@ class GoogleBooks(Source):
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' % return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
goog) goog)
def download_cover(self, log, result_queue, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover for the book described by title/authors/identifiers
    and put the raw image data into result_queue.

    Builds the cover URL directly from known identifiers when possible;
    otherwise runs :meth:`identify` and probes the identifiers of the
    returned results, best match first. Downloaded data is checked with
    is_cover_image_valid() before being queued, since Google serves a
    placeholder "image not available" PNG when no real cover exists.

    :param log: logger with info()/error()/exception() methods
    :param result_queue: queue that receives the raw cover bytes
    :param abort: event-like object; checked between the slow steps
    :param timeout: network timeout in seconds for the image download
    '''
    cached_url = self.cover_url_from_identifiers(identifiers)
    if cached_url is None:
        log.info('No cached cover found, running identify')
        rq = Queue()
        self.identify(log, rq, abort, title=title, authors=authors,
                identifiers=identifiers)
        if abort.is_set():
            return
        # Drain everything identify() produced
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break
        # Most relevant results first, then take the first usable URL
        results.sort(key=self.identify_results_keygen(
            title=title, authors=authors, identifiers=identifiers))
        for mi in results:
            cached_url = self.cover_url_from_identifiers(mi.identifiers)
            if cached_url is not None:
                break
    if cached_url is None:
        log.info('No cover found')
        return
    if abort.is_set():
        return
    br = self.browser
    try:
        cdata = br.open_novisit(cached_url, timeout=timeout).read()
        if self.is_cover_image_valid(cdata):
            result_queue.put(cdata)
        else:
            log.error('No cover found for %r'%identifiers)
    except Exception:
        # Was a bare except:, which would also swallow SystemExit and
        # KeyboardInterrupt
        log.exception('Failed to download cover from:', cached_url)
# }}}
def is_cover_image_valid(self, raw): def is_cover_image_valid(self, raw):
# When no cover is present, returns a PNG saying image not available # When no cover is present, returns a PNG saying image not available
# Try for example google identifier llNqPwAACAAJ # Try for example google identifier llNqPwAACAAJ