From 90a53c6dcfd7966a616a1f49bf34ddd181edf8dc Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 23 Mar 2011 14:34:58 -0600
Subject: [PATCH] Cover download API for new plugins

---
 src/calibre/ebooks/metadata/sources/amazon.py | 40 ++++++++++++++++-
 src/calibre/ebooks/metadata/sources/base.py   | 18 +++++++-
 src/calibre/ebooks/metadata/sources/google.py | 45 ++++++++++++++++++-
 3 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 6a1f151d62..b16fd81243 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import socket, time, re
 from urllib import urlencode
 from threading import Thread
+from Queue import Queue, Empty
 
 from lxml.html import soupparser, tostring
 
@@ -276,7 +277,7 @@ class Amazon(Source):
     name = 'Amazon'
     description = _('Downloads metadata from Amazon')
 
-    capabilities = frozenset(['identify'])
+    capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
         'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
 
@@ -444,6 +445,43 @@ class Amazon(Source):
         return None
     # }}}
 
+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        cached_url = self.get_cached_cover_url(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers)
+            if abort.is_set():
+                return
+            results = []
+            while True:
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:
+                cached_url = self.get_cached_cover_url(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found')
+            return
+
+        if abort.is_set():
+            return
+        br = self.browser
+        try:
+            cdata = br.open_novisit(cached_url, timeout=timeout).read()
+            result_queue.put(cdata)
+        except Exception: # best effort: log the failure and return
+            log.exception('Failed to download cover from:', cached_url)
+    # }}}
+
+
 if __name__ == '__main__': # tests {{{
     # To run these test use: calibre-debug -e
     # src/calibre/ebooks/metadata/sources/amazon.py
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index d2dff0d9e6..9845007068 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -93,8 +93,12 @@ class Source(Plugin):
 
     supported_platforms = ['windows', 'osx', 'linux']
 
+    #: Set of capabilities supported by this plugin.
+    #: Useful capabilities are: 'identify', 'cover'
     capabilities = frozenset()
 
+    #: List of metadata fields that can potentially be downloaded by this plugin
+    #: during the identify phase
     touched_fields = frozenset()
 
     def __init__(self, *args, **kwargs):
@@ -240,7 +244,7 @@ class Source(Plugin):
         return keygen
 
     def identify(self, log, result_queue, abort, title=None, authors=None,
-            identifiers={}, timeout=5):
+            identifiers={}, timeout=30):
         '''
         Identify a book by its title/author/isbn/etc.
 
@@ -280,5 +284,17 @@ class Source(Plugin):
         '''
         return None
 
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30):
+        '''
+        Download a cover and put it into result_queue. The parameters all have
+        the same meaning as for :meth:`identify`.
+
+        This method should use cached cover URLs for efficiency whenever
+        possible. When cached data is not present, most plugins simply call
+        identify and use its results.
+        '''
+        pass
+
     # }}}
 
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 06362cf8b8..9677027662 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import time
 from urllib import urlencode
 from functools import partial
+from Queue import Queue, Empty
 
 from lxml import etree
 
@@ -139,7 +140,7 @@ class GoogleBooks(Source):
     name = 'Google Books'
     description = _('Downloads metadata from Google Books')
 
-    capabilities = frozenset(['identify'])
+    capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
         'comments', 'publisher', 'identifier:isbn',
         'identifier:google']) # language currently disabled
@@ -183,6 +184,48 @@ class GoogleBooks(Source):
         return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
             goog)
 
+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        cached_url = self.cover_url_from_identifiers(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers)
+            if abort.is_set():
+                return
+            results = []
+            while True:
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:
+                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found')
+            return
+
+        if abort.is_set():
+            return
+        br = self.browser
+        try:
+            cdata = br.open_novisit(cached_url, timeout=timeout).read()
+            if self.is_cover_image_valid(cdata):
+                result_queue.put(cdata)
+            else:
+                log.error('No cover found for %r'%identifiers)
+        except Exception: # best effort: log the failure and return
+            log.exception('Failed to download cover from:', cached_url)
+
+
+    # }}}
+
+
     def is_cover_image_valid(self, raw):
         # When no cover is present, returns a PNG saying image not available
         # Try for example google identifier llNqPwAACAAJ