Cover download API for new plugins

This commit is contained in:
Kovid Goyal 2011-03-23 14:34:58 -06:00
parent 0e6557fe43
commit 90a53c6dcf
3 changed files with 100 additions and 3 deletions

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import socket, time, re import socket, time, re
from urllib import urlencode from urllib import urlencode
from threading import Thread from threading import Thread
from Queue import Queue, Empty
from lxml.html import soupparser, tostring from lxml.html import soupparser, tostring
@ -276,7 +277,7 @@ class Amazon(Source):
name = 'Amazon' name = 'Amazon'
description = _('Downloads metadata from Amazon') description = _('Downloads metadata from Amazon')
capabilities = frozenset(['identify']) capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:amazon', touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate']) 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
@ -444,6 +445,43 @@ class Amazon(Source):
return None return None
# }}} # }}}
def download_cover(self, log, result_queue, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover for the book described by title/authors/identifiers
    and put the raw image data into result_queue.

    Prefers a cached Amazon cover URL. When none is cached, runs a full
    :meth:`identify` and probes the identifiers of the returned results,
    best match first, for a cached URL.

    :param log: logger with info()/exception() methods
    :param result_queue: queue that receives the raw cover bytes
    :param abort: event-like object; checked between the slow steps
    :param timeout: network timeout in seconds for the image download
    '''
    cached_url = self.get_cached_cover_url(identifiers)
    if cached_url is None:
        log.info('No cached cover found, running identify')
        rq = Queue()
        self.identify(log, rq, abort, title=title, authors=authors,
                identifiers=identifiers)
        if abort.is_set():
            return
        # Drain everything identify() produced
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break
        # Most relevant results first, then take the first cached URL
        results.sort(key=self.identify_results_keygen(
            title=title, authors=authors, identifiers=identifiers))
        for mi in results:
            cached_url = self.get_cached_cover_url(mi.identifiers)
            if cached_url is not None:
                break
    if cached_url is None:
        # Original message was truncated ('No cover found for'); include
        # the identifiers it was meant to mention
        log.info('No cover found for %r' % identifiers)
        return
    if abort.is_set():
        return
    br = self.browser
    try:
        cdata = br.open_novisit(cached_url, timeout=timeout).read()
        result_queue.put(cdata)
    except Exception:
        # Was a bare except:, which would also swallow SystemExit and
        # KeyboardInterrupt
        log.exception('Failed to download cover from:', cached_url)
# }}}
if __name__ == '__main__': # tests {{{ if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e # To run these test use: calibre-debug -e
# src/calibre/ebooks/metadata/sources/amazon.py # src/calibre/ebooks/metadata/sources/amazon.py

View File

@ -93,8 +93,12 @@ class Source(Plugin):
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
#: Set of capabilites supported by this plugin.
#: Useful capabilities are: 'identify', 'cover'
capabilities = frozenset() capabilities = frozenset()
#: List of metadata fields that can potentially be download by this plugin
#: during the identify phase
touched_fields = frozenset() touched_fields = frozenset()
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -240,7 +244,7 @@ class Source(Plugin):
return keygen return keygen
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5): identifiers={}, timeout=30):
''' '''
Identify a book by its title/author/isbn/etc. Identify a book by its title/author/isbn/etc.
@ -280,5 +284,17 @@ class Source(Plugin):
''' '''
return None return None
def download_cover(self, log, result_queue, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover image and put it into result_queue. All parameters
    carry the same meaning as for :meth:`identify`.

    The base implementation does nothing; plugins that declare the
    'cover' capability override it. Implementations should prefer cached
    cover URLs for efficiency, and when no cached data exists they
    typically run identify and work from its results.
    '''
    return None
# }}} # }}}

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import time import time
from urllib import urlencode from urllib import urlencode
from functools import partial from functools import partial
from Queue import Queue, Empty
from lxml import etree from lxml import etree
@ -139,7 +140,7 @@ class GoogleBooks(Source):
name = 'Google Books' name = 'Google Books'
description = _('Downloads metadata from Google Books') description = _('Downloads metadata from Google Books')
capabilities = frozenset(['identify']) capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn', 'comments', 'publisher', 'identifier:isbn',
'identifier:google']) # language currently disabled 'identifier:google']) # language currently disabled
@ -183,6 +184,48 @@ class GoogleBooks(Source):
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' % return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
goog) goog)
def download_cover(self, log, result_queue, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download a cover for the book described by title/authors/identifiers
    and put the raw image data into result_queue.

    Builds the cover URL directly from known identifiers when possible;
    otherwise runs :meth:`identify` and probes the identifiers of the
    returned results, best match first. Downloaded data is checked with
    is_cover_image_valid() before being queued, since Google serves a
    placeholder "image not available" PNG when no real cover exists.

    :param log: logger with info()/error()/exception() methods
    :param result_queue: queue that receives the raw cover bytes
    :param abort: event-like object; checked between the slow steps
    :param timeout: network timeout in seconds for the image download
    '''
    cached_url = self.cover_url_from_identifiers(identifiers)
    if cached_url is None:
        log.info('No cached cover found, running identify')
        rq = Queue()
        self.identify(log, rq, abort, title=title, authors=authors,
                identifiers=identifiers)
        if abort.is_set():
            return
        # Drain everything identify() produced
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break
        # Most relevant results first, then take the first usable URL
        results.sort(key=self.identify_results_keygen(
            title=title, authors=authors, identifiers=identifiers))
        for mi in results:
            cached_url = self.cover_url_from_identifiers(mi.identifiers)
            if cached_url is not None:
                break
    if cached_url is None:
        log.info('No cover found')
        return
    if abort.is_set():
        return
    br = self.browser
    try:
        cdata = br.open_novisit(cached_url, timeout=timeout).read()
        if self.is_cover_image_valid(cdata):
            result_queue.put(cdata)
        else:
            log.error('No cover found for %r'%identifiers)
    except Exception:
        # Was a bare except:, which would also swallow SystemExit and
        # KeyboardInterrupt
        log.exception('Failed to download cover from:', cached_url)
# }}}
def is_cover_image_valid(self, raw): def is_cover_image_valid(self, raw):
# When no cover is present, returns a PNG saying image not available # When no cover is present, returns a PNG saying image not available
# Try for example google identifier llNqPwAACAAJ # Try for example google identifier llNqPwAACAAJ