Cover download API for new plugins

2025-08-11 09:13:57 -04:00 · 2011-03-23 14:34:58 -06:00 · 2011-03-23 14:34:58 -06:00 · 90a53c6dcf
commit 90a53c6dcf
parent 0e6557fe43
3 changed files with 100 additions and 3 deletions
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import socket, time, re
 from urllib import urlencode
 from threading import Thread
+from Queue import Queue, Empty

 from lxml.html import soupparser, tostring

@ -276,7 +277,7 @@ class Amazon(Source):
    name = 'Amazon'
    description = _('Downloads metadata from Amazon')

-    capabilities = frozenset(['identify'])
+    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])

@ -444,6 +445,43 @@ class Amazon(Source):
        return None
    # }}}

+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        cached_url = self.get_cached_cover_url(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers)
+            if abort.is_set():
+                return
+            results = []
+            while True:
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:
+                cached_url = self.get_cached_cover_url(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found for')
+            return
+
+        if abort.is_set():
+            return
+        br = self.browser
+        try:
+            cdata = br.open_novisit(cached_url, timeout=timeout).read()
+            result_queue.put(cdata)
+        except:
+            log.exception('Failed to download cover from:', cached_url)
+    # }}}
+
+
 if __name__ == '__main__': # tests {{{
    # To run these test use: calibre-debug -e
    # src/calibre/ebooks/metadata/sources/amazon.py
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -93,8 +93,12 @@ class Source(Plugin):

    supported_platforms = ['windows', 'osx', 'linux']

+    #: Set of capabilities supported by this plugin.
+    #: Useful capabilities are: 'identify', 'cover'
    capabilities = frozenset()

+    #: List of metadata fields that can potentially be downloaded by this plugin
+    #: during the identify phase
    touched_fields = frozenset()

    def __init__(self, *args, **kwargs):
@ -240,7 +244,7 @@ class Source(Plugin):
        return keygen

    def identify(self, log, result_queue, abort, title=None, authors=None,
-            identifiers={}, timeout=5):
+            identifiers={}, timeout=30):
        '''
        Identify a book by its title/author/isbn/etc.

@ -280,5 +284,17 @@ class Source(Plugin):
        '''
        return None

+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30):
+        '''
+        Download a cover and put it into result_queue. The parameters all have
+        the same meaning as for :meth:`identify`.
+
+        This method should use cached cover URLs for efficiency whenever
+        possible. When cached data is not present, most plugins simply call
+        identify and use its results.
+        '''
+        pass
+
    # }}}

--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import time
 from urllib import urlencode
 from functools import partial
+from Queue import Queue, Empty

 from lxml import etree

@ -139,7 +140,7 @@ class GoogleBooks(Source):
    name = 'Google Books'
    description = _('Downloads metadata from Google Books')

-    capabilities = frozenset(['identify'])
+    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
        'comments', 'publisher', 'identifier:isbn',
        'identifier:google']) # language currently disabled
@ -183,6 +184,48 @@ class GoogleBooks(Source):
            return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
                goog)

+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        cached_url = self.cover_url_from_identifiers(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers)
+            if abort.is_set():
+                return
+            results = []
+            while True:
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:
+                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found')
+            return
+
+        if abort.is_set():
+            return
+        br = self.browser
+        try:
+            cdata = br.open_novisit(cached_url, timeout=timeout).read()
+            if self.is_cover_image_valid(cdata):
+                result_queue.put(cdata)
+            else:
+                log.error('No cover found for %r'%identifiers)
+        except:
+            log.exception('Failed to download cover from:', cached_url)
+
+
+    # }}}
+
+
    def is_cover_image_valid(self, raw):
        # When no cover is present, returns a PNG saying image not available
        # Try for example google identifier llNqPwAACAAJ