From 90a53c6dcfd7966a616a1f49bf34ddd181edf8dc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 23 Mar 2011 14:34:58 -0600
Subject: [PATCH] Cover download API for new plugins

---
 src/calibre/ebooks/metadata/sources/amazon.py | 40 ++++++++++++++++-
 src/calibre/ebooks/metadata/sources/base.py   | 18 +++++++-
 src/calibre/ebooks/metadata/sources/google.py | 45 ++++++++++++++++++-
 3 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 6a1f151d62..b16fd81243 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import socket, time, re
 from urllib import urlencode
 from threading import Thread
+from Queue import Queue, Empty
 
 from lxml.html import soupparser, tostring
 
@@ -276,7 +277,7 @@ class Amazon(Source):
     name = 'Amazon'
     description = _('Downloads metadata from Amazon')
 
-    capabilities = frozenset(['identify'])
+    capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
         'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
 
@@ -444,6 +445,43 @@ class Amazon(Source):
         return None
     # }}}
 
+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        '''Put raw cover data on result_queue, running identify if needed.'''
+        cached_url = self.get_cached_cover_url(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers, timeout=timeout)
+            if abort.is_set():
+                return
+            results = []
+            while True:  # drain everything identify queued
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:  # first sorted result with a cached cover URL
+                cached_url = self.get_cached_cover_url(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found')
+            return
+        if abort.is_set():
+            return
+        br = self.browser
+        try:
+            cdata = br.open_novisit(cached_url, timeout=timeout).read()
+            result_queue.put(cdata)
+        except Exception:  # best effort: log the failure, queue nothing
+            log.exception('Failed to download cover from:', cached_url)
+    # }}}
+
+
 if __name__ == '__main__': # tests {{{
     # To run these test use: calibre-debug -e
     # src/calibre/ebooks/metadata/sources/amazon.py
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index d2dff0d9e6..9845007068 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -93,8 +93,12 @@ class Source(Plugin):
 
     supported_platforms = ['windows', 'osx', 'linux']
 
+    #: Set of capabilities supported by this plugin.
+    #: Useful capabilities are: 'identify', 'cover'
     capabilities = frozenset()
 
+    #: List of metadata fields that can potentially be downloaded by this plugin
+    #: during the identify phase
     touched_fields = frozenset()
 
     def __init__(self, *args, **kwargs):
@@ -240,7 +244,7 @@ class Source(Plugin):
         return keygen
 
     def identify(self, log, result_queue, abort, title=None, authors=None,
-            identifiers={}, timeout=5):
+            identifiers={}, timeout=30):
         '''
         Identify a book by its title/author/isbn/etc.
 
@@ -280,5 +284,17 @@ class Source(Plugin):
         '''
         return None
 
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30):
+        '''
+        Download a cover and put it into result_queue. The parameters all have
+        the same meaning as for :meth:`identify`.
+
+        This method should use cached cover URLs for efficiency whenever
+        possible. When cached data is not present, most plugins simply call
+        identify and use its results. This base implementation does nothing.
+        '''
+        pass
+
     # }}}
 
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 06362cf8b8..9677027662 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import time
 from urllib import urlencode
 from functools import partial
+from Queue import Queue, Empty
 
 from lxml import etree
 
@@ -139,7 +140,7 @@ class GoogleBooks(Source):
     name = 'Google Books'
     description = _('Downloads metadata from Google Books')
 
-    capabilities = frozenset(['identify'])
+    capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
         'comments', 'publisher', 'identifier:isbn',
         'identifier:google']) # language currently disabled
@@ -183,6 +184,48 @@ class GoogleBooks(Source):
             return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
                 goog)
 
+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        '''Put the raw cover data on result_queue. When identifiers give no
+        cover URL, run identify and use the first sorted result that does.
+        Nothing is queued when the image fails is_cover_image_valid.'''
+        cached_url = self.cover_url_from_identifiers(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers, timeout=timeout)
+            if abort.is_set():
+                return
+            results = []
+            while True:  # drain everything identify queued
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:
+                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found')
+            return
+        if abort.is_set():
+            return
+        br = self.browser
+        try:
+            cdata = br.open_novisit(cached_url, timeout=timeout).read()
+            if self.is_cover_image_valid(cdata):
+                result_queue.put(cdata)
+            else:
+                log.error('No cover found for %r'%identifiers)
+        except Exception:  # best effort: log the failure, queue nothing
+            log.exception('Failed to download cover from:', cached_url)
+    # }}}
+
+
     def is_cover_image_valid(self, raw):
         # When no cover is present, returns a PNG saying image not available
         # Try for example google identifier llNqPwAACAAJ