Improve downloading of covers from the google books metadata source

commit c525f5587f
parent b5010e7060
Author: Kovid Goyal
Date:   2017-02-28 14:19:19 +05:30


@@ -59,6 +59,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
     description = XPath('descendant::dc:description')
     language = XPath('descendant::dc:language')
     rating = XPath('descendant::gd:rating[@average]')
+    # print(etree.tostring(entry_, pretty_print=True))
 
     def get_text(extra, x):
         try:
@@ -170,14 +171,14 @@ class GoogleBooks(Source):
     capabilities = frozenset({'identify', 'cover'})
     touched_fields = frozenset({
         'title', 'authors', 'tags', 'pubdate', 'comments', 'publisher',
-        'identifier:isbn', 'rating', 'identifier:google', 'languages'
+        'identifier:isbn', 'identifier:google', 'languages'
     })
     supports_gzip_transfer_encoding = True
     cached_cover_url_is_reliable = False
 
     GOOGLE_COVER = 'https://books.google.com/books?id=%s&printsec=frontcover&img=1'
 
-    DUMMY_IMAGE_MD5 = frozenset({'0de4383ebad0adad5eeb8975cd796657'})
+    DUMMY_IMAGE_MD5 = frozenset({'0de4383ebad0adad5eeb8975cd796657', 'a64fa89d7ebc97075c1d363fc5fea71f'})
 
     def get_book_url(self, identifiers): # {{{
         goog = identifiers.get('google', None)
@@ -262,19 +263,22 @@ class GoogleBooks(Source):
             log.info('No cover found')
             return
 
-        if abort.is_set():
-            return
         br = self.browser
-        log('Downloading cover from:', cached_url)
-        try:
-            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            if cdata:
-                if hashlib.md5(cdata).hexdigest() in self.DUMMY_IMAGE_MD5:
-                    log.warning('Google returned a dummy image, ignoring')
-                else:
-                    result_queue.put((self, cdata))
-        except:
-            log.exception('Failed to download cover from:', cached_url)
+        for candidate in (0, 1):
+            if abort.is_set():
+                return
+            url = cached_url + '&zoom={}'.format(candidate)
+            log('Downloading cover from:', cached_url)
+            try:
+                cdata = br.open_novisit(url, timeout=timeout).read()
+                if cdata:
+                    if hashlib.md5(cdata).hexdigest() in self.DUMMY_IMAGE_MD5:
+                        log.warning('Google returned a dummy image, ignoring')
+                    else:
+                        result_queue.put((self, cdata))
+                        break
+            except Exception:
+                log.exception('Failed to download cover from:', cached_url)
 
     # }}}
@@ -406,4 +410,5 @@ if __name__ == '__main__': # tests {{{
         }, [title_test('Flatland', exact=False)]),
     ]
 )
+
 # }}}
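
The heart of the change is the new retry loop in download_cover(): the cover URL is fetched at zoom level 0 and, if the request fails or returns one of the known placeholder images, again at zoom level 1; a second placeholder MD5 is also added to DUMMY_IMAGE_MD5 so more dummy covers are rejected. Below is a minimal standalone sketch of the same strategy. It swaps calibre's browser object for plain urllib and uses a placeholder volume id, so those details are assumptions rather than the plugin's actual code.

import hashlib
from urllib.request import urlopen

GOOGLE_COVER = 'https://books.google.com/books?id=%s&printsec=frontcover&img=1'

# MD5 digests of Google's "image not available" placeholders (taken from the commit above)
DUMMY_IMAGE_MD5 = frozenset({
    '0de4383ebad0adad5eeb8975cd796657',
    'a64fa89d7ebc97075c1d363fc5fea71f',
})


def download_cover(google_id, timeout=30):
    """Return cover image bytes for a Google Books volume id, or None."""
    cached_url = GOOGLE_COVER % google_id
    for zoom in (0, 1):  # try zoom=0 first, fall back to zoom=1
        url = cached_url + '&zoom={}'.format(zoom)
        try:
            cdata = urlopen(url, timeout=timeout).read()
        except Exception:
            continue  # network error, try the next zoom level
        if cdata and hashlib.md5(cdata).hexdigest() not in DUMMY_IMAGE_MD5:
            return cdata  # first non-placeholder image wins
    return None


if __name__ == '__main__':
    # 'XXXXXXXXXXX' is a placeholder volume id; substitute a real one to test
    cover = download_cover('XXXXXXXXXXX')
    print('got %d bytes' % len(cover) if cover else 'no usable cover')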