Open library covers plugin migrated. Google plugin adds ratings and can now detect when an entry has a cover

This commit is contained in:
Kovid Goyal 2011-03-23 19:10:22 -06:00
parent 2848e0d2f1
commit d8e1dcf8e5
6 changed files with 104 additions and 42 deletions

View File

@ -1032,7 +1032,8 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
# New metadata download plugins {{{ # New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
plugins += [GoogleBooks, Amazon] plugins += [GoogleBooks, Amazon, OpenLibrary]
# }}} # }}}

View File

@ -468,7 +468,7 @@ class Amazon(Source):
if cached_url is not None: if cached_url is not None:
break break
if cached_url is None: if cached_url is None:
log.info('No cover found for') log.info('No cover found')
return return
if abort.is_set(): if abort.is_set():

View File

@ -47,12 +47,12 @@ class InternalMetadataCompareKeyGen(object):
The algorithm is: The algorithm is:
1. Prefer results that have the same ISBN as specified in the query * Prefer results that have the same ISBN as specified in the query
2. Prefer results with all available fields filled in * Prefer results with all available fields filled in
3. Prefer results that are an exact title match to the query * Prefer results that are an exact title match to the query
4. Prefer results with longer comments (greater than 10 % longer) * Prefer results with a cached cover URL
5. Prefer results with a cached cover URL * Prefer results with longer comments (greater than 10 % longer)
6. Use the relevance of the result as reported by the metadata source's search * Use the relevance of the result as reported by the metadata source's search
engine engine
''' '''
@ -67,9 +67,9 @@ class InternalMetadataCompareKeyGen(object):
has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\ has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\
is None else 1 is None else 1
self.base = (isbn, all_fields, exact_title) self.base = (isbn, all_fields, exact_title, has_cover)
self.comments_len = len(mi.comments.strip() if mi.comments else '') self.comments_len = len(mi.comments.strip() if mi.comments else '')
self.extra = (has_cover, getattr(mi, 'source_relevance', 0)) self.extra = (getattr(mi, 'source_relevance', 0), )
def __cmp__(self, other): def __cmp__(self, other):
result = cmp(self.base, other.base) result = cmp(self.base, other.base)
@ -130,6 +130,12 @@ class Source(Plugin):
# Utility functions {{{ # Utility functions {{{
def get_related_isbns(self, id_):
with self.cache_lock:
for isbn, q in self._isbn_to_identifier_cache.iteritems():
if q == id_:
yield isbn
def cache_isbn_to_identifier(self, isbn, identifier): def cache_isbn_to_identifier(self, isbn, identifier):
with self.cache_lock: with self.cache_lock:
self._isbn_to_identifier_cache[isbn] = identifier self._isbn_to_identifier_cache[isbn] = identifier

View File

@ -25,7 +25,8 @@ from calibre import as_unicode
NAMESPACES = { NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom', 'atom' : 'http://www.w3.org/2005/Atom',
'dc': 'http://purl.org/dc/terms' 'dc' : 'http://purl.org/dc/terms',
'gd' : 'http://schemas.google.com/g/2005'
} }
XPath = partial(etree.XPath, namespaces=NAMESPACES) XPath = partial(etree.XPath, namespaces=NAMESPACES)
@ -42,6 +43,7 @@ publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject') subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description') description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language') language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_details(browser, url, timeout): # {{{ def get_details(browser, url, timeout): # {{{
try: try:
@ -114,8 +116,10 @@ def to_metadata(browser, log, entry_, timeout): # {{{
btags = [x.text for x in subject(extra) if x.text] btags = [x.text for x in subject(extra) if x.text]
tags = [] tags = []
for t in btags: for t in btags:
tags.extend([y.strip() for y in t.split('/')]) atags = [y.strip() for y in t.split('/')]
tags = list(sorted(list(set(tags)))) for tag in atags:
if tag not in tags:
tags.append(tag)
except: except:
log.exception('Failed to parse tags:') log.exception('Failed to parse tags:')
tags = [] tags = []
@ -131,6 +135,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
except: except:
log.exception('Failed to parse pubdate') log.exception('Failed to parse pubdate')
# Ratings
for x in rating(extra):
try:
mi.rating = float(x.get('average'))
if mi.rating > 5:
mi.rating /= 2
except:
log.exception('Failed to parse rating')
# Cover
mi.has_google_cover = len(extra.xpath(
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
return mi return mi
# }}} # }}}
@ -142,9 +158,11 @@ class GoogleBooks(Source):
capabilities = frozenset(['identify', 'cover']) capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn', 'comments', 'publisher', 'identifier:isbn', 'rating',
'identifier:google']) # language currently disabled 'identifier:google']) # language currently disabled
GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{ def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
BASE_URL = 'http://books.google.com/books/feeds/volumes?' BASE_URL = 'http://books.google.com/books/feeds/volumes?'
isbn = check_isbn(identifiers.get('isbn', None)) isbn = check_isbn(identifiers.get('isbn', None))
@ -175,18 +193,9 @@ class GoogleBooks(Source):
}) })
# }}} # }}}
def cover_url_from_identifiers(self, identifiers):
goog = identifiers.get('google', None)
if goog is None:
isbn = identifiers.get('isbn', None)
goog = self.cached_isbn_to_identifier(isbn)
if goog is not None:
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
goog)
def download_cover(self, log, result_queue, abort, # {{{ def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30): title=None, authors=None, identifiers={}, timeout=30):
cached_url = self.cover_url_from_identifiers(identifiers) cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None: if cached_url is None:
log.info('No cached cover found, running identify') log.info('No cached cover found, running identify')
rq = Queue() rq = Queue()
@ -215,32 +224,38 @@ class GoogleBooks(Source):
br = self.browser br = self.browser
try: try:
cdata = br.open_novisit(cached_url, timeout=timeout).read() cdata = br.open_novisit(cached_url, timeout=timeout).read()
if self.is_cover_image_valid(cdata):
result_queue.put(cdata) result_queue.put(cdata)
else:
log.error('No cover found for %r'%identifiers)
except: except:
log.exception('Failed to download cover from:', cached_url) log.exception('Failed to download cover from:', cached_url)
# }}} # }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
goog = identifiers.get('google', None)
if goog is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
goog = self.cached_isbn_to_identifier(isbn)
if goog is not None:
url = self.cached_identifier_to_cover_url(goog)
def is_cover_image_valid(self, raw): return url
# When no cover is present, returns a PNG saying image not available # }}}
# Try for example google identifier llNqPwAACAAJ
# I have yet to see an actual cover in PNG format
return raw and len(raw) > 17000 and raw[1:4] != b'PNG'
def get_all_details(self, br, log, entries, abort, result_queue, timeout): def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
for relevance, i in enumerate(entries): for relevance, i in enumerate(entries):
try: try:
ans = to_metadata(br, log, i, timeout) ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata): if isinstance(ans, Metadata):
ans.source_relevance = relevance ans.source_relevance = relevance
goog = ans.identifiers['google']
for isbn in getattr(ans, 'all_isbns', []): for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn, self.cache_isbn_to_identifier(isbn, goog)
ans.identifiers['google']) if ans.has_google_cover:
self.cache_identifier_to_cover_url(goog,
self.GOOGLE_COVER%goog)
result_queue.put(ans) result_queue.put(ans)
except: except:
log.exception( log.exception(
@ -248,6 +263,7 @@ class GoogleBooks(Source):
etree.tostring(i)) etree.tostring(i))
if abort.is_set(): if abort.is_set():
break break
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30): identifiers={}, timeout=30):
@ -281,7 +297,7 @@ class GoogleBooks(Source):
return None return None
# }}} # }}}
if __name__ == '__main__': if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin, from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test) title_test, authors_test)
@ -296,8 +312,10 @@ if __name__ == '__main__':
authors_test(['Francis Scott Fitzgerald'])] authors_test(['Francis Scott Fitzgerald'])]
), ),
#( (
# {'title': 'Great Expectations', 'authors':['Charles Dickens']}, {'title': 'Flatland', 'authors':['Abbott']},
# [title_test('Great Expectations', exact=True)] [title_test('Flatland', exact=False)]
#), ),
]) ])
# }}}

View File

@ -0,0 +1,35 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source
class OpenLibrary(Source):
    '''
    Cover-only metadata source that fetches book covers from the Open
    Library covers service, keyed by ISBN.
    '''

    name = 'Open Library'
    description = _('Downloads metadata from The Open Library')

    capabilities = frozenset(['cover'])

    # default=false makes the service return HTTP 404 instead of a
    # placeholder image when no cover exists for the ISBN.
    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover for the book identified by ``identifiers`` and put
        the raw image data onto ``result_queue``. The covers API is keyed only
        by ISBN, so nothing is done when no ISBN is present.
        '''
        isbn = identifiers.get('isbn', None)
        if isbn is None:
            return
        # Respect a cancelled job before doing any network I/O, matching the
        # behaviour of the other metadata source plugins.
        if abort.is_set():
            return
        br = self.browser
        try:
            ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
            result_queue.put(ans)
        except Exception as e:
            # A 404 is the service's documented "no cover" answer, not an
            # error worth a traceback.
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                log.error('No cover for ISBN: %r found'%isbn)
            else:
                log.exception('Failed to download cover for ISBN:', isbn)

View File

@ -99,6 +99,8 @@ def test_identify_plugin(name, tests):
for i, mi in enumerate(results): for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30) prints('*'*30, 'Relevance:', i, '*'*30)
prints(mi) prints(mi)
prints('\nCached cover URL :',
plugin.get_cached_cover_url(mi.identifiers))
prints('*'*75, '\n\n') prints('*'*75, '\n\n')
possibles = [] possibles = []