diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index b3d435165b..994d73d33b 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -1032,7 +1032,8 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
 # New metadata download plugins {{{
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
+from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 
-plugins += [GoogleBooks, Amazon]
+plugins += [GoogleBooks, Amazon, OpenLibrary]
 
 # }}}
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index b16fd81243..c180850915 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -468,7 +468,7 @@ class Amazon(Source):
                 if cached_url is not None:
                     break
         if cached_url is None:
-            log.info('No cover found for')
+            log.info('No cover found')
             return
 
         if abort.is_set():
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 9845007068..8b524a93e7 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -47,12 +47,12 @@ class InternalMetadataCompareKeyGen(object):
 
     The algorithm is:
 
-        1. Prefer results that have the same ISBN as specified in the query
-        2. Prefer results with all available fields filled in
-        3. Prefer results that are an exact title match to the query
-        4. Prefer results with longer comments (greater than 10 % longer)
-        5. Prefer results with a cached cover URL
-        6. Use the relevance of the result as reported by the metadata source's search
+        * Prefer results that have the same ISBN as specified in the query
+        * Prefer results with all available fields filled in
+        * Prefer results that are an exact title match to the query
+        * Prefer results with a cached cover URL
+        * Prefer results with longer comments (greater than 10 % longer)
+        * Use the relevance of the result as reported by the metadata source's search
           engine
     '''
 
@@ -67,9 +67,9 @@ class InternalMetadataCompareKeyGen(object):
         has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\
                 is None else 1
 
-        self.base = (isbn, all_fields, exact_title)
+        self.base = (isbn, all_fields, exact_title, has_cover)
         self.comments_len = len(mi.comments.strip() if mi.comments else '')
-        self.extra = (has_cover, getattr(mi, 'source_relevance', 0))
+        self.extra = (getattr(mi, 'source_relevance', 0), )
 
     def __cmp__(self, other):
         result = cmp(self.base, other.base)
@@ -130,6 +130,19 @@ class Source(Plugin):
 
     # Utility functions {{{
 
+    def get_related_isbns(self, id_):
+        '''
+        Yield every cached ISBN that maps to the identifier ``id_``.
+        '''
+        # Snapshot the matches and release the lock before yielding: a
+        # generator that yields inside the ``with`` block keeps the lock
+        # held for as long as the consumer leaves it suspended.
+        with self.cache_lock:
+            isbns = [isbn for isbn, q in
+                    self._isbn_to_identifier_cache.iteritems() if q == id_]
+        for isbn in isbns:
+            yield isbn
+
     def cache_isbn_to_identifier(self, isbn, identifier):
         with self.cache_lock:
             self._isbn_to_identifier_cache[isbn] = identifier
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 4d61fdc7ab..b03dccb766 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -25,7 +25,8 @@ from calibre import as_unicode
 NAMESPACES = {
               'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
               'atom' : 'http://www.w3.org/2005/Atom',
-              'dc': 'http://purl.org/dc/terms'
+              'dc' : 'http://purl.org/dc/terms',
+              'gd' : 'http://schemas.google.com/g/2005'
             }
 XPath = partial(etree.XPath, namespaces=NAMESPACES)
 
@@ -42,6 +43,7 @@ publisher = XPath('descendant::dc:publisher')
 subject = XPath('descendant::dc:subject')
 description = XPath('descendant::dc:description')
 language = XPath('descendant::dc:language')
+rating = XPath('descendant::gd:rating[@average]')
 
 def get_details(browser, url, timeout): # {{{
     try:
@@ -114,8 +116,10 @@ def to_metadata(browser, log, entry_, timeout): # {{{
         btags = [x.text for x in subject(extra) if x.text]
         tags = []
         for t in btags:
-            tags.extend([y.strip() for y in t.split('/')])
-        tags = list(sorted(list(set(tags))))
+            atags = [y.strip() for y in t.split('/')]
+            for tag in atags:
+                if tag not in tags:
+                    tags.append(tag)
     except:
         log.exception('Failed to parse tags:')
         tags = []
@@ -131,6 +135,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
     except:
         log.exception('Failed to parse pubdate')
 
+    # Ratings
+    for x in rating(extra):
+        try:
+            mi.rating = float(x.get('average'))
+            if mi.rating > 5:
+                mi.rating /= 2
+        except:
+            log.exception('Failed to parse rating')
+
+    # Cover
+    mi.has_google_cover = len(extra.xpath(
+        '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
 
     return mi
 # }}}
@@ -142,9 +158,11 @@ class GoogleBooks(Source):
 
     capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
-        'comments', 'publisher', 'identifier:isbn',
+        'comments', 'publisher', 'identifier:isbn', 'rating',
         'identifier:google']) # language currently disabled
 
+    GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
+
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
         BASE_URL = 'http://books.google.com/books/feeds/volumes?'
         isbn = check_isbn(identifiers.get('isbn', None))
@@ -175,18 +193,9 @@ class GoogleBooks(Source):
             })
     # }}}
 
-    def cover_url_from_identifiers(self, identifiers):
-        goog = identifiers.get('google', None)
-        if goog is None:
-            isbn = identifiers.get('isbn', None)
-            goog = self.cached_isbn_to_identifier(isbn)
-        if goog is not None:
-            return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
-                    goog)
-
     def download_cover(self, log, result_queue, abort, # {{{
             title=None, authors=None, identifiers={}, timeout=30):
-        cached_url = self.cover_url_from_identifiers(identifiers)
+        cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')
             rq = Queue()
@@ -215,32 +224,38 @@ class GoogleBooks(Source):
         br = self.browser
         try:
             cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            if self.is_cover_image_valid(cdata):
-                result_queue.put(cdata)
-            else:
-                log.error('No cover found for %r'%identifiers)
+            result_queue.put(cdata)
         except:
             log.exception('Failed to download cover from:', cached_url)
 
 
-    # }}}
+    def get_cached_cover_url(self, identifiers): # {{{
+        url = None
+        goog = identifiers.get('google', None)
+        if goog is None:
+            isbn = identifiers.get('isbn', None)
+            if isbn is not None:
+                goog = self.cached_isbn_to_identifier(isbn)
+        if goog is not None:
+            url = self.cached_identifier_to_cover_url(goog)
 
-    def is_cover_image_valid(self, raw):
-        # When no cover is present, returns a PNG saying image not available
-        # Try for example google identifier llNqPwAACAAJ
-        # I have yet to see an actual cover in PNG format
-        return raw and len(raw) > 17000 and raw[1:4] != b'PNG'
+        return url
+    # }}}
 
 
-    def get_all_details(self, br, log, entries, abort, result_queue, timeout):
+    def get_all_details(self, br, log, entries, abort, # {{{
+            result_queue, timeout):
         for relevance, i in enumerate(entries):
             try:
                 ans = to_metadata(br, log, i, timeout)
                 if isinstance(ans, Metadata):
                     ans.source_relevance = relevance
+                    goog = ans.identifiers['google']
                     for isbn in getattr(ans, 'all_isbns', []):
-                        self.cache_isbn_to_identifier(isbn,
-                                ans.identifiers['google'])
+                        self.cache_isbn_to_identifier(isbn, goog)
+                    if ans.has_google_cover:
+                        self.cache_identifier_to_cover_url(goog,
+                            self.GOOGLE_COVER%goog)
                     result_queue.put(ans)
             except:
                 log.exception(
@@ -248,6 +263,7 @@ class GoogleBooks(Source):
                     etree.tostring(i))
             if abort.is_set():
                 break
+    # }}}
 
     def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=30):
@@ -281,7 +297,7 @@ class GoogleBooks(Source):
         return None
     # }}}
 
-if __name__ == '__main__':
+if __name__ == '__main__': # tests {{{
     # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
     from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
             title_test, authors_test)
@@ -296,8 +312,10 @@ if __name__ == '__main__':
                     authors_test(['Francis Scott Fitzgerald'])]
             ),
 
-            #(
-            #    {'title': 'Great Expectations', 'authors':['Charles Dickens']},
-            #    [title_test('Great Expectations', exact=True)]
-            #),
+            (
+                {'title': 'Flatland', 'authors':['Abbott']},
+                [title_test('Flatland', exact=False)]
+            ),
     ])
+# }}}
+
diff --git a/src/calibre/ebooks/metadata/sources/openlibrary.py b/src/calibre/ebooks/metadata/sources/openlibrary.py
new file mode 100644
index 0000000000..1fcb33e35f
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/openlibrary.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source
+
+class OpenLibrary(Source):
+
+    name = 'Open Library'
+    # Only cover download is implemented, see capabilities below
+    description = _('Downloads covers from The Open Library')
+
+    capabilities = frozenset(['cover'])
+
+    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
+
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30):
+        '''
+        Fetch the cover for the ISBN in ``identifiers`` and put the raw
+        image data on ``result_queue``. Does nothing without an ISBN.
+        '''
+        isbn = identifiers.get('isbn', None)
+        if not isbn:
+            # A missing or empty ISBN cannot be turned into a cover URL
+            return
+        br = self.browser
+        try:
+            ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
+            result_queue.put(ans)
+        except Exception as e:
+            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
+                log.error('No cover for ISBN: %r found'%isbn)
+            else:
+                log.exception('Failed to download cover for ISBN:', isbn)
+
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index d4344c7f49..de95a9b887 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -99,6 +99,8 @@ def test_identify_plugin(name, tests):
         for i, mi in enumerate(results):
             prints('*'*30, 'Relevance:', i, '*'*30)
             prints(mi)
+            prints('\nCached cover URL :',
+                    plugin.get_cached_cover_url(mi.identifiers))
             prints('*'*75, '\n\n')
 
         possibles = []