Open Library covers plugin migrated. Google plugin adds ratings and can now detect when an entry has a cover.

This commit is contained in:
Kovid Goyal 2011-03-23 19:10:22 -06:00
parent 2848e0d2f1
commit d8e1dcf8e5
6 changed files with 104 additions and 42 deletions

View File

@ -1032,7 +1032,8 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
plugins += [GoogleBooks, Amazon]
plugins += [GoogleBooks, Amazon, OpenLibrary]
# }}}

View File

@ -468,7 +468,7 @@ class Amazon(Source):
if cached_url is not None:
break
if cached_url is None:
log.info('No cover found for')
log.info('No cover found')
return
if abort.is_set():

View File

@ -47,12 +47,12 @@ class InternalMetadataCompareKeyGen(object):
The algorithm is:
1. Prefer results that have the same ISBN as specified in the query
2. Prefer results with all available fields filled in
3. Prefer results that are an exact title match to the query
4. Prefer results with longer comments (greater than 10 % longer)
5. Prefer results with a cached cover URL
6. Use the relevance of the result as reported by the metadata source's search
* Prefer results that have the same ISBN as specified in the query
* Prefer results with all available fields filled in
* Prefer results that are an exact title match to the query
* Prefer results with a cached cover URL
* Prefer results with longer comments (greater than 10 % longer)
* Use the relevance of the result as reported by the metadata source's search
engine
'''
@ -67,9 +67,9 @@ class InternalMetadataCompareKeyGen(object):
has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\
is None else 1
self.base = (isbn, all_fields, exact_title)
self.base = (isbn, all_fields, exact_title, has_cover)
self.comments_len = len(mi.comments.strip() if mi.comments else '')
self.extra = (has_cover, getattr(mi, 'source_relevance', 0))
self.extra = (getattr(mi, 'source_relevance', 0), )
def __cmp__(self, other):
result = cmp(self.base, other.base)
@ -130,6 +130,12 @@ class Source(Plugin):
# Utility functions {{{
def get_related_isbns(self, id_):
    '''
    Yield every ISBN in the ISBN -> identifier cache that maps to ``id_``.

    The matching ISBNs are collected while ``self.cache_lock`` is held and
    are yielded only after the lock has been released, so a consumer that
    pauses or abandons the generator never leaves the cache lock held.

    :param id_: The identifier to look up, compared with ``==`` against the
                values stored in ``self._isbn_to_identifier_cache``.
    '''
    with self.cache_lock:
        # Snapshot the matches; do not yield while holding the lock
        related = [isbn for isbn, q in
                self._isbn_to_identifier_cache.iteritems() if q == id_]
    for isbn in related:
        yield isbn
def cache_isbn_to_identifier(self, isbn, identifier):
with self.cache_lock:
self._isbn_to_identifier_cache[isbn] = identifier

View File

@ -25,7 +25,8 @@ from calibre import as_unicode
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'dc': 'http://purl.org/dc/terms'
'dc' : 'http://purl.org/dc/terms',
'gd' : 'http://schemas.google.com/g/2005'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
@ -42,6 +43,7 @@ publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_details(browser, url, timeout): # {{{
try:
@ -114,8 +116,10 @@ def to_metadata(browser, log, entry_, timeout): # {{{
btags = [x.text for x in subject(extra) if x.text]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = list(sorted(list(set(tags))))
atags = [y.strip() for y in t.split('/')]
for tag in atags:
if tag not in tags:
tags.append(tag)
except:
log.exception('Failed to parse tags:')
tags = []
@ -131,6 +135,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
except:
log.exception('Failed to parse pubdate')
# Ratings
for x in rating(extra):
try:
mi.rating = float(x.get('average'))
if mi.rating > 5:
mi.rating /= 2
except:
log.exception('Failed to parse rating')
# Cover
mi.has_google_cover = len(extra.xpath(
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
return mi
# }}}
@ -142,9 +158,11 @@ class GoogleBooks(Source):
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn',
'comments', 'publisher', 'identifier:isbn', 'rating',
'identifier:google']) # language currently disabled
GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
isbn = check_isbn(identifiers.get('isbn', None))
@ -175,18 +193,9 @@ class GoogleBooks(Source):
})
# }}}
# Build the Google Books front-cover image URL for a set of identifiers.
# Returns the URL string, or None (implicitly) when no google identifier
# could be determined.
def cover_url_from_identifiers(self, identifiers):
# Prefer an explicit google identifier
goog = identifiers.get('google', None)
if goog is None:
# Fall back to the ISBN -> identifier lookup. NOTE: isbn is None when
# the 'isbn' key is absent and is passed on to the lookup unchecked.
isbn = identifiers.get('isbn', None)
goog = self.cached_isbn_to_identifier(isbn)
if goog is not None:
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
goog)
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
cached_url = self.cover_url_from_identifiers(identifiers)
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
rq = Queue()
@ -215,32 +224,38 @@ class GoogleBooks(Source):
br = self.browser
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
if self.is_cover_image_valid(cdata):
result_queue.put(cdata)
else:
log.error('No cover found for %r'%identifiers)
result_queue.put(cdata)
except:
log.exception('Failed to download cover from:', cached_url)
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
goog = identifiers.get('google', None)
if goog is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
goog = self.cached_isbn_to_identifier(isbn)
if goog is not None:
url = self.cached_identifier_to_cover_url(goog)
def is_cover_image_valid(self, raw):
# When no cover is present, returns a PNG saying image not available
# Try for example google identifier llNqPwAACAAJ
# I have yet to see an actual cover in PNG format
return raw and len(raw) > 17000 and raw[1:4] != b'PNG'
return url
# }}}
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
for relevance, i in enumerate(entries):
try:
ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata):
ans.source_relevance = relevance
goog = ans.identifiers['google']
for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn,
ans.identifiers['google'])
self.cache_isbn_to_identifier(isbn, goog)
if ans.has_google_cover:
self.cache_identifier_to_cover_url(goog,
self.GOOGLE_COVER%goog)
result_queue.put(ans)
except:
log.exception(
@ -248,6 +263,7 @@ class GoogleBooks(Source):
etree.tostring(i))
if abort.is_set():
break
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
@ -281,7 +297,7 @@ class GoogleBooks(Source):
return None
# }}}
if __name__ == '__main__':
if __name__ == '__main__': # tests {{{
# To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
@ -296,8 +312,10 @@ if __name__ == '__main__':
authors_test(['Francis Scott Fitzgerald'])]
),
#(
# {'title': 'Great Expectations', 'authors':['Charles Dickens']},
# [title_test('Great Expectations', exact=True)]
#),
(
{'title': 'Flatland', 'authors':['Abbott']},
[title_test('Flatland', exact=False)]
),
])
# }}}

View File

@ -0,0 +1,35 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source
class OpenLibrary(Source):
    '''
    Cover-only source plugin: downloads cover images from the Open Library
    covers service, looked up by ISBN.
    '''

    name = 'Open Library'
    description = _('Downloads metadata from The Open Library')

    capabilities = frozenset(['cover'])

    # URL template, filled in with the ISBN.
    # NOTE(review): ``default=false`` presumably makes the server answer with
    # a 404 instead of a placeholder image when it has no cover; the 404
    # branch in download_cover() depends on that -- confirm against the Open
    # Library covers API documentation.
    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover for the ISBN in ``identifiers`` and put the raw
        response data on ``result_queue``.

        Returns without doing anything when ``identifiers`` has no usable
        ``isbn`` entry (missing, None or empty). Download failures are logged
        via ``log`` and never raised; an error exposing ``getcode() == 404``
        is reported as "no cover" rather than as an exception traceback.

        ``abort``, ``title`` and ``authors`` are accepted for interface
        compatibility but are not used by this implementation.
        '''
        # Use .get() plus a truthiness check so that an 'isbn' key holding
        # None or '' does not result in a request for ".../isbn/None-L.jpg"
        isbn = identifiers.get('isbn', None)
        if not isbn:
            return
        br = self.browser
        try:
            ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
            result_queue.put(ans)
        except Exception as e:
            # HTTP errors expose getcode(); other exceptions may not
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                log.error('No cover for ISBN: %r found'%isbn)
            else:
                log.exception('Failed to download cover for ISBN:', isbn)

View File

@ -99,6 +99,8 @@ def test_identify_plugin(name, tests):
for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30)
prints(mi)
prints('\nCached cover URL :',
plugin.get_cached_cover_url(mi.identifiers))
prints('*'*75, '\n\n')
possibles = []