mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Open Library covers plugin migrated. Google plugin adds ratings and can now detect when an entry has a cover
This commit is contained in:
parent
2848e0d2f1
commit
d8e1dcf8e5
@ -1032,7 +1032,8 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
|
||||
# New metadata download plugins {{{
|
||||
from calibre.ebooks.metadata.sources.google import GoogleBooks
|
||||
from calibre.ebooks.metadata.sources.amazon import Amazon
|
||||
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
||||
|
||||
plugins += [GoogleBooks, Amazon]
|
||||
plugins += [GoogleBooks, Amazon, OpenLibrary]
|
||||
|
||||
# }}}
|
||||
|
@ -468,7 +468,7 @@ class Amazon(Source):
|
||||
if cached_url is not None:
|
||||
break
|
||||
if cached_url is None:
|
||||
log.info('No cover found for')
|
||||
log.info('No cover found')
|
||||
return
|
||||
|
||||
if abort.is_set():
|
||||
|
@ -47,12 +47,12 @@ class InternalMetadataCompareKeyGen(object):
|
||||
|
||||
The algorithm is:
|
||||
|
||||
1. Prefer results that have the same ISBN as specified in the query
|
||||
2. Prefer results with all available fields filled in
|
||||
3. Prefer results that are an exact title match to the query
|
||||
4. Prefer results with longer comments (greater than 10 % longer)
|
||||
5. Prefer results with a cached cover URL
|
||||
6. Use the relevance of the result as reported by the metadata source's search
|
||||
* Prefer results that have the same ISBN as specified in the query
|
||||
* Prefer results with all available fields filled in
|
||||
* Prefer results that are an exact title match to the query
|
||||
* Prefer results with a cached cover URL
|
||||
* Prefer results with longer comments (greater than 10 % longer)
|
||||
* Use the relevance of the result as reported by the metadata source's search
|
||||
engine
|
||||
'''
|
||||
|
||||
@ -67,9 +67,9 @@ class InternalMetadataCompareKeyGen(object):
|
||||
has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\
|
||||
is None else 1
|
||||
|
||||
self.base = (isbn, all_fields, exact_title)
|
||||
self.base = (isbn, all_fields, exact_title, has_cover)
|
||||
self.comments_len = len(mi.comments.strip() if mi.comments else '')
|
||||
self.extra = (has_cover, getattr(mi, 'source_relevance', 0))
|
||||
self.extra = (getattr(mi, 'source_relevance', 0), )
|
||||
|
||||
def __cmp__(self, other):
|
||||
result = cmp(self.base, other.base)
|
||||
@ -130,6 +130,12 @@ class Source(Plugin):
|
||||
|
||||
# Utility functions {{{
|
||||
|
||||
def get_related_isbns(self, id_):
    '''
    Yield every ISBN that has been cached as mapping to the source
    identifier ``id_`` (the reverse lookup of
    :meth:`cache_isbn_to_identifier`).

    :param id_: A source-specific book identifier (e.g. a Google Books id)
    '''
    with self.cache_lock:
        # .items() instead of the Python-2-only .iteritems(): identical
        # behavior under Python 2 and also valid under Python 3.
        for isbn, q in self._isbn_to_identifier_cache.items():
            if q == id_:
                yield isbn
|
||||
|
||||
def cache_isbn_to_identifier(self, isbn, identifier):
    # Remember, under the shared cache lock, that this ISBN maps to the
    # given source-specific identifier (used later for cover lookups).
    with self.cache_lock:
        cache = self._isbn_to_identifier_cache
        cache[isbn] = identifier
|
||||
|
@ -25,7 +25,8 @@ from calibre import as_unicode
|
||||
NAMESPACES = {
|
||||
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
||||
'atom' : 'http://www.w3.org/2005/Atom',
|
||||
'dc': 'http://purl.org/dc/terms'
|
||||
'dc' : 'http://purl.org/dc/terms',
|
||||
'gd' : 'http://schemas.google.com/g/2005'
|
||||
}
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
|
||||
@ -42,6 +43,7 @@ publisher = XPath('descendant::dc:publisher')
|
||||
subject = XPath('descendant::dc:subject')
|
||||
description = XPath('descendant::dc:description')
|
||||
language = XPath('descendant::dc:language')
|
||||
rating = XPath('descendant::gd:rating[@average]')
|
||||
|
||||
def get_details(browser, url, timeout): # {{{
|
||||
try:
|
||||
@ -114,8 +116,10 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
||||
btags = [x.text for x in subject(extra) if x.text]
|
||||
tags = []
|
||||
for t in btags:
|
||||
tags.extend([y.strip() for y in t.split('/')])
|
||||
tags = list(sorted(list(set(tags))))
|
||||
atags = [y.strip() for y in t.split('/')]
|
||||
for tag in atags:
|
||||
if tag not in tags:
|
||||
tags.append(tag)
|
||||
except:
|
||||
log.exception('Failed to parse tags:')
|
||||
tags = []
|
||||
@ -131,6 +135,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
||||
except:
|
||||
log.exception('Failed to parse pubdate')
|
||||
|
||||
# Ratings
|
||||
for x in rating(extra):
|
||||
try:
|
||||
mi.rating = float(x.get('average'))
|
||||
if mi.rating > 5:
|
||||
mi.rating /= 2
|
||||
except:
|
||||
log.exception('Failed to parse rating')
|
||||
|
||||
# Cover
|
||||
mi.has_google_cover = len(extra.xpath(
|
||||
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
|
||||
|
||||
return mi
|
||||
# }}}
|
||||
@ -142,9 +158,11 @@ class GoogleBooks(Source):
|
||||
|
||||
capabilities = frozenset(['identify', 'cover'])
|
||||
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
|
||||
'comments', 'publisher', 'identifier:isbn',
|
||||
'comments', 'publisher', 'identifier:isbn', 'rating',
|
||||
'identifier:google']) # language currently disabled
|
||||
|
||||
GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
|
||||
isbn = check_isbn(identifiers.get('isbn', None))
|
||||
@ -175,18 +193,9 @@ class GoogleBooks(Source):
|
||||
})
|
||||
# }}}
|
||||
|
||||
def cover_url_from_identifiers(self, identifiers):
|
||||
goog = identifiers.get('google', None)
|
||||
if goog is None:
|
||||
isbn = identifiers.get('isbn', None)
|
||||
goog = self.cached_isbn_to_identifier(isbn)
|
||||
if goog is not None:
|
||||
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
|
||||
goog)
|
||||
|
||||
def download_cover(self, log, result_queue, abort, # {{{
|
||||
title=None, authors=None, identifiers={}, timeout=30):
|
||||
cached_url = self.cover_url_from_identifiers(identifiers)
|
||||
cached_url = self.get_cached_cover_url(identifiers)
|
||||
if cached_url is None:
|
||||
log.info('No cached cover found, running identify')
|
||||
rq = Queue()
|
||||
@ -215,32 +224,38 @@ class GoogleBooks(Source):
|
||||
br = self.browser
|
||||
try:
|
||||
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
||||
if self.is_cover_image_valid(cdata):
|
||||
result_queue.put(cdata)
|
||||
else:
|
||||
log.error('No cover found for %r'%identifiers)
|
||||
result_queue.put(cdata)
|
||||
except:
|
||||
log.exception('Failed to download cover from:', cached_url)
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
def get_cached_cover_url(self, identifiers): # {{{
    # Return a previously cached cover URL for this book, or None.
    # The cover cache is keyed on the google identifier; when only an
    # ISBN is available, translate it to a google id first via the
    # ISBN -> identifier cache.
    goog = identifiers.get('google', None)
    if goog is None:
        isbn = identifiers.get('isbn', None)
        if isbn is not None:
            goog = self.cached_isbn_to_identifier(isbn)
    if goog is None:
        return None
    return self.cached_identifier_to_cover_url(goog)
# }}}
|
||||
|
||||
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
|
||||
def get_all_details(self, br, log, entries, abort, # {{{
|
||||
result_queue, timeout):
|
||||
for relevance, i in enumerate(entries):
|
||||
try:
|
||||
ans = to_metadata(br, log, i, timeout)
|
||||
if isinstance(ans, Metadata):
|
||||
ans.source_relevance = relevance
|
||||
goog = ans.identifiers['google']
|
||||
for isbn in getattr(ans, 'all_isbns', []):
|
||||
self.cache_isbn_to_identifier(isbn,
|
||||
ans.identifiers['google'])
|
||||
self.cache_isbn_to_identifier(isbn, goog)
|
||||
if ans.has_google_cover:
|
||||
self.cache_identifier_to_cover_url(goog,
|
||||
self.GOOGLE_COVER%goog)
|
||||
result_queue.put(ans)
|
||||
except:
|
||||
log.exception(
|
||||
@ -248,6 +263,7 @@ class GoogleBooks(Source):
|
||||
etree.tostring(i))
|
||||
if abort.is_set():
|
||||
break
|
||||
# }}}
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
@ -281,7 +297,7 @@ class GoogleBooks(Source):
|
||||
return None
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == '__main__': # tests {{{
|
||||
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||
title_test, authors_test)
|
||||
@ -296,8 +312,10 @@ if __name__ == '__main__':
|
||||
authors_test(['Francis Scott Fitzgerald'])]
|
||||
),
|
||||
|
||||
#(
|
||||
# {'title': 'Great Expectations', 'authors':['Charles Dickens']},
|
||||
# [title_test('Great Expectations', exact=True)]
|
||||
#),
|
||||
(
|
||||
{'title': 'Flatland', 'authors':['Abbott']},
|
||||
[title_test('Flatland', exact=False)]
|
||||
),
|
||||
])
|
||||
# }}}
|
||||
|
||||
|
35
src/calibre/ebooks/metadata/sources/openlibrary.py
Normal file
35
src/calibre/ebooks/metadata/sources/openlibrary.py
Normal file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
|
||||
class OpenLibrary(Source):
    # Metadata source plugin that only knows how to fetch covers, using
    # the Open Library covers service keyed on ISBN.

    name = 'Open Library'
    description = _('Downloads metadata from The Open Library')

    capabilities = frozenset(['cover'])

    # default=false makes the service return HTTP 404 instead of a
    # placeholder image when no cover exists for the ISBN.
    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        # Covers are addressed purely by ISBN, so nothing can be done
        # without one.
        if 'isbn' not in identifiers:
            return
        isbn = identifiers['isbn']
        br = self.browser
        url = self.OPENLIBRARY % isbn
        try:
            cdata = br.open_novisit(url, timeout=timeout).read()
            result_queue.put(cdata)
        except Exception as e:
            # A 404 means "no cover for this ISBN" (see default=false
            # above); anything else is a real failure worth a traceback.
            getcode = getattr(e, 'getcode', None)
            if callable(getcode) and getcode() == 404:
                log.error('No cover for ISBN: %r found'%isbn)
            else:
                log.exception('Failed to download cover for ISBN:', isbn)
|
||||
|
@ -99,6 +99,8 @@ def test_identify_plugin(name, tests):
|
||||
for i, mi in enumerate(results):
|
||||
prints('*'*30, 'Relevance:', i, '*'*30)
|
||||
prints(mi)
|
||||
prints('\nCached cover URL :',
|
||||
plugin.get_cached_cover_url(mi.identifiers))
|
||||
prints('*'*75, '\n\n')
|
||||
|
||||
possibles = []
|
||||
|
Loading…
x
Reference in New Issue
Block a user