diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 3ccc07040b..1dd575f45b 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -511,14 +511,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
- LibraryThingCovers, DoubanCovers
+ AmazonCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
- Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
+ Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers]
plugins += [
ComicInput,
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index fcd4491fd3..6078a0aa94 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -271,6 +271,8 @@ def check_isbn13(isbn):
return None
def check_isbn(isbn):
+ if not isbn:
+ return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if len(isbn) == 10:
return check_isbn10(isbn)
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index cf96c9732c..98a2ac6d36 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS
'''
import sys, re
+from threading import RLock
from lxml import html
from lxml.html import soupparser
@@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
+# Module-level memoisation tables used by to_asin() and get_cover_url().
+# Both are keyed by ISBN. Readers treat a truthy value as a cached result
+# and the value False as "this lookup already failed, do not retry"; a
+# missing key means the ISBN has not been looked up yet.
+asin_cache = {}
+cover_url_cache = {}
+# Held around every read and write of the two caches above
+cache_lock = RLock()
+
def find_asin(br, isbn):
q = 'http://www.amazon.com/s?field-keywords='+isbn
raw = br.open_novisit(q).read()
@@ -29,6 +34,12 @@ def find_asin(br, isbn):
return revs[0]
def to_asin(br, isbn):
+    '''
+    Return the Amazon ASIN for ``isbn``, or None if it could not be found.
+
+    A 13 character ``isbn`` is resolved via find_asin() using the browser
+    ``br``; an ``isbn`` of any other length is returned unchanged. The
+    outcome is memoised in asin_cache, with a failed lookup stored as False
+    so that it is not repeated.
+    '''
+    with cache_lock:
+        ans = asin_cache.get(isbn, None)
+    if ans:
+        return ans
+    if ans is False:
+        return None
     if len(isbn) == 13:
         try:
             asin = find_asin(br, isbn)
@@ -38,8 +49,11 @@ def to_asin(br, isbn):
             asin = None
     else:
         asin = isbn
+    with cache_lock:
+        # Store the value just computed (asin). The cache probe (ans) is
+        # always falsy by the time control reaches this point, so caching it
+        # would record every lookup as a failure.
+        asin_cache[isbn] = asin if asin else False
     return asin
+
def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors)
if not isbn:
@@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
return mi
return mi
+def get_cover_url(isbn, br):
+    '''
+    Return the URL of an Amazon cover image for ``isbn``, fetched with the
+    browser ``br``, or None if the ISBN is not valid or no cover was found.
+
+    The ISBN itself is tried first. Only when that yields nothing are the
+    associated ISBNs reported by xisbn tried, in the order xisbn returns
+    them. Hits and misses are both remembered in cover_url_cache, a miss
+    being stored as False.
+    '''
+    isbn = check_isbn(isbn)
+    if not isbn:
+        return None
+
+    with cache_lock:
+        cached = cover_url_cache.get(isbn, None)
+    if cached:
+        return cached
+    if cached is False:
+        # An earlier lookup for this ISBN already came up empty
+        return None
+
+    def candidates():
+        # The requested ISBN first; xisbn is imported and queried only if
+        # the caller asks for a second candidate
+        yield isbn
+        from calibre.ebooks.metadata.xisbn import xisbn
+        for alternate in xisbn.get_associated_isbns(isbn):
+            yield alternate
+
+    for candidate in candidates():
+        asin = to_asin(br, candidate)
+        url = _get_cover_url(br, asin) if asin else None
+        if url:
+            with cache_lock:
+                cover_url_cache[isbn] = url
+                cover_url_cache[candidate] = url
+            return url
+
+    with cache_lock:
+        cover_url_cache[isbn] = False
+    return None
+
+def _get_cover_url(br, asin):
+    '''
+    Fetch the Amazon page for ``asin`` with the browser ``br`` and return
+    the URL of its product image, or None if the page does not exist,
+    cannot be parsed or contains no usable product image.
+
+    The returned URL has the middle '_' separated pieces of the image file
+    name removed (presumably these are size/format modifiers -- TODO
+    confirm). Errors raised while fetching the page, other than a 404, are
+    propagated to the caller.
+    '''
+    q = 'http://amzn.com/'+asin
+    try:
+        raw = br.open_novisit(q).read()
+    except Exception, e:
+        if callable(getattr(e, 'getcode', None)) and \
+                e.getcode() == 404:
+            return None
+        raise
+    # A page whose title starts with "404 - " is also treated as not found
+    if '<title>404 - ' in raw:
+        return None
+    raw = xml_to_unicode(raw, strip_encoding_pats=True,
+            resolve_entities=True)[0]
+    try:
+        root = soupparser.fromstring(raw)
+    except Exception:
+        # Unparseable page: report "no cover" the same way as every other
+        # failure path (callers only test the result for truth)
+        return None
+
+    imgs = root.xpath('//img[@id="prodImage" and @src]')
+    if imgs:
+        src = imgs[0].get('src')
+        parts = src.split('/')
+        if len(parts) > 3:
+            bn = parts[-1]
+            sparts = bn.split('_')
+            if len(sparts) > 2:
+                # Keep only the first and last '_' separated pieces of the
+                # file name, dropping everything in between
+                bn = sparts[0] + sparts[-1]
+                return ('/'.join(parts[:-1]))+'/'+bn
+    return None
+
+
def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin
try:
@@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
def main(args=sys.argv):
-    # Test xisbn
-    print get_social_metadata('Learning Python', None, None, '8324616489')
-    print
+    '''
+    Manual smoke test. For each of a few known books, try to download the
+    Amazon cover into the system temporary directory and then print the
+    social metadata for the book. ``args`` is accepted but not used.
+    Always returns 0.
+    '''
+    import tempfile, os
+    tdir = tempfile.gettempdir()
+    br = browser()
+    for title, isbn in [
+            ('Learning Python', '8324616489'), # Test xisbn
+            ('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
+            # Random tests
+            ('Star Trek: Destiny: Mere Mortals', '9781416551720'),
+            ('The Great Gatsby', '0743273567'),
+            ]:
+        cpath = os.path.join(tdir, title+'.jpg')
+        curl = get_cover_url(isbn, br)
+        if curl is None:
+            print 'No cover found for', title
+        else:
+            # Download first so that a failed request does not leave an
+            # empty file behind, then write with the file closed explicitly
+            cover_data = br.open_novisit(curl).read()
+            with open(cpath, 'wb') as f:
+                f.write(cover_data)
+            print 'Cover for', title, 'saved to', cpath
-    # Test sophisticated comment formatting
-    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
-    print
-
-    # Random tests
-    print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
-    print
-    print get_social_metadata('The Great Gatsby', None, None, '0743273567')
+        print get_social_metadata(title, None, None, isbn)
     return 0
diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py
index 3deb54da10..15e0a05c1e 100644
--- a/src/calibre/ebooks/metadata/covers.py
+++ b/src/calibre/ebooks/metadata/covers.py
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import traceback, socket, re, sys
+import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
@@ -15,7 +15,6 @@ import mechanize
from calibre.customize import Plugin
from calibre import browser, prints
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin):
@@ -112,73 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
# }}}
-class LibraryThingCovers(CoverDownload): # {{{
+class AmazonCovers(CoverDownload): # {{{
-    name = 'librarything.com covers'
-    description = _('Download covers from librarything.com')
+    # Cover download plugin that looks covers up by ISBN using
+    # calibre.ebooks.metadata.amazon.get_cover_url()
+    name = 'amazon.com covers'
+    description = _('Download covers from amazon.com')
     author = 'Kovid Goyal'
-    LIBRARYTHING = 'http://www.librarything.com/isbn/'
-
-    def get_cover_url(self, isbn, br, timeout=5.):
-
-        try:
-            src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
-                    timeout=timeout).read().decode('utf-8', 'replace')
-        except Exception, err:
-            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
-                err = Exception(_('LibraryThing.com timed out. Try again later.'))
-            raise err
-        else:
-            if '/wiki/index.php/HelpThing:Verify' in src:
-                raise Exception('LibraryThing is blocking calibre.')
-            s = BeautifulSoup(src)
-            url = s.find('td', attrs={'class':'left'})
-            if url is None:
-                if s.find('div', attrs={'class':'highloadwarning'}) is not None:
-                    raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
-                raise Exception(_('ISBN: %s not found')%isbn)
-            url = url.find('img')
-            if url is None:
-                raise Exception(_('LibraryThing.com server error. Try again later.'))
-            url = re.sub(r'_S[XY]\d+', '', url['src'])
-        return url
     def has_cover(self, mi, ans, timeout=5.):
-        return False
-        if not mi.isbn or not self.site_customization:
+        '''
+        Call ``ans.set()`` if a cover URL can be found for ``mi.isbn``.
+
+        Returns False immediately when ``mi`` has no ISBN. Any exception
+        raised during the lookup is passed to self.debug() and otherwise
+        ignored. ``timeout`` is accepted but not used.
+        '''
+        if not mi.isbn:
             return False
-        from calibre.ebooks.metadata.library_thing import get_browser, login
-        br = get_browser()
-        un, _, pw = self.site_customization.partition(':')
-        login(br, un, pw)
+        from calibre.ebooks.metadata.amazon import get_cover_url
+        br = browser()
         try:
-            self.get_cover_url(mi.isbn, br, timeout=timeout)
-            self.debug('cover for', mi.isbn, 'found')
-            ans.set()
+            # get_cover_url() reports "no cover" by returning None rather
+            # than by raising, so its result has to be checked before the
+            # cover is announced as found
+            if get_cover_url(mi.isbn, br):
+                self.debug('cover for', mi.isbn, 'found')
+                ans.set()
         except Exception, e:
             self.debug(e)
     def get_covers(self, mi, result_queue, abort, timeout=5.):
-        if not mi.isbn or not self.site_customization:
+        '''
+        Download the cover for ``mi.isbn`` and put the outcome on
+        ``result_queue``: ``(True, cover_data, 'jpg', self.name)`` on
+        success, or ``(False, error message, traceback, self.name)`` on
+        failure. Nothing is queued when ``mi`` has no ISBN. ``abort`` and
+        ``timeout`` are accepted but not used.
+        '''
+        if not mi.isbn:
             return
-        from calibre.ebooks.metadata.library_thing import get_browser, login
-        br = get_browser()
-        un, _, pw = self.site_customization.partition(':')
-        login(br, un, pw)
+        from calibre.ebooks.metadata.amazon import get_cover_url
+        br = browser()
         try:
-            url = self.get_cover_url(mi.isbn, br, timeout=timeout)
+            url = get_cover_url(mi.isbn, br)
+            if not url:
+                # Fail with a meaningful message instead of whatever
+                # br.open_novisit() raises when handed None
+                raise Exception(_('ISBN: %s not found')%mi.isbn)
             cover_data = br.open_novisit(url).read()
             result_queue.put((True, cover_data, 'jpg', self.name))
         except Exception, e:
             result_queue.put((False, self.exception_to_string(e),
                 traceback.format_exc(), self.name))
-    def customization_help(self, gui=False):
-        ans = _('To use librarything.com you must sign up for a %sfree account%s '
-                'and enter your username and password separated by a : below.')
-        return ''+ans%('', '')
-
# }}}
def check_for_cover(mi, timeout=5.): # {{{