Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Replace LibraryThing cover download plugin with a new plugin to download covers from Amazon
This commit is contained in:
Parent: 73119e2597
Commit: 3a2daf39e3
@ -511,14 +511,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
|
|||||||
from calibre.ebooks.metadata.douban import DoubanBooks
|
from calibre.ebooks.metadata.douban import DoubanBooks
|
||||||
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
|
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
|
||||||
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
|
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
|
||||||
LibraryThingCovers, DoubanCovers
|
AmazonCovers, DoubanCovers
|
||||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
|
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
|
||||||
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
||||||
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
||||||
|
|
||||||
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
|
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
|
||||||
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
|
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
|
||||||
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
|
Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
|
||||||
NiceBooksCovers]
|
NiceBooksCovers]
|
||||||
plugins += [
|
plugins += [
|
||||||
ComicInput,
|
ComicInput,
|
||||||
|
@ -271,6 +271,8 @@ def check_isbn13(isbn):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def check_isbn(isbn):
|
def check_isbn(isbn):
|
||||||
|
if not isbn:
|
||||||
|
return None
|
||||||
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
|
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
|
||||||
if len(isbn) == 10:
|
if len(isbn) == 10:
|
||||||
return check_isbn10(isbn)
|
return check_isbn10(isbn)
|
||||||
|
@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Fetch metadata using Amazon AWS
|
Fetch metadata using Amazon AWS
|
||||||
'''
|
'''
|
||||||
import sys, re
|
import sys, re
|
||||||
|
from threading import RLock
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from lxml.html import soupparser
|
from lxml.html import soupparser
|
||||||
@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
|
|||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.library.comments import sanitize_comments_html
|
from calibre.library.comments import sanitize_comments_html
|
||||||
|
|
||||||
|
asin_cache = {}
|
||||||
|
cover_url_cache = {}
|
||||||
|
cache_lock = RLock()
|
||||||
|
|
||||||
def find_asin(br, isbn):
|
def find_asin(br, isbn):
|
||||||
q = 'http://www.amazon.com/s?field-keywords='+isbn
|
q = 'http://www.amazon.com/s?field-keywords='+isbn
|
||||||
raw = br.open_novisit(q).read()
|
raw = br.open_novisit(q).read()
|
||||||
@ -29,6 +34,12 @@ def find_asin(br, isbn):
|
|||||||
return revs[0]
|
return revs[0]
|
||||||
|
|
||||||
def to_asin(br, isbn):
|
def to_asin(br, isbn):
|
||||||
|
with cache_lock:
|
||||||
|
ans = asin_cache.get(isbn, None)
|
||||||
|
if ans:
|
||||||
|
return ans
|
||||||
|
if ans is False:
|
||||||
|
return None
|
||||||
if len(isbn) == 13:
|
if len(isbn) == 13:
|
||||||
try:
|
try:
|
||||||
asin = find_asin(br, isbn)
|
asin = find_asin(br, isbn)
|
||||||
@ -38,8 +49,11 @@ def to_asin(br, isbn):
|
|||||||
asin = None
|
asin = None
|
||||||
else:
|
else:
|
||||||
asin = isbn
|
asin = isbn
|
||||||
|
with cache_lock:
|
||||||
|
asin_cache[isbn] = ans if ans else False
|
||||||
return asin
|
return asin
|
||||||
|
|
||||||
|
|
||||||
def get_social_metadata(title, authors, publisher, isbn):
|
def get_social_metadata(title, authors, publisher, isbn):
|
||||||
mi = Metadata(title, authors)
|
mi = Metadata(title, authors)
|
||||||
if not isbn:
|
if not isbn:
|
||||||
@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
|
|||||||
return mi
|
return mi
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
def get_cover_url(isbn, br):
    '''
    Return a cover image URL for ``isbn``, or None if none could be found.

    The ISBN is first normalised with check_isbn(); an invalid or empty
    ISBN yields None immediately. Results are memoised in the module-level
    ``cover_url_cache`` (guarded by ``cache_lock``): a URL string records a
    hit and ``False`` records a previous unsuccessful lookup.

    :param isbn: ISBN of the book, as accepted by check_isbn()
    :param br: browser object, passed through to to_asin() and
               _get_cover_url()
    '''
    isbn = check_isbn(isbn)
    if not isbn:
        return None

    with cache_lock:
        cached = cover_url_cache.get(isbn, None)
    if cached:
        return cached
    if cached is False:
        # An earlier lookup for this ISBN already came up empty
        return None

    # First try the ISBN exactly as given
    direct_asin = to_asin(br, isbn)
    if direct_asin:
        url = _get_cover_url(br, direct_asin)
        if url:
            with cache_lock:
                cover_url_cache[isbn] = url
            return url

    # Fall back to the other ISBNs that xisbn associates with this one
    from calibre.ebooks.metadata.xisbn import xisbn
    for other in xisbn.get_associated_isbns(isbn):
        other_asin = to_asin(br, other)
        if not other_asin:
            continue
        url = _get_cover_url(br, other_asin)
        if not url:
            continue
        with cache_lock:
            # Remember the result under both the requested and the
            # associated ISBN
            cover_url_cache[isbn] = url
            cover_url_cache[other] = url
        return url

    # Nothing found: store False so the next call returns None right away
    with cache_lock:
        cover_url_cache[isbn] = False
    return None
|
||||||
|
|
||||||
|
def _get_cover_url(br, asin):
    '''
    Fetch the product page for ``asin`` and extract its cover image URL.

    :param br: browser object providing ``open_novisit(url)``, whose result
               has a ``read()`` method
    :param asin: identifier appended to ``http://amzn.com/`` to build the
                 page URL
    :return: the image URL as a string, or None if the page does not exist,
             cannot be parsed, or contains no usable product image
    :raises: any exception raised while fetching the page, other than one
             whose ``getcode()`` reports 404
    '''
    q = 'http://amzn.com/'+asin
    try:
        raw = br.open_novisit(q).read()
    except Exception as e:
        # A 404 just means there is no such product; anything else is a
        # genuine failure the caller should see
        if callable(getattr(e, 'getcode', None)) and \
                e.getcode() == 404:
            return None
        raise
    if '<title>404 - ' in raw:
        # Error page delivered without raising a 404 error
        return None
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            resolve_entities=True)[0]
    try:
        root = soupparser.fromstring(raw)
    except Exception:
        # Unparseable markup: report "no cover" with the same value (None)
        # as every other failure path, without trapping KeyboardInterrupt
        # or SystemExit
        return None

    imgs = root.xpath('//img[@id="prodImage" and @src]')
    if not imgs:
        return None
    parts = imgs[0].get('src').split('/')
    if len(parts) <= 3:
        return None
    bn = parts[-1]
    sparts = bn.split('_')
    if len(sparts) > 2:
        # Keep only the first and last '_'-separated pieces of the file
        # name (presumably the middle pieces are size modifiers -- TODO
        # confirm against current Amazon image URLs)
        bn = sparts[0] + sparts[-1]
    return ('/'.join(parts[:-1]))+'/'+bn
|
||||||
|
|
||||||
|
|
||||||
def get_metadata(br, asin, mi):
|
def get_metadata(br, asin, mi):
|
||||||
q = 'http://amzn.com/'+asin
|
q = 'http://amzn.com/'+asin
|
||||||
try:
|
try:
|
||||||
@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
|
|||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
# Test xisbn
|
import tempfile, os
|
||||||
print get_social_metadata('Learning Python', None, None, '8324616489')
|
tdir = tempfile.gettempdir()
|
||||||
print
|
br = browser()
|
||||||
|
for title, isbn in [
|
||||||
|
('Learning Python', '8324616489'), # Test xisbn
|
||||||
|
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
|
||||||
|
# Random tests
|
||||||
|
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
|
||||||
|
('The Great Gatsby', '0743273567'),
|
||||||
|
]:
|
||||||
|
cpath = os.path.join(tdir, title+'.jpg')
|
||||||
|
curl = get_cover_url(isbn, br)
|
||||||
|
if curl is None:
|
||||||
|
print 'No cover found for', title
|
||||||
|
else:
|
||||||
|
open(cpath, 'wb').write(br.open_novisit(curl).read())
|
||||||
|
print 'Cover for', title, 'saved to', cpath
|
||||||
|
|
||||||
# Test sophisticated comment formatting
|
print get_social_metadata(title, None, None, isbn)
|
||||||
print get_social_metadata('Angels & Demons', None, None, '9781416580829')
|
|
||||||
print
|
|
||||||
|
|
||||||
# Random tests
|
|
||||||
print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
|
|
||||||
print
|
|
||||||
print get_social_metadata('The Great Gatsby', None, None, '0743273567')
|
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import traceback, socket, re, sys
|
import traceback, socket, sys
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
@ -15,7 +15,6 @@ import mechanize
|
|||||||
|
|
||||||
from calibre.customize import Plugin
|
from calibre.customize import Plugin
|
||||||
from calibre import browser, prints
|
from calibre import browser, prints
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.constants import preferred_encoding, DEBUG
|
from calibre.constants import preferred_encoding, DEBUG
|
||||||
|
|
||||||
class CoverDownload(Plugin):
|
class CoverDownload(Plugin):
|
||||||
@ -112,73 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class LibraryThingCovers(CoverDownload): # {{{
|
class AmazonCovers(CoverDownload): # {{{
|
||||||
|
|
||||||
name = 'librarything.com covers'
|
name = 'amazon.com covers'
|
||||||
description = _('Download covers from librarything.com')
|
description = _('Download covers from amazon.com')
|
||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
|
|
||||||
LIBRARYTHING = 'http://www.librarything.com/isbn/'
|
|
||||||
|
|
||||||
def get_cover_url(self, isbn, br, timeout=5.):
|
|
||||||
|
|
||||||
try:
|
|
||||||
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
|
|
||||||
timeout=timeout).read().decode('utf-8', 'replace')
|
|
||||||
except Exception, err:
|
|
||||||
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
|
||||||
err = Exception(_('LibraryThing.com timed out. Try again later.'))
|
|
||||||
raise err
|
|
||||||
else:
|
|
||||||
if '/wiki/index.php/HelpThing:Verify' in src:
|
|
||||||
raise Exception('LibraryThing is blocking calibre.')
|
|
||||||
s = BeautifulSoup(src)
|
|
||||||
url = s.find('td', attrs={'class':'left'})
|
|
||||||
if url is None:
|
|
||||||
if s.find('div', attrs={'class':'highloadwarning'}) is not None:
|
|
||||||
raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
|
|
||||||
raise Exception(_('ISBN: %s not found')%isbn)
|
|
||||||
url = url.find('img')
|
|
||||||
if url is None:
|
|
||||||
raise Exception(_('LibraryThing.com server error. Try again later.'))
|
|
||||||
url = re.sub(r'_S[XY]\d+', '', url['src'])
|
|
||||||
return url
|
|
||||||
|
|
||||||
def has_cover(self, mi, ans, timeout=5.):
|
def has_cover(self, mi, ans, timeout=5.):
|
||||||
return False
|
if not mi.isbn:
|
||||||
if not mi.isbn or not self.site_customization:
|
|
||||||
return False
|
return False
|
||||||
from calibre.ebooks.metadata.library_thing import get_browser, login
|
from calibre.ebooks.metadata.amazon import get_cover_url
|
||||||
br = get_browser()
|
br = browser()
|
||||||
un, _, pw = self.site_customization.partition(':')
|
|
||||||
login(br, un, pw)
|
|
||||||
try:
|
try:
|
||||||
self.get_cover_url(mi.isbn, br, timeout=timeout)
|
get_cover_url(mi.isbn, br)
|
||||||
self.debug('cover for', mi.isbn, 'found')
|
self.debug('cover for', mi.isbn, 'found')
|
||||||
ans.set()
|
ans.set()
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self.debug(e)
|
self.debug(e)
|
||||||
|
|
||||||
def get_covers(self, mi, result_queue, abort, timeout=5.):
|
def get_covers(self, mi, result_queue, abort, timeout=5.):
|
||||||
if not mi.isbn or not self.site_customization:
|
if not mi.isbn:
|
||||||
return
|
return
|
||||||
from calibre.ebooks.metadata.library_thing import get_browser, login
|
from calibre.ebooks.metadata.amazon import get_cover_url
|
||||||
br = get_browser()
|
br = browser()
|
||||||
un, _, pw = self.site_customization.partition(':')
|
|
||||||
login(br, un, pw)
|
|
||||||
try:
|
try:
|
||||||
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
|
url = get_cover_url(mi.isbn, br)
|
||||||
cover_data = br.open_novisit(url).read()
|
cover_data = br.open_novisit(url).read()
|
||||||
result_queue.put((True, cover_data, 'jpg', self.name))
|
result_queue.put((True, cover_data, 'jpg', self.name))
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
result_queue.put((False, self.exception_to_string(e),
|
result_queue.put((False, self.exception_to_string(e),
|
||||||
traceback.format_exc(), self.name))
|
traceback.format_exc(), self.name))
|
||||||
|
|
||||||
def customization_help(self, gui=False):
|
|
||||||
ans = _('To use librarything.com you must sign up for a %sfree account%s '
|
|
||||||
'and enter your username and password separated by a : below.')
|
|
||||||
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def check_for_cover(mi, timeout=5.): # {{{
|
def check_for_cover(mi, timeout=5.): # {{{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user