Only use LibraryThing to download metadata if the user provides a LibraryThing username and password

This commit is contained in:
Kovid Goyal 2011-01-26 17:20:49 -07:00
parent 81c73dd0ab
commit dc9500a27e
3 changed files with 91 additions and 63 deletions

View File

@ -121,6 +121,7 @@ class LibraryThingCovers(CoverDownload): # {{{
LIBRARYTHING = 'http://www.librarything.com/isbn/' LIBRARYTHING = 'http://www.librarything.com/isbn/'
def get_cover_url(self, isbn, br, timeout=5.): def get_cover_url(self, isbn, br, timeout=5.):
try: try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn, src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace') timeout=timeout).read().decode('utf-8', 'replace')
@ -129,6 +130,8 @@ class LibraryThingCovers(CoverDownload): # {{{
err = Exception(_('LibraryThing.com timed out. Try again later.')) err = Exception(_('LibraryThing.com timed out. Try again later.'))
raise err raise err
else: else:
if '/wiki/index.php/HelpThing:Verify' in src:
raise Exception('LibraryThing is blocking your computer.')
s = BeautifulSoup(src) s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'}) url = s.find('td', attrs={'class':'left'})
if url is None: if url is None:
@ -142,9 +145,12 @@ class LibraryThingCovers(CoverDownload): # {{{
return url return url
def has_cover(self, mi, ans, timeout=5.): def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn: if not mi.isbn or not self.site_customization:
return False return False
br = browser() from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
try: try:
self.get_cover_url(mi.isbn, br, timeout=timeout) self.get_cover_url(mi.isbn, br, timeout=timeout)
self.debug('cover for', mi.isbn, 'found') self.debug('cover for', mi.isbn, 'found')
@ -153,9 +159,12 @@ class LibraryThingCovers(CoverDownload): # {{{
self.debug(e) self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.): def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn: if not mi.isbn or not self.site_customization:
return return
br = browser() from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
try: try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout) url = self.get_cover_url(mi.isbn, br, timeout=timeout)
cover_data = br.open_novisit(url).read() cover_data = br.open_novisit(url).read()
@ -164,6 +173,11 @@ class LibraryThingCovers(CoverDownload): # {{{
result_queue.put((False, self.exception_to_string(e), result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name)) traceback.format_exc(), self.name))
def customization_help(self, gui=False):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}} # }}}
def check_for_cover(mi, timeout=5.): # {{{ def check_for_cover(mi, timeout=5.): # {{{

View File

@ -251,19 +251,26 @@ class LibraryThing(MetadataSource): # {{{
name = 'LibraryThing' name = 'LibraryThing'
metadata_type = 'social' metadata_type = 'social'
description = _('Downloads series/tags/rating information from librarything.com') description = _('Downloads series/covers/rating information from librarything.com')
def fetch(self): def fetch(self):
if not self.isbn: if not self.isbn or not self.site_customization:
return return
from calibre.ebooks.metadata.library_thing import get_social_metadata from calibre.ebooks.metadata.library_thing import get_social_metadata
un, _, pw = self.site_customization.partition(':')
try: try:
self.results = get_social_metadata(self.title, self.book_author, self.results = get_social_metadata(self.title, self.book_author,
self.publisher, self.isbn) self.publisher, self.isbn, username=un, password=pw)
except Exception, e: except Exception, e:
self.exception = e self.exception = e
self.tb = traceback.format_exc() self.tb = traceback.format_exc()
@property
def string_customization_help(self):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}} # }}}

View File

@ -4,14 +4,13 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch cover from LibraryThing.com based on ISBN number. Fetch cover from LibraryThing.com based on ISBN number.
''' '''
import sys, socket, os, re, random import sys, re, random
from lxml import html from lxml import html
import mechanize import mechanize
from calibre import browser, prints from calibre import browser, prints
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import strip_encoding_declarations from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@ -28,6 +27,12 @@ def get_ua():
] ]
return choices[random.randint(0, len(choices)-1)] return choices[random.randint(0, len(choices)-1)]
_lt_br = None
def get_browser():
global _lt_br
if _lt_br is None:
_lt_br = browser(user_agent=get_ua())
return _lt_br.clone_browser()
class HeadRequest(mechanize.Request): class HeadRequest(mechanize.Request):
@ -35,7 +40,7 @@ class HeadRequest(mechanize.Request):
return 'HEAD' return 'HEAD'
def check_for_cover(isbn, timeout=5.): def check_for_cover(isbn, timeout=5.):
br = browser(user_agent=get_ua()) br = get_browser()
br.set_handle_redirect(False) br.set_handle_redirect(False)
try: try:
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout) br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
@ -54,46 +59,16 @@ class ISBNNotFound(LibraryThingError):
class ServerBusy(LibraryThingError): class ServerBusy(LibraryThingError):
pass pass
def login(br, username, password, force=True): def login(br, username, password):
br.open('http://www.librarything.com') raw = br.open('http://www.librarything.com').read()
if '>Sign out' in raw:
return
br.select_form('signup') br.select_form('signup')
br['formusername'] = username br['formusername'] = username
br['formpassword'] = password br['formpassword'] = password
br.submit() raw = br.submit().read()
if '>Sign out' not in raw:
raise ValueError('Failed to login as %r:%r'%(username, password))
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
src = None
br = browser(user_agent=get_ua())
try:
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
except:
pass # Cover not found
if username and password:
try:
login(br, username, password, force=False)
except:
pass
try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = LibraryThingError(_('LibraryThing.com timed out. Try again later.'))
raise err
else:
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:
if s.find('div', attrs={'class':'highloadwarning'}) is not None:
raise ServerBusy(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
raise ISBNNotFound('ISBN: '+isbn+_(' not found.'))
url = url.find('img')
if url is None:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src'])
cover_data = br.open_novisit(url).read()
return cover_data, url.rpartition('.')[-1]
def option_parser(): def option_parser():
parser = OptionParser(usage=\ parser = OptionParser(usage=\
@ -113,15 +88,16 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
if isbn: if isbn:
br = browser(user_agent=get_ua()) br = get_browser()
if username and password:
try: try:
login(br, username, password, force=False) login(br, username, password)
except:
pass
raw = br.open_novisit('http://www.librarything.com/isbn/' raw = br.open_novisit('http://www.librarything.com/isbn/'
+isbn).read() +isbn).read()
except:
return mi
if '/wiki/index.php/HelpThing:Verify' in raw:
raise Exception('LibraryThing is blocking your computer.')
if not raw: if not raw:
return mi return mi
raw = raw.decode('utf-8', 'replace') raw = raw.decode('utf-8', 'replace')
@ -172,15 +148,46 @@ def main(args=sys.argv):
parser.print_help() parser.print_help()
return 1 return 1
isbn = args[1] isbn = args[1]
mi = get_social_metadata('', [], '', isbn) from calibre.customize.ui import metadata_sources, cover_sources
lt = None
for x in metadata_sources('social'):
if x.name == 'LibraryThing':
lt = x
break
lt('', '', '', isbn, True)
lt.join()
if lt.exception:
print lt.tb
return 1
mi = lt.results
prints(mi) prints(mi)
cover_data, ext = cover_from_isbn(isbn, username=opts.username, mi.isbn = isbn
password=opts.password)
if not ext: lt = None
ext = 'jpg' for x in cover_sources():
oname = os.path.abspath(isbn+'.'+ext) if x.name == 'librarything.com covers':
open(oname, 'w').write(cover_data) lt = x
print 'Cover saved to file', oname break
from threading import Event
from Queue import Queue
ev = Event()
lt.has_cover(mi, ev)
hc = ev.is_set()
print 'Has cover:', hc
if hc:
abort = Event()
temp = Queue()
lt.get_covers(mi, temp, abort)
cover = temp.get_nowait()
if cover[0]:
open(isbn + '.jpg', 'wb').write(cover[1])
print 'Cover saved to:', isbn+'.jpg'
else:
print 'Cover download failed'
print cover[2]
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':