From dc9500a27ea48ff36d9ba632e2cd6fc3a73ba00f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jan 2011 17:20:49 -0700 Subject: [PATCH] Only use LibraryThing to download metadata if the user provides a library thing username and password --- src/calibre/ebooks/metadata/covers.py | 22 +++- src/calibre/ebooks/metadata/fetch.py | 13 +- src/calibre/ebooks/metadata/library_thing.py | 119 ++++++++++--------- 3 files changed, 91 insertions(+), 63 deletions(-) diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py index 2f6fb46540..a0e8b4ea75 100644 --- a/src/calibre/ebooks/metadata/covers.py +++ b/src/calibre/ebooks/metadata/covers.py @@ -121,6 +121,7 @@ class LibraryThingCovers(CoverDownload): # {{{ LIBRARYTHING = 'http://www.librarything.com/isbn/' def get_cover_url(self, isbn, br, timeout=5.): + try: src = br.open_novisit('http://www.librarything.com/isbn/'+isbn, timeout=timeout).read().decode('utf-8', 'replace') @@ -129,6 +130,8 @@ class LibraryThingCovers(CoverDownload): # {{{ err = Exception(_('LibraryThing.com timed out. Try again later.')) raise err else: + if '/wiki/index.php/HelpThing:Verify' in src: + raise Exception('LibraryThing is blocking your computer.') s = BeautifulSoup(src) url = s.find('td', attrs={'class':'left'}) if url is None: @@ -142,9 +145,12 @@ class LibraryThingCovers(CoverDownload): # {{{ return url def has_cover(self, mi, ans, timeout=5.): - if not mi.isbn: + if not mi.isbn or not self.site_customization: return False - br = browser() + from calibre.ebooks.metadata.library_thing import get_browser, login + br = get_browser() + un, _, pw = self.site_customization.partition(':') + login(br, un, pw) try: self.get_cover_url(mi.isbn, br, timeout=timeout) self.debug('cover for', mi.isbn, 'found') @@ -153,9 +159,12 @@ class LibraryThingCovers(CoverDownload): # {{{ self.debug(e) def get_covers(self, mi, result_queue, abort, timeout=5.): - if not mi.isbn: + if not mi.isbn or not self.site_customization: return - br = browser() + from calibre.ebooks.metadata.library_thing import get_browser, login + br = get_browser() + un, _, pw = self.site_customization.partition(':') + login(br, un, pw) try: url = self.get_cover_url(mi.isbn, br, timeout=timeout) cover_data = br.open_novisit(url).read() @@ -164,6 +173,11 @@ class LibraryThingCovers(CoverDownload): # {{{ result_queue.put((False, self.exception_to_string(e), traceback.format_exc(), self.name)) + def customization_help(self, gui=False): + ans = _('To use librarything.com you must sign up for a %sfree account%s ' + 'and enter your username and password separated by a : below.') + return '

'+ans%('', '') + # }}} def check_for_cover(mi, timeout=5.): # {{{ diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py index 8018f42b13..bd8d96a399 100644 --- a/src/calibre/ebooks/metadata/fetch.py +++ b/src/calibre/ebooks/metadata/fetch.py @@ -251,19 +251,26 @@ class LibraryThing(MetadataSource): # {{{ name = 'LibraryThing' metadata_type = 'social' - description = _('Downloads series/tags/rating information from librarything.com') + description = _('Downloads series/covers/rating information from librarything.com') def fetch(self): - if not self.isbn: + if not self.isbn or not self.site_customization: return from calibre.ebooks.metadata.library_thing import get_social_metadata + un, _, pw = self.site_customization.partition(':') try: self.results = get_social_metadata(self.title, self.book_author, - self.publisher, self.isbn) + self.publisher, self.isbn, username=un, password=pw) except Exception, e: self.exception = e self.tb = traceback.format_exc() + @property + def string_customization_help(self): + ans = _('To use librarything.com you must sign up for a %sfree account%s ' + 'and enter your username and password separated by a : below.') + return '

'+ans%('', '') + # }}} diff --git a/src/calibre/ebooks/metadata/library_thing.py b/src/calibre/ebooks/metadata/library_thing.py index d956747a2b..54ec259cb0 100644 --- a/src/calibre/ebooks/metadata/library_thing.py +++ b/src/calibre/ebooks/metadata/library_thing.py @@ -4,14 +4,13 @@ __copyright__ = '2008, Kovid Goyal ' Fetch cover from LibraryThing.com based on ISBN number. ''' -import sys, socket, os, re, random +import sys, re, random from lxml import html import mechanize from calibre import browser, prints from calibre.utils.config import OptionParser -from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.chardet import strip_encoding_declarations OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' @@ -28,6 +27,12 @@ def get_ua(): ] return choices[random.randint(0, len(choices)-1)] +_lt_br = None +def get_browser(): + global _lt_br + if _lt_br is None: + _lt_br = browser(user_agent=get_ua()) + return _lt_br.clone_browser() class HeadRequest(mechanize.Request): @@ -35,7 +40,7 @@ class HeadRequest(mechanize.Request): return 'HEAD' def check_for_cover(isbn, timeout=5.): - br = browser(user_agent=get_ua()) + br = get_browser() br.set_handle_redirect(False) try: br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout) @@ -54,46 +59,16 @@ class ISBNNotFound(LibraryThingError): class ServerBusy(LibraryThingError): pass -def login(br, username, password, force=True): - br.open('http://www.librarything.com') +def login(br, username, password): + raw = br.open('http://www.librarything.com').read() + if '>Sign out' in raw: + return br.select_form('signup') br['formusername'] = username br['formpassword'] = password - br.submit() - - -def cover_from_isbn(isbn, timeout=5., username=None, password=None): - src = None - br = browser(user_agent=get_ua()) - try: - return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg' - except: - pass # Cover not found - if username and password: - try: - login(br, username, password, force=False) - except: - pass - try: - src = br.open_novisit('http://www.librarything.com/isbn/'+isbn, - timeout=timeout).read().decode('utf-8', 'replace') - except Exception, err: - if isinstance(getattr(err, 'args', [None])[0], socket.timeout): - err = LibraryThingError(_('LibraryThing.com timed out. Try again later.')) - raise err - else: - s = BeautifulSoup(src) - url = s.find('td', attrs={'class':'left'}) - if url is None: - if s.find('div', attrs={'class':'highloadwarning'}) is not None: - raise ServerBusy(_('Could not fetch cover as server is experiencing high load. Please try again later.')) - raise ISBNNotFound('ISBN: '+isbn+_(' not found.')) - url = url.find('img') - if url is None: - raise LibraryThingError(_('LibraryThing.com server error. Try again later.')) - url = re.sub(r'_S[XY]\d+', '', url['src']) - cover_data = br.open_novisit(url).read() - return cover_data, url.rpartition('.')[-1] + raw = br.submit().read() + if '>Sign out' not in raw: + raise ValueError('Failed to login as %r:%r'%(username, password)) def option_parser(): parser = OptionParser(usage=\ @@ -113,15 +88,16 @@ def get_social_metadata(title, authors, publisher, isbn, username=None, from calibre.ebooks.metadata import MetaInformation mi = MetaInformation(title, authors) if isbn: - br = browser(user_agent=get_ua()) - if username and password: - try: - login(br, username, password, force=False) - except: - pass + br = get_browser() + try: + login(br, username, password) - raw = br.open_novisit('http://www.librarything.com/isbn/' - +isbn).read() + raw = br.open_novisit('http://www.librarything.com/isbn/' + +isbn).read() + except: + return mi + if '/wiki/index.php/HelpThing:Verify' in raw: + raise Exception('LibraryThing is blocking your computer.') if not raw: return mi raw = raw.decode('utf-8', 'replace') @@ -172,15 +148,46 @@ def main(args=sys.argv): parser.print_help() return 1 isbn = args[1] - mi = get_social_metadata('', [], '', isbn) + from calibre.customize.ui import metadata_sources, cover_sources + lt = None + for x in metadata_sources('social'): + if x.name == 'LibraryThing': + lt = x + break + lt('', '', '', isbn, True) + lt.join() + if lt.exception: + print lt.tb + return 1 + mi = lt.results prints(mi) - cover_data, ext = cover_from_isbn(isbn, username=opts.username, - password=opts.password) - if not ext: - ext = 'jpg' - oname = os.path.abspath(isbn+'.'+ext) - open(oname, 'w').write(cover_data) - print 'Cover saved to file', oname + mi.isbn = isbn + + lt = None + for x in cover_sources(): + if x.name == 'librarything.com covers': + lt = x + break + + from threading import Event + from Queue import Queue + ev = Event() + lt.has_cover(mi, ev) + hc = ev.is_set() + print 'Has cover:', hc + if hc: + abort = Event() + temp = Queue() + lt.get_covers(mi, temp, abort) + + cover = temp.get_nowait() + if cover[0]: + open(isbn + '.jpg', 'wb').write(cover[1]) + print 'Cover saved to:', isbn+'.jpg' + else: + print 'Cover download failed' + print cover[2] + return 0 if __name__ == '__main__':