diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py index 0fd671f86a..66e994f24f 100644 --- a/src/calibre/ebooks/metadata/fetch.py +++ b/src/calibre/ebooks/metadata/fetch.py @@ -210,31 +210,19 @@ class LibraryThing(MetadataSource): # {{{ name = 'LibraryThing' metadata_type = 'social' - description = _('Downloads series information from librarything.com') + description = _('Downloads series/tags/rating information from librarything.com') def fetch(self): if not self.isbn: return - from calibre.ebooks.metadata import MetaInformation - import json - br = browser() + from calibre.ebooks.metadata.library_thing import get_social_metadata try: - raw = br.open( - 'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn - ).read() - data = json.loads(raw) - if not data: - return - if 'error' in data: - raise Exception(data['error']) - if 'series' in data and 'series_index' in data: - mi = MetaInformation(self.title, []) - mi.series = data['series'] - mi.series_index = data['series_index'] - self.results = mi + self.results = get_social_metadata(self.title, self.book_author, + self.publisher, self.isbn) except Exception, e: self.exception = e self.tb = traceback.format_exc() + # }}} diff --git a/src/calibre/ebooks/metadata/library_thing.py b/src/calibre/ebooks/metadata/library_thing.py index d10d80bc61..eaff881ede 100644 --- a/src/calibre/ebooks/metadata/library_thing.py +++ b/src/calibre/ebooks/metadata/library_thing.py @@ -6,10 +6,11 @@ Fetch cover from LibraryThing.com based on ISBN number. import sys, socket, os, re -from calibre import browser as _browser +from lxml import html + +from calibre import browser, prints from calibre.utils.config import OptionParser from calibre.ebooks.BeautifulSoup import BeautifulSoup -browser = None OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' @@ -22,31 +23,28 @@ class ISBNNotFound(LibraryThingError): class ServerBusy(LibraryThingError): pass -def login(username, password, force=True): - global browser - if browser is not None and not force: - return - browser = _browser() - browser.open('http://www.librarything.com') - browser.select_form('signup') - browser['formusername'] = username - browser['formpassword'] = password - browser.submit() +def login(br, username, password, force=True): + br.open('http://www.librarything.com') + br.select_form('signup') + br['formusername'] = username + br['formpassword'] = password + br.submit() def cover_from_isbn(isbn, timeout=5., username=None, password=None): - global browser - if browser is None: - browser = _browser() src = None + br = browser() try: - return browser.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg' + return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg' except: pass # Cover not found if username and password: - login(username, password, force=False) + try: + login(br, username, password, force=False) + except: + pass try: - src = browser.open('http://www.librarything.com/isbn/'+isbn, + src = br.open_novisit('http://www.librarything.com/isbn/'+isbn, timeout=timeout).read().decode('utf-8', 'replace') except Exception, err: if isinstance(getattr(err, 'args', [None])[0], socket.timeout): @@ -63,7 +61,7 @@ def cover_from_isbn(isbn, timeout=5., username=None, password=None): if url is None: raise LibraryThingError(_('LibraryThing.com server error. Try again later.')) url = re.sub(r'_S[XY]\d+', '', url['src']) - cover_data = browser.open(url).read() + cover_data = br.open_novisit(url).read() return cover_data, url.rpartition('.')[-1] def option_parser(): @@ -71,7 +69,7 @@ def option_parser(): _(''' %prog [options] ISBN -Fetch a cover image for the book identified by ISBN from LibraryThing.com +Fetch a cover image/social metadata for the book identified by ISBN from LibraryThing.com ''')) parser.add_option('-u', '--username', default=None, help='Username for LibraryThing.com') @@ -79,6 +77,59 @@ Fetch a cover image for the book identified by ISBN from LibraryThing.com help='Password for LibraryThing.com') return parser +def get_social_metadata(title, authors, publisher, isbn, username=None, + password=None): + from calibre.ebooks.metadata import MetaInformation + mi = MetaInformation(title, authors) + if isbn: + br = browser() + if username and password: + try: + login(br, username, password, force=False) + except: + pass + + raw = br.open_novisit('http://www.librarything.com/isbn/' + +isbn).read() + root = html.fromstring(raw) + h1 = root.xpath('//div[@class="headsummary"]/h1') + if h1 and not mi.title: + mi.title = html.tostring(h1[0], method='text', encoding=unicode) + h2 = root.xpath('//div[@class="headsummary"]/h2/a') + if h2 and not mi.authors: + mi.authors = [html.tostring(x, method='text', encoding=unicode) for + x in h2] + h3 = root.xpath('//div[@class="headsummary"]/h3/a') + if h3: + match = None + for h in h3: + series = html.tostring(h, method='text', encoding=unicode) + match = re.search(r'(.+) \((.+)\)', series) + if match is not None: + break + if match is not None: + mi.series = match.group(1).strip() + match = re.search(r'[0-9.]+', match.group(2)) + si = 1.0 + if match is not None: + si = float(match.group()) + mi.series_index = si + tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a') + if tags: + mi.tags = [html.tostring(x, method='text', encoding=unicode) for x + in tags] + span = root.xpath( + '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span') + if span: + raw = html.tostring(span[0], method='text', encoding=unicode) + match = re.search(r'([0-9.]+)', raw) + if match is not None: + rating = float(match.group()) + if rating > 0 and rating <= 5: + mi.rating = rating + return mi + + def main(args=sys.argv): parser = option_parser() opts, args = parser.parse_args(args) @@ -86,6 +137,8 @@ def main(args=sys.argv): parser.print_help() return 1 isbn = args[1] + mi = get_social_metadata('', [], '', isbn) + prints(mi) cover_data, ext = cover_from_isbn(isbn, username=opts.username, password=opts.password) if not ext: