Fix LibraryThing metadata download plugin

This commit is contained in:
Kovid Goyal 2010-06-24 16:54:53 -06:00
parent 38893f7a0a
commit 908d0fd6ce
2 changed files with 78 additions and 37 deletions

View File

@ -210,31 +210,19 @@ class LibraryThing(MetadataSource): # {{{
name = 'LibraryThing' name = 'LibraryThing'
metadata_type = 'social' metadata_type = 'social'
description = _('Downloads series information from librarything.com') description = _('Downloads series/tags/rating information from librarything.com')
def fetch(self): def fetch(self):
if not self.isbn: if not self.isbn:
return return
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.library_thing import get_social_metadata
import json
br = browser()
try: try:
raw = br.open( self.results = get_social_metadata(self.title, self.book_author,
'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn self.publisher, self.isbn)
).read()
data = json.loads(raw)
if not data:
return
if 'error' in data:
raise Exception(data['error'])
if 'series' in data and 'series_index' in data:
mi = MetaInformation(self.title, [])
mi.series = data['series']
mi.series_index = data['series_index']
self.results = mi
except Exception, e: except Exception, e:
self.exception = e self.exception = e
self.tb = traceback.format_exc() self.tb = traceback.format_exc()
# }}} # }}}

View File

@ -6,10 +6,11 @@ Fetch cover from LibraryThing.com based on ISBN number.
import sys, socket, os, re import sys, socket, os, re
from calibre import browser as _browser from lxml import html
from calibre import browser, prints
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
browser = None
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@ -22,31 +23,28 @@ class ISBNNotFound(LibraryThingError):
class ServerBusy(LibraryThingError): class ServerBusy(LibraryThingError):
pass pass
def login(username, password, force=True): def login(br, username, password, force=True):
global browser br.open('http://www.librarything.com')
if browser is not None and not force: br.select_form('signup')
return br['formusername'] = username
browser = _browser() br['formpassword'] = password
browser.open('http://www.librarything.com') br.submit()
browser.select_form('signup')
browser['formusername'] = username
browser['formpassword'] = password
browser.submit()
def cover_from_isbn(isbn, timeout=5., username=None, password=None): def cover_from_isbn(isbn, timeout=5., username=None, password=None):
global browser
if browser is None:
browser = _browser()
src = None src = None
br = browser()
try: try:
return browser.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg' return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
except: except:
pass # Cover not found pass # Cover not found
if username and password: if username and password:
login(username, password, force=False) try:
login(br, username, password, force=False)
except:
pass
try: try:
src = browser.open('http://www.librarything.com/isbn/'+isbn, src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace') timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err: except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout): if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
@ -63,7 +61,7 @@ def cover_from_isbn(isbn, timeout=5., username=None, password=None):
if url is None: if url is None:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.')) raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src']) url = re.sub(r'_S[XY]\d+', '', url['src'])
cover_data = browser.open(url).read() cover_data = br.open_novisit(url).read()
return cover_data, url.rpartition('.')[-1] return cover_data, url.rpartition('.')[-1]
def option_parser(): def option_parser():
@ -71,7 +69,7 @@ def option_parser():
_(''' _('''
%prog [options] ISBN %prog [options] ISBN
Fetch a cover image for the book identified by ISBN from LibraryThing.com Fetch a cover image/social metadata for the book identified by ISBN from LibraryThing.com
''')) '''))
parser.add_option('-u', '--username', default=None, parser.add_option('-u', '--username', default=None,
help='Username for LibraryThing.com') help='Username for LibraryThing.com')
@ -79,6 +77,59 @@ Fetch a cover image for the book identified by ISBN from LibraryThing.com
help='Password for LibraryThing.com') help='Password for LibraryThing.com')
return parser return parser
def _element_text(elem):
    '''Return the text of an lxml element as unicode, whitespace stripped.'''
    return html.tostring(elem, method='text', encoding=unicode).strip()


def _first_number(text):
    '''
    Return the first run of digits/dots in ``text`` as a float.

    Returns None when there is no such run, or when the run is not a valid
    number (for example "." or "1.2.3"), instead of raising ValueError.
    '''
    found = re.search(r'[0-9.]+', text)
    if found is None:
        return None
    try:
        return float(found.group())
    except ValueError:
        return None


def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    '''
    Scrape series, tags and rating for a book from its LibraryThing page.

    :param title: Known title; only replaced from the page when empty.
    :param authors: Known authors; only replaced from the page when empty.
    :param publisher: Accepted for interface compatibility; not used here.
    :param isbn: ISBN used to build the LibraryThing URL. When false, no
        network access happens and the returned object carries only
        ``title`` and ``authors``.
    :param username: Optional LibraryThing user name.
    :param password: Optional LibraryThing password.
    :return: A MetaInformation object with whichever of series,
        series_index, tags and rating could be extracted.

    Errors from fetching or parsing the page propagate to the caller.
    '''
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if not isbn:
        return mi

    br = browser()
    if username and password:
        # Logging in is best effort: the page is still fetched anonymously
        # if it fails. Catch Exception rather than everything so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            login(br, username, password, force=False)
        except Exception:
            pass

    page = br.open_novisit('http://www.librarything.com/isbn/'
            +isbn).read()
    root = html.fromstring(page)

    h1 = root.xpath('//div[@class="headsummary"]/h1')
    if h1 and not mi.title:
        mi.title = _element_text(h1[0])

    h2 = root.xpath('//div[@class="headsummary"]/h2/a')
    if h2 and not mi.authors:
        mi.authors = [_element_text(x) for x in h2]

    # Series links look like "Series Name (Book 3)"; use the first link
    # that has that shape.
    for link in root.xpath('//div[@class="headsummary"]/h3/a'):
        series_match = re.search(r'(.+) \((.+)\)', _element_text(link))
        if series_match is None:
            continue
        mi.series = series_match.group(1).strip()
        index = _first_number(series_match.group(2))
        # Fall back to 1.0 when the parenthesised part has no usable number
        mi.series_index = 1.0 if index is None else index
        break

    tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
    if tags:
        mi.tags = [_element_text(x) for x in tags]

    span = root.xpath(
            '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
    if span:
        rating = _first_number(_element_text(span[0]))
        # Only accept ratings in the range (0, 5]
        if rating is not None and 0 < rating <= 5:
            mi.rating = rating

    return mi
def main(args=sys.argv): def main(args=sys.argv):
parser = option_parser() parser = option_parser()
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)
@ -86,6 +137,8 @@ def main(args=sys.argv):
parser.print_help() parser.print_help()
return 1 return 1
isbn = args[1] isbn = args[1]
mi = get_social_metadata('', [], '', isbn)
prints(mi)
cover_data, ext = cover_from_isbn(isbn, username=opts.username, cover_data, ext = cover_from_isbn(isbn, username=opts.username,
password=opts.password) password=opts.password)
if not ext: if not ext: