mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix LibraryThing metadata download plugin
This commit is contained in:
parent
38893f7a0a
commit
908d0fd6ce
@ -210,31 +210,19 @@ class LibraryThing(MetadataSource): # {{{
|
||||
|
||||
name = 'LibraryThing'
|
||||
metadata_type = 'social'
|
||||
description = _('Downloads series information from librarything.com')
|
||||
description = _('Downloads series/tags/rating information from librarything.com')
|
||||
|
||||
def fetch(self):
|
||||
if not self.isbn:
|
||||
return
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
import json
|
||||
br = browser()
|
||||
from calibre.ebooks.metadata.library_thing import get_social_metadata
|
||||
try:
|
||||
raw = br.open(
|
||||
'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn
|
||||
).read()
|
||||
data = json.loads(raw)
|
||||
if not data:
|
||||
return
|
||||
if 'error' in data:
|
||||
raise Exception(data['error'])
|
||||
if 'series' in data and 'series_index' in data:
|
||||
mi = MetaInformation(self.title, [])
|
||||
mi.series = data['series']
|
||||
mi.series_index = data['series_index']
|
||||
self.results = mi
|
||||
self.results = get_social_metadata(self.title, self.book_author,
|
||||
self.publisher, self.isbn)
|
||||
except Exception, e:
|
||||
self.exception = e
|
||||
self.tb = traceback.format_exc()
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -6,10 +6,11 @@ Fetch cover from LibraryThing.com based on ISBN number.
|
||||
|
||||
import sys, socket, os, re
|
||||
|
||||
from calibre import browser as _browser
|
||||
from lxml import html
|
||||
|
||||
from calibre import browser, prints
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
browser = None
|
||||
|
||||
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
||||
|
||||
@ -22,31 +23,28 @@ class ISBNNotFound(LibraryThingError):
|
||||
class ServerBusy(LibraryThingError):
|
||||
pass
|
||||
|
||||
def login(username, password, force=True):
|
||||
global browser
|
||||
if browser is not None and not force:
|
||||
return
|
||||
browser = _browser()
|
||||
browser.open('http://www.librarything.com')
|
||||
browser.select_form('signup')
|
||||
browser['formusername'] = username
|
||||
browser['formpassword'] = password
|
||||
browser.submit()
|
||||
def login(br, username, password, force=True):
|
||||
br.open('http://www.librarything.com')
|
||||
br.select_form('signup')
|
||||
br['formusername'] = username
|
||||
br['formpassword'] = password
|
||||
br.submit()
|
||||
|
||||
|
||||
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
|
||||
global browser
|
||||
if browser is None:
|
||||
browser = _browser()
|
||||
src = None
|
||||
br = browser()
|
||||
try:
|
||||
return browser.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
|
||||
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
|
||||
except:
|
||||
pass # Cover not found
|
||||
if username and password:
|
||||
login(username, password, force=False)
|
||||
try:
|
||||
login(br, username, password, force=False)
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
src = browser.open('http://www.librarything.com/isbn/'+isbn,
|
||||
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
|
||||
timeout=timeout).read().decode('utf-8', 'replace')
|
||||
except Exception, err:
|
||||
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
||||
@ -63,7 +61,7 @@ def cover_from_isbn(isbn, timeout=5., username=None, password=None):
|
||||
if url is None:
|
||||
raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
|
||||
url = re.sub(r'_S[XY]\d+', '', url['src'])
|
||||
cover_data = browser.open(url).read()
|
||||
cover_data = br.open_novisit(url).read()
|
||||
return cover_data, url.rpartition('.')[-1]
|
||||
|
||||
def option_parser():
|
||||
@ -71,7 +69,7 @@ def option_parser():
|
||||
_('''
|
||||
%prog [options] ISBN
|
||||
|
||||
Fetch a cover image for the book identified by ISBN from LibraryThing.com
|
||||
Fetch a cover image/social metadata for the book identified by ISBN from LibraryThing.com
|
||||
'''))
|
||||
parser.add_option('-u', '--username', default=None,
|
||||
help='Username for LibraryThing.com')
|
||||
@ -79,6 +77,59 @@ Fetch a cover image for the book identified by ISBN from LibraryThing.com
|
||||
help='Password for LibraryThing.com')
|
||||
return parser
|
||||
|
||||
def get_social_metadata(title, authors, publisher, isbn, username=None,
|
||||
password=None):
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
mi = MetaInformation(title, authors)
|
||||
if isbn:
|
||||
br = browser()
|
||||
if username and password:
|
||||
try:
|
||||
login(br, username, password, force=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
raw = br.open_novisit('http://www.librarything.com/isbn/'
|
||||
+isbn).read()
|
||||
root = html.fromstring(raw)
|
||||
h1 = root.xpath('//div[@class="headsummary"]/h1')
|
||||
if h1 and not mi.title:
|
||||
mi.title = html.tostring(h1[0], method='text', encoding=unicode)
|
||||
h2 = root.xpath('//div[@class="headsummary"]/h2/a')
|
||||
if h2 and not mi.authors:
|
||||
mi.authors = [html.tostring(x, method='text', encoding=unicode) for
|
||||
x in h2]
|
||||
h3 = root.xpath('//div[@class="headsummary"]/h3/a')
|
||||
if h3:
|
||||
match = None
|
||||
for h in h3:
|
||||
series = html.tostring(h, method='text', encoding=unicode)
|
||||
match = re.search(r'(.+) \((.+)\)', series)
|
||||
if match is not None:
|
||||
break
|
||||
if match is not None:
|
||||
mi.series = match.group(1).strip()
|
||||
match = re.search(r'[0-9.]+', match.group(2))
|
||||
si = 1.0
|
||||
if match is not None:
|
||||
si = float(match.group())
|
||||
mi.series_index = si
|
||||
tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
|
||||
if tags:
|
||||
mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
|
||||
in tags]
|
||||
span = root.xpath(
|
||||
'//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
|
||||
if span:
|
||||
raw = html.tostring(span[0], method='text', encoding=unicode)
|
||||
match = re.search(r'([0-9.]+)', raw)
|
||||
if match is not None:
|
||||
rating = float(match.group())
|
||||
if rating > 0 and rating <= 5:
|
||||
mi.rating = rating
|
||||
return mi
|
||||
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
@ -86,6 +137,8 @@ def main(args=sys.argv):
|
||||
parser.print_help()
|
||||
return 1
|
||||
isbn = args[1]
|
||||
mi = get_social_metadata('', [], '', isbn)
|
||||
prints(mi)
|
||||
cover_data, ext = cover_from_isbn(isbn, username=opts.username,
|
||||
password=opts.password)
|
||||
if not ext:
|
||||
|
Loading…
x
Reference in New Issue
Block a user