Fix LibraryThing metadata download plugin

2025-07-09 03:04:10 -04:00 · 2010-06-24 16:54:53 -06:00 · 2010-06-24 16:54:53 -06:00 · 908d0fd6ce
commit 908d0fd6ce
parent 38893f7a0a
2 changed files with 78 additions and 37 deletions
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -210,31 +210,19 @@ class LibraryThing(MetadataSource): # {{{

    name = 'LibraryThing'
    metadata_type = 'social'
-    description = _('Downloads series information from librarything.com')
+    description = _('Downloads series/tags/rating information from librarything.com')

    def fetch(self):
        if not self.isbn:
            return
-        from calibre.ebooks.metadata import MetaInformation
-        import json
-        br = browser()
+        from calibre.ebooks.metadata.library_thing import get_social_metadata
        try:
-            raw = br.open(
-                    'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn
-                    ).read()
-            data = json.loads(raw)
-            if not data:
-                return
-            if 'error' in data:
-                raise Exception(data['error'])
-            if 'series' in data and 'series_index' in data:
-                mi = MetaInformation(self.title, [])
-                mi.series = data['series']
-                mi.series_index = data['series_index']
-                self.results = mi
+            self.results = get_social_metadata(self.title, self.book_author,
+                    self.publisher, self.isbn)
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()
+
    # }}}


--- a/src/calibre/ebooks/metadata/library_thing.py
+++ b/src/calibre/ebooks/metadata/library_thing.py
@@ -6,10 +6,11 @@ Fetch cover from LibraryThing.com based on ISBN number.

 import sys, socket, os, re

-from calibre import browser as _browser
+from lxml import html
+
+from calibre import browser, prints
 from calibre.utils.config import OptionParser
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-browser = None

 OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'

@@ -22,31 +23,28 @@ class ISBNNotFound(LibraryThingError):
 class ServerBusy(LibraryThingError):
    pass

-def login(username, password, force=True):
-    global browser
-    if browser is not None and not force:
-        return
-    browser = _browser()
-    browser.open('http://www.librarything.com')
-    browser.select_form('signup')
-    browser['formusername'] = username
-    browser['formpassword'] = password
-    browser.submit()
+def login(br, username, password, force=True):
+    br.open('http://www.librarything.com')
+    br.select_form('signup')
+    br['formusername'] = username
+    br['formpassword'] = password
+    br.submit()


 def cover_from_isbn(isbn, timeout=5., username=None, password=None):
-    global browser
-    if browser is None:
-        browser = _browser()
    src = None
+    br = browser()
    try:
-        return browser.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
+        return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
    except:
        pass # Cover not found
    if username and password:
-        login(username, password, force=False)
+        try:
+            login(br, username, password, force=False)
+        except:
+            pass
    try:
-        src = browser.open('http://www.librarything.com/isbn/'+isbn,
+        src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
                timeout=timeout).read().decode('utf-8', 'replace')
    except Exception, err:
        if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
@@ -63,7 +61,7 @@ def cover_from_isbn(isbn, timeout=5., username=None, password=None):
        if url is None:
            raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
        url = re.sub(r'_S[XY]\d+', '', url['src'])
-        cover_data = browser.open(url).read()
+        cover_data = br.open_novisit(url).read()
        return cover_data, url.rpartition('.')[-1]

 def option_parser():
@@ -71,7 +69,7 @@ def option_parser():
 _('''
 %prog [options] ISBN

-Fetch a cover image for the book identified by ISBN from LibraryThing.com
+Fetch a cover image/social metadata for the book identified by ISBN from LibraryThing.com
 '''))
    parser.add_option('-u', '--username', default=None,
                      help='Username for LibraryThing.com')
@@ -79,6 +77,59 @@ Fetch a cover image for the book identified by ISBN from LibraryThing.com
                      help='Password for LibraryThing.com')
    return parser

+def get_social_metadata(title, authors, publisher, isbn, username=None,
+        password=None):
+    from calibre.ebooks.metadata import MetaInformation
+    mi = MetaInformation(title, authors)
+    if isbn:
+        br = browser()
+        if username and password:
+            try:
+                login(br, username, password, force=False)
+            except:
+                pass
+
+        raw = br.open_novisit('http://www.librarything.com/isbn/'
+                    +isbn).read()
+        root = html.fromstring(raw)
+        h1 = root.xpath('//div[@class="headsummary"]/h1')
+        if h1 and not mi.title:
+            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
+        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
+        if h2 and not mi.authors:
+            mi.authors = [html.tostring(x, method='text', encoding=unicode) for
+                    x in h2]
+        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
+        if h3:
+            match = None
+            for h in h3:
+               series = html.tostring(h, method='text', encoding=unicode)
+               match = re.search(r'(.+) \((.+)\)', series)
+               if match is not None:
+                   break
+            if match is not None:
+                mi.series = match.group(1).strip()
+                match = re.search(r'[0-9.]+', match.group(2))
+                si = 1.0
+                if match is not None:
+                    si = float(match.group())
+                mi.series_index = si
+        tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
+        if tags:
+            mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
+                    in tags]
+        span = root.xpath(
+                '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
+        if span:
+            raw = html.tostring(span[0], method='text', encoding=unicode)
+            match = re.search(r'([0-9.]+)', raw)
+            if match is not None:
+                rating = float(match.group())
+                if rating > 0 and rating <= 5:
+                    mi.rating = rating
+    return mi
+
+
 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
@@ -86,6 +137,8 @@ def main(args=sys.argv):
        parser.print_help()
        return 1
    isbn = args[1]
+    mi = get_social_metadata('', [], '', isbn)
+    prints(mi)
    cover_data, ext = cover_from_isbn(isbn, username=opts.username,
            password=opts.password)
    if not ext: