Refactor the isbndb plugin and add language retrieval

2025-07-09 03:04:10 -04:00 · 2010-12-15 09:10:37 +01:00 · 2010-12-15 09:10:37 +01:00 · a64a22a934
commit a64a22a934
parent 08eb0e1a59
3 changed files with 270 additions and 138 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
-from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
+from calibre.ebooks.metadata.fetch import LibraryThing
 from calibre.ebooks.metadata.isbndb import ISBNDB
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.google_books import GoogleBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
    # }}}
-class ISBNDB(MetadataSource): # {{{
+# class ISBNDB(MetadataSource): # {{{
-    name = 'IsbnDB'
+    # name = 'IsbnDB'
-    description = _('Downloads metadata from isbndb.com')
+    # description = _('Downloads metadata from isbndb.com')
-    def fetch(self):
+    # def fetch(self):
-        if not self.site_customization:
+        # if not self.site_customization:
-            return
+            # return
-        from calibre.ebooks.metadata.isbndb import option_parser, create_books
+        # from calibre.ebooks.metadata.isbndb import option_parser, create_books
-        args = ['isbndb']
+        # args = ['isbndb']
-        if self.isbn:
+        # if self.isbn:
-            args.extend(['--isbn', self.isbn])
+            # args.extend(['--isbn', self.isbn])
-        else:
+        # else:
-            if self.title:
+            # if self.title:
-                args.extend(['--title', self.title])
+                # args.extend(['--title', self.title])
-            if self.book_author:
+            # if self.book_author:
-                args.extend(['--author', self.book_author])
+                # args.extend(['--author', self.book_author])
-            if self.publisher:
+            # if self.publisher:
-                args.extend(['--publisher', self.publisher])
+                # args.extend(['--publisher', self.publisher])
-        if self.verbose:
+        # if self.verbose:
-            args.extend(['--verbose'])
+            # args.extend(['--verbose'])
-        args.append(self.site_customization) # IsbnDb key
+        # args.append(self.site_customization) # IsbnDb key
-        try:
+        # try:
-            opts, args = option_parser().parse_args(args)
+            # opts, args = option_parser().parse_args(args)
-            self.results = create_books(opts, args)
+            # self.results = create_books(opts, args)
-        except Exception, e:
+        # except Exception, e:
-            self.exception = e
+            # self.exception = e
-            self.tb = traceback.format_exc()
+            # self.tb = traceback.format_exc()
-    @property
+    # @property
-    def string_customization_help(self):
+    # def string_customization_help(self):
-        ans = _('To use isbndb.com you must sign up for a %sfree account%s '
+        # ans = _('To use isbndb.com you must sign up for a %sfree account%s '
-                'and enter your access key below.')
+                # 'and enter your access key below.')
-        return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
+        # return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
    # }}}
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
 '''
 import sys, re
-from urllib import quote
+from urllib import urlencode
 from lxml import etree
 from calibre import browser, preferred_encoding
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.fetch import MetadataSource
 from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.utils.config import OptionParser
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre import browser
-BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
+
 class ISBNDB(MetadataSource):
    '''
    Metadata source plugin that looks books up on isbndb.com.

    The isbndb.com access key is read from ``self.site_customization``
    and handed to search() as ``key``.
    '''
    name = 'IsbnDB'
    description = _('Downloads metadata from isbndb.com')
    version = (1, 0, 1)
    def fetch(self):
        '''
        Run the isbndb.com search and store the outcome on the instance.

        Does nothing when no access key is configured. On success the
        value returned by search() is stored in ``self.results``. On any
        failure the exception is stored in ``self.exception`` and the
        formatted traceback in ``self.tb``; nothing is re-raised.
        '''
        # Without an access key the query cannot be made, so give up quietly
        if not self.site_customization:
            return
        try:
            self.results = search(self.title, self.book_author, self.publisher, self.isbn,
                                   max_results=10, verbose=self.verbose, key=self.site_customization)
        except Exception, e:
            # Record the failure on the instance instead of propagating it
            import traceback
            self.exception = e
            self.tb = traceback.format_exc()
    @property
    def string_customization_help(self):
        '''
        Return the HTML help text that tells the user to sign up at
        isbndb.com and enter the access key.
        '''
        ans = _('To use isbndb.com you must sign up for a %sfree account%s '
                'and enter your access key below.')
        # The two %s placeholders are filled with the opening/closing link tags
        return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
 class ISBNDBError(Exception):
    '''
    Error raised by this module for isbndb.com specific failures, such as
    a query without any usable search term or a failed/timed out request.
    '''
    pass
-def fetch_metadata(url, max=100, timeout=5.):
+def report(verbose):
-    books = []
+    if verbose:
-    page_number = 1
+        import traceback
-    total_results = sys.maxint
+        traceback.print_exc()
-    br = browser()
+
-    while len(books) < total_results and max > 0:
+
 class Query(object):
    BASE_URL = 'http://isbndb.com/api/books.xml?'
    def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
                    keywords=None, max_results=40):
        '''
        Build the isbndb.com query URL for the given search terms.

        :param key: isbndb.com access key, sent as ``access_key``
        :param title: title to search for; ignored when it equals the
            translated 'Unknown' placeholder
        :param author: author to search for; ignored when it equals the
            translated 'Unknown' placeholder
        :param publisher: publisher to search for
        :param isbn: when given, an ISBN lookup is done and the other
            search terms are not used
        :param keywords: when given (and no isbn), a 'full' index search is
            done over title, author, publisher and keywords
        :param max_results: upper bound on the number of results; must be
            below 41. Numeric strings are accepted.
        :raises AssertionError: if no search term at all is given, or if
            max_results is not an integer below 41
        :raises ISBNDBError: if the combined search ends up with no usable
            term

        ``self.url`` ends with ``&page_number=`` so that the page index can
        be appended directly to it.
        '''
        assert not(title is None and author is None and publisher is None and \
                   isbn is None and keywords is None)
        # The command line option for max_results declares no type, so the
        # value can arrive as a string. Convert it before the bound check:
        # comparing a str with an int never raises on Python 2, it just
        # yields an unrelated, type-name based ordering.
        try:
            max_results = int(max_results)
        except (TypeError, ValueError):
            # Keep AssertionError as the failure type callers already catch
            raise AssertionError('max_results must be an integer')
        assert max_results < 41
        self.maxresults = max_results

        # 'Unknown' is a placeholder, not a real search term
        if title == _('Unknown'):
            title = None
        if author == _('Unknown'):
            author = None

        if isbn is not None:
            q = isbn
            index = 'isbn'
        elif keywords is not None:
            terms = (title, author, publisher, keywords)
            q = ' '.join([e for e in terms if e is not None]).strip()
            index = 'full'
        else:
            terms = (title, author, publisher)
            q = ' '.join([e for e in terms if e is not None]).strip()
            if not q:
                raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
            index = 'combined'

        # urlencode works on byte strings here, so encode unicode queries
        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.url = self.BASE_URL+urlencode({
            'value1':q,
            'results':'subjects,authors,texts,details',
            'access_key':key,
            'index1':index,
            })+'&page_number='
    def brcall(self, browser, url, verbose, timeout):
        if verbose:
            print _('Query: %s') % url
        try:
-            raw = br.open(url, timeout=timeout).read()
+            raw = browser.open_novisit(url, timeout=timeout).read()
-        except Exception, err:
+        except Exception, e:
-            raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
+            import socket
-        soup = BeautifulStoneSoup(raw,
+            report(verbose)
-                convertEntities=BeautifulStoneSoup.XML_ENTITIES)
+            if callable(getattr(e, 'getcode', None)) and \
-        book_list = soup.find('booklist')
+                    e.getcode() == 404:
-        if book_list is None:
+                return None
-            errmsg = soup.find('errormessage').string
+            attr = getattr(e, 'args', [None])
-            raise ISBNDBError('Error fetching metadata: '+errmsg)
+            attr = attr if attr else [None]
-        total_results = int(book_list['total_results'])
+            if isinstance(attr[0], socket.timeout):
-        page_number += 1
+                raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
-        np = '&page_number=%s&'%page_number
+            raise ISBNDBError(_('ISBNDB encountered an error.'))
-        url = re.sub(r'\&page_number=\d+\&', np, url)
+        if '<title>404 - ' in raw:
-        books.extend(book_list.findAll('bookdata'))
+            return None
-        max -= 1
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-    return books
+                resolve_entities=True)[0]
-
+        try:
-
+            return etree.fromstring(raw)
-class ISBNDBMetadata(Metadata):
+        except:
-
+            try:
-    def __init__(self, book):
+                #remove ASCII invalid chars (normally not needed)
-        Metadata.__init__(self, None)
+                return etree.fromstring(clean_ascii_chars(raw))
-
+            except:
        def tostring(e):
            if not hasattr(e, 'string'):
                return None
            ans = e.string
            if ans is not None:
                ans = unicode(ans).strip()
            if not ans:
                ans = None
            return ans
-        self.isbn = unicode(book.get('isbn13', book.get('isbn')))
+    def __call__(self, browser, verbose, timeout = 5.):
-        title = tostring(book.find('titlelong'))
+        url = self.url+str(1)
        feed = self.brcall(browser, url, verbose, timeout)
        if feed is None:
            return None
        # print etree.tostring(feed, pretty_print=True)
        total = int(feed.find('BookList').get('total_results'))
        nbresultstoget = total if total < self.maxresults else self.maxresults
        entries = feed.xpath("./BookList/BookData")
        i=2
        while len(entries) < nbresultstoget:
            url = self.url+str(i)
            feed = self.brcall(browser, url, verbose, timeout)
            i+=1
            if feed is None:
                break
            entries.extend(feed.xpath("./BookList/BookData"))
        return entries[:nbresultstoget]
 class ResultList(list):
    def get_description(self, entry, verbose):
        '''
        Return the book summary of an entry, prefixed with 'SUMMARY:'.

        :param entry: book element whose ``Summary`` child is read
        :param verbose: passed to report() when extraction fails
        :return: the translated 'SUMMARY:\\n<text>' string, or None when the
            entry has no Summary element, the summary is empty, or an error
            occurred
        '''
        try:
            desc = entry.find('Summary')
            # Test against None explicitly: the truth value of an element
            # reflects whether it has children, so a Summary that only
            # holds text would be treated as missing by a plain `if desc`.
            if desc is not None:
                text = self.output_entry(desc)
                if text:
                    return _(u'SUMMARY:\n%s') % text
        except Exception:
            report(verbose)
        return None
    def get_language(self, entry, verbose):
        '''
        Return the ``language`` attribute of the entry's ``Details`` child.

        Any error raised while reading it (presumably including a missing
        Details element) is handed to report() and the method then falls
        through, returning None implicitly.
        '''
        try:
            return entry.find('Details').get('language')
        except:
            report(verbose)
    def get_title(self, entry):
        title = entry.find('TitleLong')
        if not title:
-            title = tostring(book.find('title'))
+            title = entry.find('Title')
-        self.title = title
+        return self.output_entry(title)
-        self.title = unicode(self.title).strip()
+
    def get_authors(self, entry):
        authors = []
-        au = tostring(book.find('authorstext'))
+        au = entry.find('AuthorsText')
-        if au:
+        if au is not None:
-            au = au.strip()
+            au = self.output_entry(au)
-            temp = au.split(',')
+            temp = au.split(u',')
            for au in temp:
                if not au: continue
-                authors.extend([a.strip() for a in au.split('&amp;')])
+                authors.extend([a.strip() for a in au.split(u'&')])
-        if authors:
+        return authors
-            self.authors = authors
+
    def get_author_sort(self, entry, verbose):
        try:
-            self.author_sort = tostring(book.find('authors').find('person'))
+            return self.output_entry(entry.find('Authors').find('Person'))
            if self.authors and self.author_sort == self.authors[0]:
                self.author_sort = None
        except:
-            pass
+            report(verbose)
-        self.publisher = tostring(book.find('publishertext'))
+            return None
-        summ = tostring(book.find('summary'))
+    def get_isbn(self, entry, verbose):
-        if summ:
+        try:
-            self.comments = 'SUMMARY:\n'+summ
+            return unicode(entry.get('isbn13', entry.get('isbn')))
        except:
            report(verbose)
    def get_publisher(self, entry, verbose):
        '''
        Return the text of the entry's ``PublisherText`` child.

        If anything goes wrong while extracting it, the error is handed to
        report() and None is returned.
        '''
        try:
            return self.output_entry(entry.find('PublisherText'))
        except:
            report(verbose)
            return None
    def output_entry(self, entry):
        '''
        Return the text content of an element as a unicode string with
        leading and trailing whitespace stripped.
        '''
        # method="text" serialises only the text, dropping the markup
        out = etree.tostring(entry, encoding=unicode, method="text")
        return out.strip()
    def populate(self, entries, verbose):
        '''
        Append one MetaInformation object to this list for every entry
        whose title and authors can be extracted.

        :param entries: iterable of book entries, each passed to the get_*
            methods of this class
        :param verbose: when true, extraction failures are printed; it is
            also passed on to the get_* methods that accept it
        '''
        for x in entries:
            try:
                title = self.get_title(x)
                authors = self.get_authors(x)
            except Exception, e:
                # Title and authors are needed to build the record, so an
                # entry for which they cannot be read is skipped
                if verbose:
                    print _('Failed to get all details for an entry')
                    print e
                continue
            mi = MetaInformation(title, authors)
            tmpautsort = self.get_author_sort(x, verbose)
            # Fall back to a sort string derived from the author list when
            # the entry does not provide one
            mi.author_sort = tmpautsort if tmpautsort is not None \
                                else authors_to_sort_string(authors)
            mi.comments = self.get_description(x, verbose)
            mi.isbn = self.get_isbn(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.language = self.get_language(x, verbose)
            self.append(mi)
-def build_isbn(base_url, opts):
+def search(title=None, author=None, publisher=None, isbn=None,
-    return base_url + 'index1=isbn&value1='+opts.isbn
+           max_results=10, verbose=False, keywords=None, key=None):
    br = browser()
    entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
        keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
-def build_combined(base_url, opts):
+    if entries is None or len(entries) == 0:
-    query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
+        return None
        if e is not None ])
    query = query.strip()
    if len(query) == 0:
        raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
    query = re.sub(r'\s+', '+', query)
    if isinstance(query, unicode):
        query = query.encode('utf-8')
    return base_url+'index1=combined&value1='+quote(query, '+')
    #List of entry
    ans = ResultList()
    ans.populate(entries, verbose)
    return list(dict((book.isbn, book) for book in ans).values())
 def option_parser():
-    parser = OptionParser(usage=\
+    import textwrap
-_('''
+    parser = OptionParser(textwrap.dedent(\
-%prog [options] key
+    _('''\
        %prog [options] key
-Fetch metadata for books from isndb.com. You can specify either the
+        Fetch metadata for books from isndb.com. You can specify either the
-books ISBN ID or its title and author. If you specify the title and author,
+        books ISBN ID or its title and author. If you specify the title and author,
-then more than one book may be returned.
+        then more than one book may be returned.
-key is the account key you generate after signing up for a free account from isbndb.com.
+        key is the account key you generate after signing up for a free account from isbndb.com.
-'''))
+    ''')))
    parser.add_option('-i', '--isbn', default=None, dest='isbn',
                      help=_('The ISBN ID of the book you want metadata for.'))
    parser.add_option('-a', '--author', dest='author',
@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
                      default=None, help=_('The title of the book to search for.'))
    parser.add_option('-p', '--publisher', default=None, dest='publisher',
                      help=_('The publisher of the book to search for.'))
-    parser.add_option('-v', '--verbose', default=False,
+    parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
-                      action='store_true', help=_('Verbose processing'))
+    parser.add_option('-m', '--max-results', default=10,
-
+                      help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser
 def create_books(opts, args, timeout=5.):
    '''
    Query isbndb.com using parsed command line options and return the
    books found.

    :param opts: parsed options; ``isbn`` and ``verbose`` are read here and
        the object is passed on to the URL builders
    :param args: positional arguments; ``args[1]`` is used as the access key
    :param timeout: passed through to fetch_metadata()
    :return: list of ISBNDBMetadata objects, one per distinct ISBN

    NOTE(review): this depends on the module-level names BASE_URL,
    build_isbn, build_combined, fetch_metadata and ISBNDBMetadata, all of
    which this change marks as removed -- confirm whether this function is
    still meant to exist.
    '''
    base_url = BASE_URL%dict(key=args[1])
    # An ISBN lookup takes precedence over the combined search
    if opts.isbn is not None:
        url = build_isbn(base_url, opts)
    else:
        url = build_combined(base_url, opts)
    if opts.verbose:
        print ('ISBNDB query: '+url)
    tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
    # Remove duplicate ISBNs: keying the dict by ISBN keeps one book per ISBN
    return list(dict((book.isbn, book) for book in tans).values())
 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
-        print ('You must supply the isbndb.com key')
+        print
        print _('You must supply the isbndb.com key')
        return 1
-
+    try:
-    for book in create_books(opts, args):
+        results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
-        print unicode(book).encode('utf-8')
+            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
-
+    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print
    return 0
 if __name__ == '__main__':
    sys.exit(main())
 # calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\isbndb-bis.py" -m 5 -a gore -v PWEK5WY4>data.html