diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 9e34d33941..f95c29a718 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
-from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
+from calibre.ebooks.metadata.fetch import LibraryThing
+from calibre.ebooks.metadata.isbndb import ISBNDB
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 0c607b9bb7..3bf4c22afe 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
# }}}
-class ISBNDB(MetadataSource): # {{{
+# class ISBNDB(MetadataSource): # {{{
- name = 'IsbnDB'
- description = _('Downloads metadata from isbndb.com')
+ # name = 'IsbnDB'
+ # description = _('Downloads metadata from isbndb.com')
- def fetch(self):
- if not self.site_customization:
- return
- from calibre.ebooks.metadata.isbndb import option_parser, create_books
- args = ['isbndb']
- if self.isbn:
- args.extend(['--isbn', self.isbn])
- else:
- if self.title:
- args.extend(['--title', self.title])
- if self.book_author:
- args.extend(['--author', self.book_author])
- if self.publisher:
- args.extend(['--publisher', self.publisher])
- if self.verbose:
- args.extend(['--verbose'])
- args.append(self.site_customization) # IsbnDb key
- try:
- opts, args = option_parser().parse_args(args)
- self.results = create_books(opts, args)
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # if not self.site_customization:
+ # return
+ # from calibre.ebooks.metadata.isbndb import option_parser, create_books
+ # args = ['isbndb']
+ # if self.isbn:
+ # args.extend(['--isbn', self.isbn])
+ # else:
+ # if self.title:
+ # args.extend(['--title', self.title])
+ # if self.book_author:
+ # args.extend(['--author', self.book_author])
+ # if self.publisher:
+ # args.extend(['--publisher', self.publisher])
+ # if self.verbose:
+ # args.extend(['--verbose'])
+ # args.append(self.site_customization) # IsbnDb key
+ # try:
+ # opts, args = option_parser().parse_args(args)
+ # self.results = create_books(opts, args)
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
- @property
- def string_customization_help(self):
- ans = _('To use isbndb.com you must sign up for a %sfree account%s '
- 'and enter your access key below.')
- return '
'+ans%('', '')
+ # @property
+ # def string_customization_help(self):
+ # ans = _('To use isbndb.com you must sign up for a %sfree account%s '
+ # 'and enter your access key below.')
+ # return '
'+ans%('', '')
# }}}
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index 9169227326..330755fe35 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
'''
import sys, re
-from urllib import quote
+from urllib import urlencode
+from lxml import etree
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
+from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.config import OptionParser
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
-from calibre import browser
-BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
+
+class ISBNDB(MetadataSource):
+    '''
+    Metadata source plugin that looks books up on isbndb.com.
+
+    The access key is read from ``site_customization``; fetch() does
+    nothing while no key is stored there.
+    '''
+
+    name = 'IsbnDB'
+    description = _('Downloads metadata from isbndb.com')
+    version = (1, 0, 1)
+
+    def fetch(self):
+        '''
+        Run search() with this source's title, author, publisher and ISBN
+        and store what it returns in ``self.results``.
+
+        Exceptions are not propagated: the exception is stored in
+        ``self.exception`` and its formatted traceback in ``self.tb``.
+        '''
+        if not self.site_customization:
+            # No access key configured, nothing can be queried
+            return
+        try:
+            self.results = search(self.title, self.book_author, self.publisher, self.isbn,
+                    max_results=10, verbose=self.verbose, key=self.site_customization)
+        except Exception, e:
+            import traceback
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    @property
+    def string_customization_help(self):
+        '''
+        HTML help text asking the user for an isbndb.com access key. The
+        two %s placeholders of the translated message wrap "free account"
+        in a link to the site.
+        '''
+        ans = _('To use isbndb.com you must sign up for a %sfree account%s '
+                'and enter your access key below.')
+        return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
+
class ISBNDBError(Exception):
pass
-def fetch_metadata(url, max=100, timeout=5.):
- books = []
- page_number = 1
- total_results = sys.maxint
- br = browser()
- while len(books) < total_results and max > 0:
+def report(verbose):
+    '''
+    Print the traceback of the exception currently being handled, but
+    only when `verbose` is true. Does nothing otherwise.
+    '''
+    if not verbose:
+        return
+    from traceback import print_exc
+    print_exc()
+
+
+class Query(object):
+
+ BASE_URL = 'http://isbndb.com/api/books.xml?'
+
+    def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
+            keywords=None, max_results=40):
+        '''
+        Build the isbndb.com query URL for the given search criteria. The
+        URL is stored in ``self.url`` and ends with ``&page_number=`` so
+        that a page number can be appended to it.
+
+        :param key: isbndb.com access key, sent as ``access_key``.
+        :param title: title to search for. The translated string 'Unknown'
+            is treated as not given.
+        :param author: author to search for. The translated string
+            'Unknown' is treated as not given.
+        :param publisher: publisher to search for.
+        :param isbn: when given, the ``isbn`` index is queried with this
+            value and all other criteria are ignored.
+        :param keywords: when given (and isbn is not), every criterion is
+            joined into one query on the ``full`` index; otherwise the
+            ``combined`` index is used.
+        :param max_results: maximum number of entries wanted, below 41.
+            Numeric strings are accepted.
+
+        Raises AssertionError when no criterion at all is given or when
+        max_results is not an integer below 41, and ISBNDBError when the
+        title/author/publisher query ends up empty.
+        '''
+        # Raised explicitly so the checks are not stripped by ``python -O``
+        if title is None and author is None and publisher is None and \
+                isbn is None and keywords is None:
+            raise AssertionError('At least one search criterion is required')
+        # Convert before comparing: the command line hands max_results over
+        # as a string, and Python 2 orders any str after any int.
+        try:
+            max_results = int(max_results)
+        except (TypeError, ValueError):
+            raise AssertionError('max_results must be an integer')
+        if max_results >= 41:
+            raise AssertionError('max_results must be less than 41')
+        self.maxresults = max_results
+
+        if title == _('Unknown'):
+            title = None
+        if author == _('Unknown'):
+            author = None
+
+        if isbn is not None:
+            query = isbn
+            index = 'isbn'
+        elif keywords is not None:
+            query = ' '.join([e for e in (title, author, publisher, keywords)
+                if e is not None]).strip()
+            index = 'full'
+        else:
+            query = ' '.join([e for e in (title, author, publisher)
+                if e is not None]).strip()
+            if not query:
+                raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
+            index = 'combined'
+
+        # urlencode() needs byte strings for non-ASCII text
+        if isinstance(query, unicode):
+            query = query.encode('utf-8')
+        self.url = self.BASE_URL+urlencode({
+            'value1':query,
+            'results':'subjects,authors,texts,details',
+            'access_key':key,
+            'index1':index,
+            })+'&page_number='
+
+    def brcall(self, browser, url, verbose, timeout):
+        '''
+        Fetch `url` with `browser` and parse the reply as XML.
+
+        :param browser: browser object used for the request.
+        :param url: complete query URL, page number included.
+        :param verbose: when true, print the query and report tracebacks.
+        :param timeout: timeout handed to the browser.
+        :return: the parsed lxml root element, or None when the server
+            answers 404, the body looks like a 404 page, or the body cannot
+            be parsed even after removing invalid ASCII characters.
+
+        Raises ISBNDBError for every other failure to fetch the URL.
+        '''
+        if verbose:
+            print _('Query: %s') % url
+
         try:
-            raw = br.open(url, timeout=timeout).read()
-        except Exception, err:
-            raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
-        soup = BeautifulStoneSoup(raw,
-                convertEntities=BeautifulStoneSoup.XML_ENTITIES)
-        book_list = soup.find('booklist')
-        if book_list is None:
-            errmsg = soup.find('errormessage').string
-            raise ISBNDBError('Error fetching metadata: '+errmsg)
-        total_results = int(book_list['total_results'])
-        page_number += 1
-        np = '&page_number=%s&'%page_number
-        url = re.sub(r'\&page_number=\d+\&', np, url)
-        books.extend(book_list.findAll('bookdata'))
-        max -= 1
-    return books
-
-
-class ISBNDBMetadata(Metadata):
-
-    def __init__(self, book):
-        Metadata.__init__(self, None)
-
-        def tostring(e):
-            if not hasattr(e, 'string'):
+            raw = browser.open_novisit(url, timeout=timeout).read()
+        except Exception, e:
+            import socket
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return None
+            # The timeout is either the exception itself or its first argument
+            attr = getattr(e, 'args', None) or [None]
+            if isinstance(e, socket.timeout) or \
+                    isinstance(attr[0], socket.timeout):
+                raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
+            raise ISBNDBError(_('ISBNDB encountered an error.'))
+        if '<title>404 - ' in raw:
+            return None
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return etree.fromstring(raw)
+        except Exception:
+            try:
+                #remove ASCII invalid chars (normally not needed)
+                return etree.fromstring(clean_ascii_chars(raw))
+            except Exception:
                 return None
- ans = e.string
- if ans is not None:
- ans = unicode(ans).strip()
- if not ans:
- ans = None
- return ans
- self.isbn = unicode(book.get('isbn13', book.get('isbn')))
- title = tostring(book.find('titlelong'))
+    def __call__(self, browser, verbose, timeout = 5.):
+        '''
+        Run the query, following result pages until enough entries have
+        been collected.
+
+        :param browser: browser object handed to brcall().
+        :param verbose: verbosity flag handed to brcall().
+        :param timeout: timeout for each request.
+        :return: list of ``BookData`` elements, at most ``self.maxresults``
+            of them, or None when the first page could not be retrieved.
+
+        Raises ISBNDBError when the reply contains no ``BookList`` element.
+        '''
+        feed = self.brcall(browser, self.url+str(1), verbose, timeout)
+        if feed is None:
+            return None
+
+        book_list = feed.find('BookList')
+        if book_list is None:
+            # NOTE(review): element name assumed from the previous
+            # implementation's 'errormessage' lookup -- confirm against the API
+            err = feed.find('ErrorMessage')
+            msg = err.text if err is not None and err.text else 'unknown error'
+            raise ISBNDBError('Error fetching metadata: '+msg)
+
+        total = int(book_list.get('total_results', 0))
+        wanted = min(total, self.maxresults)
+        entries = feed.xpath("./BookList/BookData")
+        page = 2
+        while len(entries) < wanted:
+            feed = self.brcall(browser, self.url+str(page), verbose, timeout)
+            page += 1
+            if feed is None:
+                break
+            more = feed.xpath("./BookList/BookData")
+            if not more:
+                # An empty page would otherwise be requested forever
+                break
+            entries.extend(more)
+        return entries[:wanted]
+
+class ResultList(list):
+
+    def get_description(self, entry, verbose):
+        '''
+        Return the text of the entry's ``Summary`` element prefixed with
+        'SUMMARY:', or None when the element is missing or empty.
+        Errors are reported through report() and also give None.
+        '''
+        try:
+            desc = entry.find('Summary')
+            # Compare with None: an lxml element without children is falsy
+            if desc is not None:
+                text = self.output_entry(desc)
+                if text:
+                    return _(u'SUMMARY:\n%s') % text
+        except Exception:
+            report(verbose)
+        return None
+
+    def get_language(self, entry, verbose):
+        '''
+        Return the ``language`` attribute of the entry's ``Details``
+        element. Any error is passed to report() and gives None.
+        '''
+        language = None
+        try:
+            details = entry.find('Details')
+            language = details.get('language')
+        except:
+            report(verbose)
+        return language
+
+    def get_title(self, entry):
+        '''
+        Return the text of the entry's ``TitleLong`` element, falling back
+        to ``Title`` when the long title is missing or empty.
+        '''
+        title = entry.find('TitleLong')
+        # Work on the text, not the element: an lxml element without
+        # children is falsy, which would always discard the long title.
+        title = None if title is None else self.output_entry(title)
         if not title:
-            title = tostring(book.find('title'))
-        self.title = title
-        self.title = unicode(self.title).strip()
+            title = self.output_entry(entry.find('Title'))
+        return title
+
+    def get_authors(self, entry):
+        '''
+        Return the list of author names found in the entry's
+        ``AuthorsText`` element, split on ',' and '&'. The list is empty
+        when the element is missing.
+        '''
         authors = []
-        au = tostring(book.find('authorstext'))
-        if au:
-            au = au.strip()
-            temp = au.split(',')
+        au = entry.find('AuthorsText')
+        if au is not None:
+            au = self.output_entry(au)
+            temp = au.split(u',')
             for au in temp:
                 if not au: continue
-                authors.extend([a.strip() for a in au.split('&')])
-        if authors:
-            self.authors = authors
+                # Drop fragments that are empty once stripped
+                authors.extend([a.strip() for a in au.split(u'&') if a.strip()])
+        return authors
+
+    def get_author_sort(self, entry, verbose):
+        '''
+        Return the text of the first ``Person`` element inside the entry's
+        ``Authors`` element. Any error is passed to report() and gives
+        None.
+        '''
         try:
-            self.author_sort = tostring(book.find('authors').find('person'))
-            if self.authors and self.author_sort == self.authors[0]:
-                self.author_sort = None
+            person = entry.find('Authors').find('Person')
+            return self.output_entry(person)
         except:
-            pass
-        self.publisher = tostring(book.find('publishertext'))
+            report(verbose)
+            return None
- summ = tostring(book.find('summary'))
- if summ:
- self.comments = 'SUMMARY:\n'+summ
+    def get_isbn(self, entry, verbose):
+        '''
+        Return the entry's ``isbn13`` attribute, or its ``isbn`` attribute
+        when the former is missing or empty, as a unicode string. Returns
+        None when neither is set or an error occurs.
+        '''
+        try:
+            isbn = entry.get('isbn13') or entry.get('isbn')
+            # unicode(None) would give the string u'None'
+            return unicode(isbn) if isbn else None
+        except Exception:
+            report(verbose)
+            return None
+
+    def get_publisher(self, entry, verbose):
+        '''
+        Return the text of the entry's ``PublisherText`` element. Any
+        error is passed to report() and gives None.
+        '''
+        publisher = None
+        try:
+            publisher = self.output_entry(entry.find('PublisherText'))
+        except:
+            report(verbose)
+        return publisher
+
+    def output_entry(self, entry):
+        '''
+        Serialise `entry` with lxml's "text" method into a unicode string
+        and return it with surrounding whitespace stripped.
+        '''
+        return etree.tostring(entry, method="text", encoding=unicode).strip()
+
+    def populate(self, entries, verbose):
+        '''
+        Append one MetaInformation object to this list for every element
+        of `entries`. Entries whose title or authors cannot be extracted
+        are skipped (with a message when `verbose` is true).
+        '''
+        for entry in entries:
+            try:
+                title = self.get_title(entry)
+                authors = self.get_authors(entry)
+            except Exception, e:
+                if verbose:
+                    print _('Failed to get all details for an entry')
+                    print e
+                continue
+            mi = MetaInformation(title, authors)
+            author_sort = self.get_author_sort(entry, verbose)
+            if author_sort is None:
+                # Nothing usable in the entry, derive it from the authors
+                author_sort = authors_to_sort_string(authors)
+            mi.author_sort = author_sort
+            mi.comments = self.get_description(entry, verbose)
+            mi.isbn = self.get_isbn(entry, verbose)
+            mi.publisher = self.get_publisher(entry, verbose)
+            mi.language = self.get_language(entry, verbose)
+            self.append(mi)
-def build_isbn(base_url, opts):
- return base_url + 'index1=isbn&value1='+opts.isbn
+def search(title=None, author=None, publisher=None, isbn=None,
+        max_results=10, verbose=False, keywords=None, key=None):
+    '''
+    Query isbndb.com and return the matching books.
+
+    :param title, author, publisher, isbn, keywords: search criteria,
+        handed unchanged to Query.
+    :param max_results: maximum number of entries to fetch.
+    :param verbose: when true, queries and tracebacks are printed.
+    :param key: isbndb.com access key.
+    :return: list of MetaInformation objects in the order they were
+        received, keeping only the first book for each ISBN, or None when
+        nothing was found.
+    '''
+    br = browser()
+    entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
+        keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
-def build_combined(base_url, opts):
-    query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
-                     if e is not None ])
-    query = query.strip()
-    if len(query) == 0:
-        raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
-
-    query = re.sub(r'\s+', '+', query)
-    if isinstance(query, unicode):
-        query = query.encode('utf-8')
-    return base_url+'index1=combined&value1='+quote(query, '+')
+    if entries is None or len(entries) == 0:
+        return None
+    #List of entry
+    ans = ResultList()
+    ans.populate(entries, verbose)
+    # Remove duplicate ISBNs without losing the order of the results;
+    # books without a usable ISBN cannot be compared and are all kept.
+    seen = set()
+    unique = []
+    for book in ans:
+        isbn_key = book.isbn
+        if isbn_key and isbn_key != u'None':
+            if isbn_key in seen:
+                continue
+            seen.add(isbn_key)
+        unique.append(book)
+    return unique
def option_parser():
- parser = OptionParser(usage=\
-_('''
-%prog [options] key
+ import textwrap
+ parser = OptionParser(textwrap.dedent(\
+ _('''\
+ %prog [options] key
-Fetch metadata for books from isndb.com. You can specify either the
-books ISBN ID or its title and author. If you specify the title and author,
-then more than one book may be returned.
+ Fetch metadata for books from isndb.com. You can specify either the
+ books ISBN ID or its title and author. If you specify the title and author,
+ then more than one book may be returned.
-key is the account key you generate after signing up for a free account from isbndb.com.
+ key is the account key you generate after signing up for a free account from isbndb.com.
-'''))
+ ''')))
parser.add_option('-i', '--isbn', default=None, dest='isbn',
help=_('The ISBN ID of the book you want metadata for.'))
parser.add_option('-a', '--author', dest='author',
@@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.'))
- parser.add_option('-v', '--verbose', default=False,
- action='store_true', help=_('Verbose processing'))
-
+ parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
+ parser.add_option('-m', '--max-results', default=10,
+ help=_('Maximum number of results to fetch'))
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help=_('Be more verbose about errors'))
return parser
-
-def create_books(opts, args, timeout=5.):
- base_url = BASE_URL%dict(key=args[1])
- if opts.isbn is not None:
- url = build_isbn(base_url, opts)
- else:
- url = build_combined(base_url, opts)
-
- if opts.verbose:
- print ('ISBNDB query: '+url)
-
- tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
- #remove duplicates ISBN
- return list(dict((book.isbn, book) for book in tans).values())
-
def main(args=sys.argv):
+    '''
+    Command line entry point: search isbndb.com with the given options
+    and print every result.
+
+    :param args: argument list; after option parsing it must hold exactly
+        the program name and the isbndb.com key.
+    :return: 1 when the arguments are unusable or the query fails, 0
+        otherwise (including when nothing was found).
+    '''
     parser = option_parser()
     opts, args = parser.parse_args(args)
     if len(args) != 2:
         parser.print_help()
-        print ('You must supply the isbndb.com key')
+        print
+        print _('You must supply the isbndb.com key')
         return 1
-
-    for book in create_books(opts, args):
-        print unicode(book).encode('utf-8')
-
+    try:
+        # The option value is a string when given on the command line
+        results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
+            keywords=opts.keywords, verbose=opts.verbose,
+            max_results=int(opts.max_results))
+    except (AssertionError, ValueError):
+        report(True)
+        parser.print_help()
+        return 1
+    except ISBNDBError, e:
+        # Expected failures (timeout, server error): message, no traceback
+        report(opts.verbose)
+        print unicode(e).encode(preferred_encoding, 'replace')
+        return 1
+    if not results:
+        print _('No result found for this search!')
+        return 0
+    for result in results:
+        print unicode(result).encode(preferred_encoding, 'replace')
+        print
     return 0
if __name__ == '__main__':
sys.exit(main())
+
+# Example: calibre-debug -e src/calibre/ebooks/metadata/isbndb.py -m 5 -a gore -v YOUR_ISBNDB_KEY > data.html
\ No newline at end of file