Refactoring of isbndb plugin + add get language

This commit is contained in:
Sengian 2010-12-15 09:10:37 +01:00
parent 08eb0e1a59
commit a64a22a934
3 changed files with 270 additions and 138 deletions

View File

@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing from calibre.ebooks.metadata.fetch import LibraryThing
from calibre.ebooks.metadata.isbndb import ISBNDB
from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.google_books import GoogleBooks from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers

View File

@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
# }}} # }}}
class ISBNDB(MetadataSource): # {{{ # class ISBNDB(MetadataSource): # {{{
name = 'IsbnDB' # name = 'IsbnDB'
description = _('Downloads metadata from isbndb.com') # description = _('Downloads metadata from isbndb.com')
def fetch(self): # def fetch(self):
if not self.site_customization: # if not self.site_customization:
return # return
from calibre.ebooks.metadata.isbndb import option_parser, create_books # from calibre.ebooks.metadata.isbndb import option_parser, create_books
args = ['isbndb'] # args = ['isbndb']
if self.isbn: # if self.isbn:
args.extend(['--isbn', self.isbn]) # args.extend(['--isbn', self.isbn])
else: # else:
if self.title: # if self.title:
args.extend(['--title', self.title]) # args.extend(['--title', self.title])
if self.book_author: # if self.book_author:
args.extend(['--author', self.book_author]) # args.extend(['--author', self.book_author])
if self.publisher: # if self.publisher:
args.extend(['--publisher', self.publisher]) # args.extend(['--publisher', self.publisher])
if self.verbose: # if self.verbose:
args.extend(['--verbose']) # args.extend(['--verbose'])
args.append(self.site_customization) # IsbnDb key # args.append(self.site_customization) # IsbnDb key
try: # try:
opts, args = option_parser().parse_args(args) # opts, args = option_parser().parse_args(args)
self.results = create_books(opts, args) # self.results = create_books(opts, args)
except Exception, e: # except Exception, e:
self.exception = e # self.exception = e
self.tb = traceback.format_exc() # self.tb = traceback.format_exc()
@property # @property
def string_customization_help(self): # def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s ' # ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.') # 'and enter your access key below.')
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>') # return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
# }}} # }}}

View File

@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
''' '''
import sys, re import sys, re
from urllib import quote from urllib import urlencode
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
class ISBNDB(MetadataSource):
    '''
    Metadata source plugin that looks books up on isbndb.com.

    The isbndb.com access key is taken from ``site_customization``; when it
    is empty the plugin does nothing.
    '''

    name = 'IsbnDB'
    description = _('Downloads metadata from isbndb.com')
    version = (1, 0, 1)

    def fetch(self):
        '''
        Run the isbndb.com search and store the outcome on the instance.

        On success ``self.results`` receives whatever ``search`` returned.
        Any exception is kept in ``self.exception`` with its formatted
        traceback in ``self.tb`` rather than being propagated.
        '''
        if self.site_customization:
            try:
                self.results = search(
                    title=self.title,
                    author=self.book_author,
                    publisher=self.publisher,
                    isbn=self.isbn,
                    max_results=10,
                    verbose=self.verbose,
                    key=self.site_customization)
            except Exception as err:
                import traceback
                self.exception = err
                self.tb = traceback.format_exc()

    @property
    def string_customization_help(self):
        '''HTML snippet telling the user how to obtain an access key.'''
        template = _('To use isbndb.com you must sign up for a %sfree account%s '
                'and enter your access key below.')
        link_open = '<a href="http://www.isbndb.com">'
        link_close = '</a>'
        return '<p>' + template % (link_open, link_close)
class ISBNDBError(Exception): class ISBNDBError(Exception):
pass pass
def fetch_metadata(url, max=100, timeout=5.): def report(verbose):
books = [] if verbose:
page_number = 1 import traceback
total_results = sys.maxint traceback.print_exc()
br = browser()
while len(books) < total_results and max > 0:
class Query(object):
BASE_URL = 'http://isbndb.com/api/books.xml?'
def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
        keywords=None, max_results=40):
    '''
    Build the isbndb.com query URL, minus the trailing page number.

    :param key: isbndb.com access key, sent as ``access_key``.
    :param title: title to search for; the localized 'Unknown' is ignored.
    :param author: author to search for; the localized 'Unknown' is ignored.
    :param publisher: publisher to search for.
    :param isbn: when given, an ISBN lookup is done and the other search
                 terms are not used.
    :param keywords: when given (and no isbn), title, author, publisher and
                     keywords are joined into one 'full' index query.
    :param max_results: maximum number of results wanted, at most 40. May
                        be an int or a numeric string.
    :raises AssertionError: if no search term was supplied or max_results
                            is not a number below 41.
    :raises ISBNDBError: if the combined query ends up empty.
    '''
    # Validation failures are raised explicitly (rather than with ``assert``)
    # so they survive ``python -O``. The type stays AssertionError because
    # that is what main() catches.
    if title is None and author is None and publisher is None and \
            isbn is None and keywords is None:
        raise AssertionError(_('You must specify at least one search term'))
    # Coerce before the range check: a value coming from the command line
    # is a string, and a str/int comparison is not numeric.
    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise AssertionError(_('max_results must be a number'))
    if max_results >= 41:
        raise AssertionError(_('max_results must be at most 40'))

    if title == _('Unknown'):
        title = None
    if author == _('Unknown'):
        author = None
    self.maxresults = max_results

    if isbn is not None:
        q = isbn
        i = 'isbn'
    elif keywords is not None:
        q = ' '.join([e for e in (title, author, publisher, keywords)
                if e is not None]).strip()
        i = 'full'
    else:
        q = ' '.join([e for e in (title, author, publisher)
                if e is not None]).strip()
        if not q:
            raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
        i = 'combined'

    # urlencode needs a byte string
    if isinstance(q, unicode):
        q = q.encode('utf-8')

    # The page number is appended by the caller for each request
    self.url = self.BASE_URL+urlencode({
        'value1':q,
        'results':'subjects,authors,texts,details',
        'access_key':key,
        'index1':i,
        })+'&page_number='
def brcall(self, browser, url, verbose, timeout):
if verbose:
print _('Query: %s') % url
try: try:
raw = br.open(url, timeout=timeout).read() raw = browser.open_novisit(url, timeout=timeout).read()
except Exception, err: except Exception, e:
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err)) import socket
soup = BeautifulStoneSoup(raw, report(verbose)
convertEntities=BeautifulStoneSoup.XML_ENTITIES) if callable(getattr(e, 'getcode', None)) and \
book_list = soup.find('booklist') e.getcode() == 404:
if book_list is None: return None
errmsg = soup.find('errormessage').string attr = getattr(e, 'args', [None])
raise ISBNDBError('Error fetching metadata: '+errmsg) attr = attr if attr else [None]
total_results = int(book_list['total_results']) if isinstance(attr[0], socket.timeout):
page_number += 1 raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
np = '&page_number=%s&'%page_number raise ISBNDBError(_('ISBNDB encountered an error.'))
url = re.sub(r'\&page_number=\d+\&', np, url) if '<title>404 - ' in raw:
books.extend(book_list.findAll('bookdata')) return None
max -= 1 raw = xml_to_unicode(raw, strip_encoding_pats=True,
return books resolve_entities=True)[0]
try:
return etree.fromstring(raw)
class ISBNDBMetadata(Metadata): except:
try:
def __init__(self, book): #remove ASCII invalid chars (normally not needed)
Metadata.__init__(self, None) return etree.fromstring(clean_ascii_chars(raw))
except:
def tostring(e):
if not hasattr(e, 'string'):
return None return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn'))) def __call__(self, browser, verbose, timeout = 5.):
title = tostring(book.find('titlelong')) url = self.url+str(1)
feed = self.brcall(browser, url, verbose, timeout)
if feed is None:
return None
# print etree.tostring(feed, pretty_print=True)
total = int(feed.find('BookList').get('total_results'))
nbresultstoget = total if total < self.maxresults else self.maxresults
entries = feed.xpath("./BookList/BookData")
i=2
while len(entries) < nbresultstoget:
url = self.url+str(i)
feed = self.brcall(browser, url, verbose, timeout)
i+=1
if feed is None:
break
entries.extend(feed.xpath("./BookList/BookData"))
return entries[:nbresultstoget]
class ResultList(list):
def get_description(self, entry, verbose):
    '''
    Return the summary of ``entry`` prefixed with a SUMMARY header.

    Returns None when the entry has no ``Summary`` element, when the
    summary text is empty, or when extraction fails (the traceback is
    then printed if ``verbose`` is set).
    '''
    try:
        desc = entry.find('Summary')
        # Compare against None explicitly: the truth value of an lxml
        # element reflects whether it has children, so a Summary that
        # only holds text would otherwise be skipped.
        if desc is not None:
            text = self.output_entry(desc)
            if text:
                return _(u'SUMMARY:\n%s') % text
    except Exception:
        report(verbose)
    return None
def get_language(self, entry, verbose):
    '''
    Return the ``language`` attribute of the entry's ``Details`` element.

    Returns None when there is no ``Details`` element, when it has no
    ``language`` attribute, or when the lookup fails (the traceback is
    then printed if ``verbose`` is set).
    '''
    try:
        details = entry.find('Details')
        # A missing element is an ordinary case, not an error worth a
        # traceback in verbose mode.
        if details is None:
            return None
        return details.get('language')
    except Exception:
        report(verbose)
    return None
def get_title(self, entry):
title = entry.find('TitleLong')
if not title: if not title:
title = tostring(book.find('title')) title = entry.find('Title')
self.title = title return self.output_entry(title)
self.title = unicode(self.title).strip()
def get_authors(self, entry):
authors = [] authors = []
au = tostring(book.find('authorstext')) au = entry.find('AuthorsText')
if au: if au is not None:
au = au.strip() au = self.output_entry(au)
temp = au.split(',') temp = au.split(u',')
for au in temp: for au in temp:
if not au: continue if not au: continue
authors.extend([a.strip() for a in au.split('&amp;')]) authors.extend([a.strip() for a in au.split(u'&')])
if authors: return authors
self.authors = authors
def get_author_sort(self, entry, verbose):
try: try:
self.author_sort = tostring(book.find('authors').find('person')) return self.output_entry(entry.find('Authors').find('Person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except: except:
pass report(verbose)
self.publisher = tostring(book.find('publishertext')) return None
summ = tostring(book.find('summary')) def get_isbn(self, entry, verbose):
if summ: try:
self.comments = 'SUMMARY:\n'+summ return unicode(entry.get('isbn13', entry.get('isbn')))
except:
report(verbose)
def get_publisher(self, entry, verbose):
    '''
    Return the text of the entry's ``PublisherText`` element.

    Returns None when the element is missing or when extraction fails
    (the traceback is then printed if ``verbose`` is set).
    '''
    try:
        publisher = entry.find('PublisherText')
        # Handle the missing element directly instead of letting
        # output_entry fail on None.
        if publisher is None:
            return None
        return self.output_entry(publisher)
    except Exception:
        report(verbose)
    return None
def output_entry(self, entry):
    '''Serialize ``entry`` with the "text" method and strip the result.'''
    return etree.tostring(entry, method="text", encoding=unicode).strip()
def populate(self, entries, verbose):
    '''
    Turn each raw entry into a MetaInformation object and append it.

    Entries whose title or authors cannot be extracted are skipped; a
    message and the error are printed when ``verbose`` is set.
    '''
    for entry in entries:
        try:
            title = self.get_title(entry)
            authors = self.get_authors(entry)
        except Exception as err:
            if verbose:
                print(_('Failed to get all details for an entry'))
                print(err)
            continue
        book = MetaInformation(title, authors)
        # Fall back to a sort string computed from the author list
        author_sort = self.get_author_sort(entry, verbose)
        if author_sort is None:
            author_sort = authors_to_sort_string(authors)
        book.author_sort = author_sort
        book.comments = self.get_description(entry, verbose)
        book.isbn = self.get_isbn(entry, verbose)
        book.publisher = self.get_publisher(entry, verbose)
        book.language = self.get_language(entry, verbose)
        self.append(book)
def build_isbn(base_url, opts): def search(title=None, author=None, publisher=None, isbn=None,
return base_url + 'index1=isbn&value1='+opts.isbn max_results=10, verbose=False, keywords=None, key=None):
br = browser()
entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
def build_combined(base_url, opts): if entries is None or len(entries) == 0:
query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \ return None
if e is not None ])
query = query.strip()
if len(query) == 0:
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
query = re.sub(r'\s+', '+', query)
if isinstance(query, unicode):
query = query.encode('utf-8')
return base_url+'index1=combined&value1='+quote(query, '+')
#List of entry
ans = ResultList()
ans.populate(entries, verbose)
return list(dict((book.isbn, book) for book in ans).values())
def option_parser(): def option_parser():
parser = OptionParser(usage=\ import textwrap
_(''' parser = OptionParser(textwrap.dedent(\
%prog [options] key _('''\
%prog [options] key
Fetch metadata for books from isndb.com. You can specify either the Fetch metadata for books from isndb.com. You can specify either the
books ISBN ID or its title and author. If you specify the title and author, books ISBN ID or its title and author. If you specify the title and author,
then more than one book may be returned. then more than one book may be returned.
key is the account key you generate after signing up for a free account from isbndb.com. key is the account key you generate after signing up for a free account from isbndb.com.
''')) ''')))
parser.add_option('-i', '--isbn', default=None, dest='isbn', parser.add_option('-i', '--isbn', default=None, dest='isbn',
help=_('The ISBN ID of the book you want metadata for.')) help=_('The ISBN ID of the book you want metadata for.'))
parser.add_option('-a', '--author', dest='author', parser.add_option('-a', '--author', dest='author',
@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
default=None, help=_('The title of the book to search for.')) default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher', parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.')) help=_('The publisher of the book to search for.'))
parser.add_option('-v', '--verbose', default=False, parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
action='store_true', help=_('Verbose processing')) parser.add_option('-m', '--max-results', default=10,
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser return parser
def create_books(opts, args, timeout=5.):
base_url = BASE_URL%dict(key=args[1])
if opts.isbn is not None:
url = build_isbn(base_url, opts)
else:
url = build_combined(base_url, opts)
if opts.verbose:
print ('ISBNDB query: '+url)
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
#remove duplicates ISBN
return list(dict((book.isbn, book) for book in tans).values())
def main(args=sys.argv): def main(args=sys.argv):
parser = option_parser() parser = option_parser()
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)
if len(args) != 2: if len(args) != 2:
parser.print_help() parser.print_help()
print ('You must supply the isbndb.com key') print
print _('You must supply the isbndb.com key')
return 1 return 1
try:
for book in create_books(opts, args): results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
print unicode(book).encode('utf-8') keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())
# Example: calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\isbndb-bis.py" -m 5 -a gore -v YOUR_ISBNDB_KEY >data.html