Refactoring of isbndb plugin + add get language

This commit is contained in:
Sengian 2010-12-15 09:10:37 +01:00
parent 08eb0e1a59
commit a64a22a934
3 changed files with 270 additions and 138 deletions

View File

@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
from calibre.ebooks.metadata.fetch import LibraryThing
from calibre.ebooks.metadata.isbndb import ISBNDB
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers

View File

@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
# }}}
class ISBNDB(MetadataSource): # {{{
# class ISBNDB(MetadataSource): # {{{
name = 'IsbnDB'
description = _('Downloads metadata from isbndb.com')
# name = 'IsbnDB'
# description = _('Downloads metadata from isbndb.com')
def fetch(self):
if not self.site_customization:
return
from calibre.ebooks.metadata.isbndb import option_parser, create_books
args = ['isbndb']
if self.isbn:
args.extend(['--isbn', self.isbn])
else:
if self.title:
args.extend(['--title', self.title])
if self.book_author:
args.extend(['--author', self.book_author])
if self.publisher:
args.extend(['--publisher', self.publisher])
if self.verbose:
args.extend(['--verbose'])
args.append(self.site_customization) # IsbnDb key
try:
opts, args = option_parser().parse_args(args)
self.results = create_books(opts, args)
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
# def fetch(self):
# if not self.site_customization:
# return
# from calibre.ebooks.metadata.isbndb import option_parser, create_books
# args = ['isbndb']
# if self.isbn:
# args.extend(['--isbn', self.isbn])
# else:
# if self.title:
# args.extend(['--title', self.title])
# if self.book_author:
# args.extend(['--author', self.book_author])
# if self.publisher:
# args.extend(['--publisher', self.publisher])
# if self.verbose:
# args.extend(['--verbose'])
# args.append(self.site_customization) # IsbnDb key
# try:
# opts, args = option_parser().parse_args(args)
# self.results = create_books(opts, args)
# except Exception, e:
# self.exception = e
# self.tb = traceback.format_exc()
@property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.')
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
# @property
# def string_customization_help(self):
# ans = _('To use isbndb.com you must sign up for a %sfree account%s '
# 'and enter your access key below.')
# return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
# }}}

View File

@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
'''
import sys, re
from urllib import quote
from urllib import urlencode
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
class ISBNDB(MetadataSource):
name = 'IsbnDB'
description = _('Downloads metadata from isbndb.com')
version = (1, 0, 1)
def fetch(self):
if not self.site_customization:
return
try:
self.results = search(self.title, self.book_author, self.publisher, self.isbn,
max_results=10, verbose=self.verbose, key=self.site_customization)
except Exception, e:
import traceback
self.exception = e
self.tb = traceback.format_exc()
@property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.')
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
class ISBNDBError(Exception):
pass
def fetch_metadata(url, max=100, timeout=5.):
books = []
page_number = 1
total_results = sys.maxint
br = browser()
while len(books) < total_results and max > 0:
def report(verbose):
if verbose:
import traceback
traceback.print_exc()
class Query(object):
BASE_URL = 'http://isbndb.com/api/books.xml?'
def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
keywords=None, max_results=40):
assert not(title is None and author is None and publisher is None and \
isbn is None and keywords is None)
assert (max_results < 41)
if title == _('Unknown'):
title=None
if author == _('Unknown'):
author=None
self.maxresults = int(max_results)
if isbn is not None:
q = isbn
i = 'isbn'
elif keywords is not None:
q = ' '.join([e for e in (title, author, publisher, keywords) \
if e is not None ])
q = q.strip()
i = 'full'
else:
q = ' '.join([e for e in (title, author, publisher) \
if e is not None ])
q = q.strip()
if len(q) == 0:
raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
i = 'combined'
if isinstance(q, unicode):
q = q.encode('utf-8')
self.url = self.BASE_URL+urlencode({
'value1':q,
'results':'subjects,authors,texts,details',
'access_key':key,
'index1':i,
})+'&page_number='
def brcall(self, browser, url, verbose, timeout):
if verbose:
print _('Query: %s') % url
try:
raw = br.open(url, timeout=timeout).read()
except Exception, err:
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
soup = BeautifulStoneSoup(raw,
convertEntities=BeautifulStoneSoup.XML_ENTITIES)
book_list = soup.find('booklist')
if book_list is None:
errmsg = soup.find('errormessage').string
raise ISBNDBError('Error fetching metadata: '+errmsg)
total_results = int(book_list['total_results'])
page_number += 1
np = '&page_number=%s&'%page_number
url = re.sub(r'\&page_number=\d+\&', np, url)
books.extend(book_list.findAll('bookdata'))
max -= 1
return books
class ISBNDBMetadata(Metadata):
def __init__(self, book):
Metadata.__init__(self, None)
def tostring(e):
if not hasattr(e, 'string'):
raw = browser.open_novisit(url, timeout=timeout).read()
except Exception, e:
import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
attr = getattr(e, 'args', [None])
attr = attr if attr else [None]
if isinstance(attr[0], socket.timeout):
raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
raise ISBNDBError(_('ISBNDB encountered an error.'))
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return etree.fromstring(raw)
except:
try:
#remove ASCII invalid chars (normally not needed)
return etree.fromstring(clean_ascii_chars(raw))
except:
return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
title = tostring(book.find('titlelong'))
def __call__(self, browser, verbose, timeout = 5.):
url = self.url+str(1)
feed = self.brcall(browser, url, verbose, timeout)
if feed is None:
return None
# print etree.tostring(feed, pretty_print=True)
total = int(feed.find('BookList').get('total_results'))
nbresultstoget = total if total < self.maxresults else self.maxresults
entries = feed.xpath("./BookList/BookData")
i=2
while len(entries) < nbresultstoget:
url = self.url+str(i)
feed = self.brcall(browser, url, verbose, timeout)
i+=1
if feed is None:
break
entries.extend(feed.xpath("./BookList/BookData"))
return entries[:nbresultstoget]
class ResultList(list):
def get_description(self, entry, verbose):
try:
desc = entry.find('Summary')
if desc:
return _(u'SUMMARY:\n%s') % self.output_entry(desc)
except:
report(verbose)
def get_language(self, entry, verbose):
try:
return entry.find('Details').get('language')
except:
report(verbose)
def get_title(self, entry):
title = entry.find('TitleLong')
if not title:
title = tostring(book.find('title'))
self.title = title
self.title = unicode(self.title).strip()
title = entry.find('Title')
return self.output_entry(title)
def get_authors(self, entry):
authors = []
au = tostring(book.find('authorstext'))
if au:
au = au.strip()
temp = au.split(',')
au = entry.find('AuthorsText')
if au is not None:
au = self.output_entry(au)
temp = au.split(u',')
for au in temp:
if not au: continue
authors.extend([a.strip() for a in au.split('&amp;')])
if authors:
self.authors = authors
authors.extend([a.strip() for a in au.split(u'&')])
return authors
def get_author_sort(self, entry, verbose):
try:
self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
return self.output_entry(entry.find('Authors').find('Person'))
except:
pass
self.publisher = tostring(book.find('publishertext'))
report(verbose)
return None
summ = tostring(book.find('summary'))
if summ:
self.comments = 'SUMMARY:\n'+summ
def get_isbn(self, entry, verbose):
try:
return unicode(entry.get('isbn13', entry.get('isbn')))
except:
report(verbose)
def get_publisher(self, entry, verbose):
try:
return self.output_entry(entry.find('PublisherText'))
except:
report(verbose)
return None
def output_entry(self, entry):
out = etree.tostring(entry, encoding=unicode, method="text")
return out.strip()
def populate(self, entries, verbose):
for x in entries:
try:
title = self.get_title(x)
authors = self.get_authors(x)
except Exception, e:
if verbose:
print _('Failed to get all details for an entry')
print e
continue
mi = MetaInformation(title, authors)
tmpautsort = self.get_author_sort(x, verbose)
mi.author_sort = tmpautsort if tmpautsort is not None \
else authors_to_sort_string(authors)
mi.comments = self.get_description(x, verbose)
mi.isbn = self.get_isbn(x, verbose)
mi.publisher = self.get_publisher(x, verbose)
mi.language = self.get_language(x, verbose)
self.append(mi)
def build_isbn(base_url, opts):
return base_url + 'index1=isbn&value1='+opts.isbn
def search(title=None, author=None, publisher=None, isbn=None,
max_results=10, verbose=False, keywords=None, key=None):
br = browser()
entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
def build_combined(base_url, opts):
query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
if e is not None ])
query = query.strip()
if len(query) == 0:
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
query = re.sub(r'\s+', '+', query)
if isinstance(query, unicode):
query = query.encode('utf-8')
return base_url+'index1=combined&value1='+quote(query, '+')
if entries is None or len(entries) == 0:
return None
#List of entry
ans = ResultList()
ans.populate(entries, verbose)
return list(dict((book.isbn, book) for book in ans).values())
def option_parser():
parser = OptionParser(usage=\
_('''
%prog [options] key
import textwrap
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options] key
Fetch metadata for books from isndb.com. You can specify either the
books ISBN ID or its title and author. If you specify the title and author,
then more than one book may be returned.
Fetch metadata for books from isndb.com. You can specify either the
books ISBN ID or its title and author. If you specify the title and author,
then more than one book may be returned.
key is the account key you generate after signing up for a free account from isbndb.com.
key is the account key you generate after signing up for a free account from isbndb.com.
'''))
''')))
parser.add_option('-i', '--isbn', default=None, dest='isbn',
help=_('The ISBN ID of the book you want metadata for.'))
parser.add_option('-a', '--author', dest='author',
@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.'))
parser.add_option('-v', '--verbose', default=False,
action='store_true', help=_('Verbose processing'))
parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
parser.add_option('-m', '--max-results', default=10,
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser
def create_books(opts, args, timeout=5.):
base_url = BASE_URL%dict(key=args[1])
if opts.isbn is not None:
url = build_isbn(base_url, opts)
else:
url = build_combined(base_url, opts)
if opts.verbose:
print ('ISBNDB query: '+url)
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
#remove duplicates ISBN
return list(dict((book.isbn, book) for book in tans).values())
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print ('You must supply the isbndb.com key')
print
print _('You must supply the isbndb.com key')
return 1
for book in create_books(opts, args):
print unicode(book).encode('utf-8')
try:
results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
return 0
if __name__ == '__main__':
sys.exit(main())
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\isbndb-bis.py" -m 5 -a gore -v PWEK5WY4>data.html