mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Refactoring of isbndb plugin + add get language
This commit is contained in:
parent
08eb0e1a59
commit
a64a22a934
@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||
from calibre.devices.kobo.driver import KOBO
|
||||
|
||||
from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
|
||||
from calibre.ebooks.metadata.fetch import LibraryThing
|
||||
from calibre.ebooks.metadata.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.douban import DoubanBooks
|
||||
from calibre.ebooks.metadata.google_books import GoogleBooks
|
||||
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
|
||||
|
@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class ISBNDB(MetadataSource): # {{{
|
||||
# class ISBNDB(MetadataSource): # {{{
|
||||
|
||||
name = 'IsbnDB'
|
||||
description = _('Downloads metadata from isbndb.com')
|
||||
# name = 'IsbnDB'
|
||||
# description = _('Downloads metadata from isbndb.com')
|
||||
|
||||
def fetch(self):
|
||||
if not self.site_customization:
|
||||
return
|
||||
from calibre.ebooks.metadata.isbndb import option_parser, create_books
|
||||
args = ['isbndb']
|
||||
if self.isbn:
|
||||
args.extend(['--isbn', self.isbn])
|
||||
else:
|
||||
if self.title:
|
||||
args.extend(['--title', self.title])
|
||||
if self.book_author:
|
||||
args.extend(['--author', self.book_author])
|
||||
if self.publisher:
|
||||
args.extend(['--publisher', self.publisher])
|
||||
if self.verbose:
|
||||
args.extend(['--verbose'])
|
||||
args.append(self.site_customization) # IsbnDb key
|
||||
try:
|
||||
opts, args = option_parser().parse_args(args)
|
||||
self.results = create_books(opts, args)
|
||||
except Exception, e:
|
||||
self.exception = e
|
||||
self.tb = traceback.format_exc()
|
||||
# def fetch(self):
|
||||
# if not self.site_customization:
|
||||
# return
|
||||
# from calibre.ebooks.metadata.isbndb import option_parser, create_books
|
||||
# args = ['isbndb']
|
||||
# if self.isbn:
|
||||
# args.extend(['--isbn', self.isbn])
|
||||
# else:
|
||||
# if self.title:
|
||||
# args.extend(['--title', self.title])
|
||||
# if self.book_author:
|
||||
# args.extend(['--author', self.book_author])
|
||||
# if self.publisher:
|
||||
# args.extend(['--publisher', self.publisher])
|
||||
# if self.verbose:
|
||||
# args.extend(['--verbose'])
|
||||
# args.append(self.site_customization) # IsbnDb key
|
||||
# try:
|
||||
# opts, args = option_parser().parse_args(args)
|
||||
# self.results = create_books(opts, args)
|
||||
# except Exception, e:
|
||||
# self.exception = e
|
||||
# self.tb = traceback.format_exc()
|
||||
|
||||
@property
|
||||
def string_customization_help(self):
|
||||
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
||||
'and enter your access key below.')
|
||||
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
||||
# @property
|
||||
# def string_customization_help(self):
|
||||
# ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
||||
# 'and enter your access key below.')
|
||||
# return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
||||
|
||||
# }}}
|
||||
|
||||
|
@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
|
||||
'''
|
||||
|
||||
import sys, re
|
||||
from urllib import quote
|
||||
from urllib import urlencode
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre import browser, preferred_encoding
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
from calibre import browser
|
||||
|
||||
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
|
||||
|
||||
class ISBNDB(MetadataSource):
|
||||
|
||||
name = 'IsbnDB'
|
||||
description = _('Downloads metadata from isbndb.com')
|
||||
version = (1, 0, 1)
|
||||
|
||||
def fetch(self):
|
||||
if not self.site_customization:
|
||||
return
|
||||
try:
|
||||
self.results = search(self.title, self.book_author, self.publisher, self.isbn,
|
||||
max_results=10, verbose=self.verbose, key=self.site_customization)
|
||||
except Exception, e:
|
||||
import traceback
|
||||
self.exception = e
|
||||
self.tb = traceback.format_exc()
|
||||
|
||||
@property
|
||||
def string_customization_help(self):
|
||||
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
||||
'and enter your access key below.')
|
||||
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
||||
|
||||
|
||||
class ISBNDBError(Exception):
|
||||
pass
|
||||
|
||||
def fetch_metadata(url, max=100, timeout=5.):
|
||||
books = []
|
||||
page_number = 1
|
||||
total_results = sys.maxint
|
||||
br = browser()
|
||||
while len(books) < total_results and max > 0:
|
||||
def report(verbose):
|
||||
if verbose:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
class Query(object):
|
||||
|
||||
BASE_URL = 'http://isbndb.com/api/books.xml?'
|
||||
|
||||
def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
|
||||
keywords=None, max_results=40):
|
||||
assert not(title is None and author is None and publisher is None and \
|
||||
isbn is None and keywords is None)
|
||||
assert (max_results < 41)
|
||||
|
||||
if title == _('Unknown'):
|
||||
title=None
|
||||
if author == _('Unknown'):
|
||||
author=None
|
||||
self.maxresults = int(max_results)
|
||||
|
||||
if isbn is not None:
|
||||
q = isbn
|
||||
i = 'isbn'
|
||||
elif keywords is not None:
|
||||
q = ' '.join([e for e in (title, author, publisher, keywords) \
|
||||
if e is not None ])
|
||||
q = q.strip()
|
||||
i = 'full'
|
||||
else:
|
||||
q = ' '.join([e for e in (title, author, publisher) \
|
||||
if e is not None ])
|
||||
q = q.strip()
|
||||
if len(q) == 0:
|
||||
raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
|
||||
i = 'combined'
|
||||
|
||||
if isinstance(q, unicode):
|
||||
q = q.encode('utf-8')
|
||||
self.url = self.BASE_URL+urlencode({
|
||||
'value1':q,
|
||||
'results':'subjects,authors,texts,details',
|
||||
'access_key':key,
|
||||
'index1':i,
|
||||
})+'&page_number='
|
||||
|
||||
def brcall(self, browser, url, verbose, timeout):
|
||||
if verbose:
|
||||
print _('Query: %s') % url
|
||||
|
||||
try:
|
||||
raw = br.open(url, timeout=timeout).read()
|
||||
except Exception, err:
|
||||
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
|
||||
soup = BeautifulStoneSoup(raw,
|
||||
convertEntities=BeautifulStoneSoup.XML_ENTITIES)
|
||||
book_list = soup.find('booklist')
|
||||
if book_list is None:
|
||||
errmsg = soup.find('errormessage').string
|
||||
raise ISBNDBError('Error fetching metadata: '+errmsg)
|
||||
total_results = int(book_list['total_results'])
|
||||
page_number += 1
|
||||
np = '&page_number=%s&'%page_number
|
||||
url = re.sub(r'\&page_number=\d+\&', np, url)
|
||||
books.extend(book_list.findAll('bookdata'))
|
||||
max -= 1
|
||||
return books
|
||||
|
||||
|
||||
class ISBNDBMetadata(Metadata):
|
||||
|
||||
def __init__(self, book):
|
||||
Metadata.__init__(self, None)
|
||||
|
||||
def tostring(e):
|
||||
if not hasattr(e, 'string'):
|
||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||
except Exception, e:
|
||||
import socket
|
||||
report(verbose)
|
||||
if callable(getattr(e, 'getcode', None)) and \
|
||||
e.getcode() == 404:
|
||||
return None
|
||||
attr = getattr(e, 'args', [None])
|
||||
attr = attr if attr else [None]
|
||||
if isinstance(attr[0], socket.timeout):
|
||||
raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
|
||||
raise ISBNDBError(_('ISBNDB encountered an error.'))
|
||||
if '<title>404 - ' in raw:
|
||||
return None
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
try:
|
||||
return etree.fromstring(raw)
|
||||
except:
|
||||
try:
|
||||
#remove ASCII invalid chars (normally not needed)
|
||||
return etree.fromstring(clean_ascii_chars(raw))
|
||||
except:
|
||||
return None
|
||||
ans = e.string
|
||||
if ans is not None:
|
||||
ans = unicode(ans).strip()
|
||||
if not ans:
|
||||
ans = None
|
||||
return ans
|
||||
|
||||
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
|
||||
title = tostring(book.find('titlelong'))
|
||||
def __call__(self, browser, verbose, timeout = 5.):
|
||||
url = self.url+str(1)
|
||||
feed = self.brcall(browser, url, verbose, timeout)
|
||||
if feed is None:
|
||||
return None
|
||||
|
||||
# print etree.tostring(feed, pretty_print=True)
|
||||
total = int(feed.find('BookList').get('total_results'))
|
||||
nbresultstoget = total if total < self.maxresults else self.maxresults
|
||||
entries = feed.xpath("./BookList/BookData")
|
||||
i=2
|
||||
while len(entries) < nbresultstoget:
|
||||
url = self.url+str(i)
|
||||
feed = self.brcall(browser, url, verbose, timeout)
|
||||
i+=1
|
||||
if feed is None:
|
||||
break
|
||||
entries.extend(feed.xpath("./BookList/BookData"))
|
||||
return entries[:nbresultstoget]
|
||||
|
||||
class ResultList(list):
|
||||
|
||||
def get_description(self, entry, verbose):
|
||||
try:
|
||||
desc = entry.find('Summary')
|
||||
if desc:
|
||||
return _(u'SUMMARY:\n%s') % self.output_entry(desc)
|
||||
except:
|
||||
report(verbose)
|
||||
|
||||
def get_language(self, entry, verbose):
|
||||
try:
|
||||
return entry.find('Details').get('language')
|
||||
except:
|
||||
report(verbose)
|
||||
|
||||
def get_title(self, entry):
|
||||
title = entry.find('TitleLong')
|
||||
if not title:
|
||||
title = tostring(book.find('title'))
|
||||
self.title = title
|
||||
self.title = unicode(self.title).strip()
|
||||
title = entry.find('Title')
|
||||
return self.output_entry(title)
|
||||
|
||||
def get_authors(self, entry):
|
||||
authors = []
|
||||
au = tostring(book.find('authorstext'))
|
||||
if au:
|
||||
au = au.strip()
|
||||
temp = au.split(',')
|
||||
au = entry.find('AuthorsText')
|
||||
if au is not None:
|
||||
au = self.output_entry(au)
|
||||
temp = au.split(u',')
|
||||
for au in temp:
|
||||
if not au: continue
|
||||
authors.extend([a.strip() for a in au.split('&')])
|
||||
if authors:
|
||||
self.authors = authors
|
||||
authors.extend([a.strip() for a in au.split(u'&')])
|
||||
return authors
|
||||
|
||||
def get_author_sort(self, entry, verbose):
|
||||
try:
|
||||
self.author_sort = tostring(book.find('authors').find('person'))
|
||||
if self.authors and self.author_sort == self.authors[0]:
|
||||
self.author_sort = None
|
||||
return self.output_entry(entry.find('Authors').find('Person'))
|
||||
except:
|
||||
pass
|
||||
self.publisher = tostring(book.find('publishertext'))
|
||||
report(verbose)
|
||||
return None
|
||||
|
||||
summ = tostring(book.find('summary'))
|
||||
if summ:
|
||||
self.comments = 'SUMMARY:\n'+summ
|
||||
def get_isbn(self, entry, verbose):
|
||||
try:
|
||||
return unicode(entry.get('isbn13', entry.get('isbn')))
|
||||
except:
|
||||
report(verbose)
|
||||
|
||||
def get_publisher(self, entry, verbose):
|
||||
try:
|
||||
return self.output_entry(entry.find('PublisherText'))
|
||||
except:
|
||||
report(verbose)
|
||||
return None
|
||||
|
||||
def output_entry(self, entry):
|
||||
out = etree.tostring(entry, encoding=unicode, method="text")
|
||||
return out.strip()
|
||||
|
||||
def populate(self, entries, verbose):
|
||||
for x in entries:
|
||||
try:
|
||||
title = self.get_title(x)
|
||||
authors = self.get_authors(x)
|
||||
except Exception, e:
|
||||
if verbose:
|
||||
print _('Failed to get all details for an entry')
|
||||
print e
|
||||
continue
|
||||
mi = MetaInformation(title, authors)
|
||||
tmpautsort = self.get_author_sort(x, verbose)
|
||||
mi.author_sort = tmpautsort if tmpautsort is not None \
|
||||
else authors_to_sort_string(authors)
|
||||
mi.comments = self.get_description(x, verbose)
|
||||
mi.isbn = self.get_isbn(x, verbose)
|
||||
mi.publisher = self.get_publisher(x, verbose)
|
||||
mi.language = self.get_language(x, verbose)
|
||||
self.append(mi)
|
||||
|
||||
|
||||
def build_isbn(base_url, opts):
|
||||
return base_url + 'index1=isbn&value1='+opts.isbn
|
||||
def search(title=None, author=None, publisher=None, isbn=None,
|
||||
max_results=10, verbose=False, keywords=None, key=None):
|
||||
br = browser()
|
||||
entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
|
||||
keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
|
||||
|
||||
def build_combined(base_url, opts):
|
||||
query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
|
||||
if e is not None ])
|
||||
query = query.strip()
|
||||
if len(query) == 0:
|
||||
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
|
||||
|
||||
query = re.sub(r'\s+', '+', query)
|
||||
if isinstance(query, unicode):
|
||||
query = query.encode('utf-8')
|
||||
return base_url+'index1=combined&value1='+quote(query, '+')
|
||||
if entries is None or len(entries) == 0:
|
||||
return None
|
||||
|
||||
#List of entry
|
||||
ans = ResultList()
|
||||
ans.populate(entries, verbose)
|
||||
return list(dict((book.isbn, book) for book in ans).values())
|
||||
|
||||
def option_parser():
|
||||
parser = OptionParser(usage=\
|
||||
_('''
|
||||
%prog [options] key
|
||||
import textwrap
|
||||
parser = OptionParser(textwrap.dedent(\
|
||||
_('''\
|
||||
%prog [options] key
|
||||
|
||||
Fetch metadata for books from isndb.com. You can specify either the
|
||||
books ISBN ID or its title and author. If you specify the title and author,
|
||||
then more than one book may be returned.
|
||||
Fetch metadata for books from isndb.com. You can specify either the
|
||||
books ISBN ID or its title and author. If you specify the title and author,
|
||||
then more than one book may be returned.
|
||||
|
||||
key is the account key you generate after signing up for a free account from isbndb.com.
|
||||
key is the account key you generate after signing up for a free account from isbndb.com.
|
||||
|
||||
'''))
|
||||
''')))
|
||||
parser.add_option('-i', '--isbn', default=None, dest='isbn',
|
||||
help=_('The ISBN ID of the book you want metadata for.'))
|
||||
parser.add_option('-a', '--author', dest='author',
|
||||
@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
|
||||
default=None, help=_('The title of the book to search for.'))
|
||||
parser.add_option('-p', '--publisher', default=None, dest='publisher',
|
||||
help=_('The publisher of the book to search for.'))
|
||||
parser.add_option('-v', '--verbose', default=False,
|
||||
action='store_true', help=_('Verbose processing'))
|
||||
|
||||
parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
|
||||
parser.add_option('-m', '--max-results', default=10,
|
||||
help=_('Maximum number of results to fetch'))
|
||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||
help=_('Be more verbose about errors'))
|
||||
return parser
|
||||
|
||||
|
||||
def create_books(opts, args, timeout=5.):
|
||||
base_url = BASE_URL%dict(key=args[1])
|
||||
if opts.isbn is not None:
|
||||
url = build_isbn(base_url, opts)
|
||||
else:
|
||||
url = build_combined(base_url, opts)
|
||||
|
||||
if opts.verbose:
|
||||
print ('ISBNDB query: '+url)
|
||||
|
||||
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
|
||||
#remove duplicates ISBN
|
||||
return list(dict((book.isbn, book) for book in tans).values())
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
if len(args) != 2:
|
||||
parser.print_help()
|
||||
print ('You must supply the isbndb.com key')
|
||||
print
|
||||
print _('You must supply the isbndb.com key')
|
||||
return 1
|
||||
|
||||
for book in create_books(opts, args):
|
||||
print unicode(book).encode('utf-8')
|
||||
|
||||
try:
|
||||
results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
|
||||
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
|
||||
except AssertionError:
|
||||
report(True)
|
||||
parser.print_help()
|
||||
return 1
|
||||
if results is None or len(results) == 0:
|
||||
print _('No result found for this search!')
|
||||
return 0
|
||||
for result in results:
|
||||
print unicode(result).encode(preferred_encoding, 'replace')
|
||||
print
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\isbndb-bis.py" -m 5 -a gore -v PWEK5WY4>data.html
|
Loading…
x
Reference in New Issue
Block a user