mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Refactoring of isbndb plugin + add get language
This commit is contained in:
parent
08eb0e1a59
commit
a64a22a934
@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
|||||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||||
from calibre.devices.kobo.driver import KOBO
|
from calibre.devices.kobo.driver import KOBO
|
||||||
|
|
||||||
from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
|
from calibre.ebooks.metadata.fetch import LibraryThing
|
||||||
|
from calibre.ebooks.metadata.isbndb import ISBNDB
|
||||||
from calibre.ebooks.metadata.douban import DoubanBooks
|
from calibre.ebooks.metadata.douban import DoubanBooks
|
||||||
from calibre.ebooks.metadata.google_books import GoogleBooks
|
from calibre.ebooks.metadata.google_books import GoogleBooks
|
||||||
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
|
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
|
||||||
|
@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class ISBNDB(MetadataSource): # {{{
|
# class ISBNDB(MetadataSource): # {{{
|
||||||
|
|
||||||
name = 'IsbnDB'
|
# name = 'IsbnDB'
|
||||||
description = _('Downloads metadata from isbndb.com')
|
# description = _('Downloads metadata from isbndb.com')
|
||||||
|
|
||||||
def fetch(self):
|
# def fetch(self):
|
||||||
if not self.site_customization:
|
# if not self.site_customization:
|
||||||
return
|
# return
|
||||||
from calibre.ebooks.metadata.isbndb import option_parser, create_books
|
# from calibre.ebooks.metadata.isbndb import option_parser, create_books
|
||||||
args = ['isbndb']
|
# args = ['isbndb']
|
||||||
if self.isbn:
|
# if self.isbn:
|
||||||
args.extend(['--isbn', self.isbn])
|
# args.extend(['--isbn', self.isbn])
|
||||||
else:
|
# else:
|
||||||
if self.title:
|
# if self.title:
|
||||||
args.extend(['--title', self.title])
|
# args.extend(['--title', self.title])
|
||||||
if self.book_author:
|
# if self.book_author:
|
||||||
args.extend(['--author', self.book_author])
|
# args.extend(['--author', self.book_author])
|
||||||
if self.publisher:
|
# if self.publisher:
|
||||||
args.extend(['--publisher', self.publisher])
|
# args.extend(['--publisher', self.publisher])
|
||||||
if self.verbose:
|
# if self.verbose:
|
||||||
args.extend(['--verbose'])
|
# args.extend(['--verbose'])
|
||||||
args.append(self.site_customization) # IsbnDb key
|
# args.append(self.site_customization) # IsbnDb key
|
||||||
try:
|
# try:
|
||||||
opts, args = option_parser().parse_args(args)
|
# opts, args = option_parser().parse_args(args)
|
||||||
self.results = create_books(opts, args)
|
# self.results = create_books(opts, args)
|
||||||
except Exception, e:
|
# except Exception, e:
|
||||||
self.exception = e
|
# self.exception = e
|
||||||
self.tb = traceback.format_exc()
|
# self.tb = traceback.format_exc()
|
||||||
|
|
||||||
@property
|
# @property
|
||||||
def string_customization_help(self):
|
# def string_customization_help(self):
|
||||||
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
# ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
||||||
'and enter your access key below.')
|
# 'and enter your access key below.')
|
||||||
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
# return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, re
|
import sys, re
|
||||||
from urllib import quote
|
from urllib import urlencode
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import browser, preferred_encoding
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||||
|
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
|
||||||
from calibre import browser
|
|
||||||
|
|
||||||
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
|
|
||||||
|
class ISBNDB(MetadataSource):
|
||||||
|
|
||||||
|
name = 'IsbnDB'
|
||||||
|
description = _('Downloads metadata from isbndb.com')
|
||||||
|
version = (1, 0, 1)
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
if not self.site_customization:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher, self.isbn,
|
||||||
|
max_results=10, verbose=self.verbose, key=self.site_customization)
|
||||||
|
except Exception, e:
|
||||||
|
import traceback
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def string_customization_help(self):
|
||||||
|
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
||||||
|
'and enter your access key below.')
|
||||||
|
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
||||||
|
|
||||||
|
|
||||||
class ISBNDBError(Exception):
|
class ISBNDBError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def fetch_metadata(url, max=100, timeout=5.):
|
def report(verbose):
|
||||||
books = []
|
if verbose:
|
||||||
page_number = 1
|
import traceback
|
||||||
total_results = sys.maxint
|
traceback.print_exc()
|
||||||
br = browser()
|
|
||||||
while len(books) < total_results and max > 0:
|
|
||||||
|
class Query(object):
|
||||||
|
|
||||||
|
BASE_URL = 'http://isbndb.com/api/books.xml?'
|
||||||
|
|
||||||
|
def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
|
||||||
|
keywords=None, max_results=40):
|
||||||
|
assert not(title is None and author is None and publisher is None and \
|
||||||
|
isbn is None and keywords is None)
|
||||||
|
assert (max_results < 41)
|
||||||
|
|
||||||
|
if title == _('Unknown'):
|
||||||
|
title=None
|
||||||
|
if author == _('Unknown'):
|
||||||
|
author=None
|
||||||
|
self.maxresults = int(max_results)
|
||||||
|
|
||||||
|
if isbn is not None:
|
||||||
|
q = isbn
|
||||||
|
i = 'isbn'
|
||||||
|
elif keywords is not None:
|
||||||
|
q = ' '.join([e for e in (title, author, publisher, keywords) \
|
||||||
|
if e is not None ])
|
||||||
|
q = q.strip()
|
||||||
|
i = 'full'
|
||||||
|
else:
|
||||||
|
q = ' '.join([e for e in (title, author, publisher) \
|
||||||
|
if e is not None ])
|
||||||
|
q = q.strip()
|
||||||
|
if len(q) == 0:
|
||||||
|
raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
|
||||||
|
i = 'combined'
|
||||||
|
|
||||||
|
if isinstance(q, unicode):
|
||||||
|
q = q.encode('utf-8')
|
||||||
|
self.url = self.BASE_URL+urlencode({
|
||||||
|
'value1':q,
|
||||||
|
'results':'subjects,authors,texts,details',
|
||||||
|
'access_key':key,
|
||||||
|
'index1':i,
|
||||||
|
})+'&page_number='
|
||||||
|
|
||||||
|
def brcall(self, browser, url, verbose, timeout):
|
||||||
|
if verbose:
|
||||||
|
print _('Query: %s') % url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
raw = br.open(url, timeout=timeout).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
except Exception, err:
|
except Exception, e:
|
||||||
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
|
import socket
|
||||||
soup = BeautifulStoneSoup(raw,
|
report(verbose)
|
||||||
convertEntities=BeautifulStoneSoup.XML_ENTITIES)
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
book_list = soup.find('booklist')
|
e.getcode() == 404:
|
||||||
if book_list is None:
|
return None
|
||||||
errmsg = soup.find('errormessage').string
|
attr = getattr(e, 'args', [None])
|
||||||
raise ISBNDBError('Error fetching metadata: '+errmsg)
|
attr = attr if attr else [None]
|
||||||
total_results = int(book_list['total_results'])
|
if isinstance(attr[0], socket.timeout):
|
||||||
page_number += 1
|
raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
|
||||||
np = '&page_number=%s&'%page_number
|
raise ISBNDBError(_('ISBNDB encountered an error.'))
|
||||||
url = re.sub(r'\&page_number=\d+\&', np, url)
|
if '<title>404 - ' in raw:
|
||||||
books.extend(book_list.findAll('bookdata'))
|
return None
|
||||||
max -= 1
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
return books
|
resolve_entities=True)[0]
|
||||||
|
try:
|
||||||
|
return etree.fromstring(raw)
|
||||||
class ISBNDBMetadata(Metadata):
|
except:
|
||||||
|
try:
|
||||||
def __init__(self, book):
|
#remove ASCII invalid chars (normally not needed)
|
||||||
Metadata.__init__(self, None)
|
return etree.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
def tostring(e):
|
|
||||||
if not hasattr(e, 'string'):
|
|
||||||
return None
|
return None
|
||||||
ans = e.string
|
|
||||||
if ans is not None:
|
|
||||||
ans = unicode(ans).strip()
|
|
||||||
if not ans:
|
|
||||||
ans = None
|
|
||||||
return ans
|
|
||||||
|
|
||||||
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
|
def __call__(self, browser, verbose, timeout = 5.):
|
||||||
title = tostring(book.find('titlelong'))
|
url = self.url+str(1)
|
||||||
|
feed = self.brcall(browser, url, verbose, timeout)
|
||||||
|
if feed is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# print etree.tostring(feed, pretty_print=True)
|
||||||
|
total = int(feed.find('BookList').get('total_results'))
|
||||||
|
nbresultstoget = total if total < self.maxresults else self.maxresults
|
||||||
|
entries = feed.xpath("./BookList/BookData")
|
||||||
|
i=2
|
||||||
|
while len(entries) < nbresultstoget:
|
||||||
|
url = self.url+str(i)
|
||||||
|
feed = self.brcall(browser, url, verbose, timeout)
|
||||||
|
i+=1
|
||||||
|
if feed is None:
|
||||||
|
break
|
||||||
|
entries.extend(feed.xpath("./BookList/BookData"))
|
||||||
|
return entries[:nbresultstoget]
|
||||||
|
|
||||||
|
class ResultList(list):
|
||||||
|
|
||||||
|
def get_description(self, entry, verbose):
|
||||||
|
try:
|
||||||
|
desc = entry.find('Summary')
|
||||||
|
if desc:
|
||||||
|
return _(u'SUMMARY:\n%s') % self.output_entry(desc)
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
|
||||||
|
def get_language(self, entry, verbose):
|
||||||
|
try:
|
||||||
|
return entry.find('Details').get('language')
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
|
||||||
|
def get_title(self, entry):
|
||||||
|
title = entry.find('TitleLong')
|
||||||
if not title:
|
if not title:
|
||||||
title = tostring(book.find('title'))
|
title = entry.find('Title')
|
||||||
self.title = title
|
return self.output_entry(title)
|
||||||
self.title = unicode(self.title).strip()
|
|
||||||
|
def get_authors(self, entry):
|
||||||
authors = []
|
authors = []
|
||||||
au = tostring(book.find('authorstext'))
|
au = entry.find('AuthorsText')
|
||||||
if au:
|
if au is not None:
|
||||||
au = au.strip()
|
au = self.output_entry(au)
|
||||||
temp = au.split(',')
|
temp = au.split(u',')
|
||||||
for au in temp:
|
for au in temp:
|
||||||
if not au: continue
|
if not au: continue
|
||||||
authors.extend([a.strip() for a in au.split('&')])
|
authors.extend([a.strip() for a in au.split(u'&')])
|
||||||
if authors:
|
return authors
|
||||||
self.authors = authors
|
|
||||||
|
def get_author_sort(self, entry, verbose):
|
||||||
try:
|
try:
|
||||||
self.author_sort = tostring(book.find('authors').find('person'))
|
return self.output_entry(entry.find('Authors').find('Person'))
|
||||||
if self.authors and self.author_sort == self.authors[0]:
|
|
||||||
self.author_sort = None
|
|
||||||
except:
|
except:
|
||||||
pass
|
report(verbose)
|
||||||
self.publisher = tostring(book.find('publishertext'))
|
return None
|
||||||
|
|
||||||
summ = tostring(book.find('summary'))
|
def get_isbn(self, entry, verbose):
|
||||||
if summ:
|
try:
|
||||||
self.comments = 'SUMMARY:\n'+summ
|
return unicode(entry.get('isbn13', entry.get('isbn')))
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
|
||||||
|
def get_publisher(self, entry, verbose):
|
||||||
|
try:
|
||||||
|
return self.output_entry(entry.find('PublisherText'))
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def output_entry(self, entry):
|
||||||
|
out = etree.tostring(entry, encoding=unicode, method="text")
|
||||||
|
return out.strip()
|
||||||
|
|
||||||
|
def populate(self, entries, verbose):
|
||||||
|
for x in entries:
|
||||||
|
try:
|
||||||
|
title = self.get_title(x)
|
||||||
|
authors = self.get_authors(x)
|
||||||
|
except Exception, e:
|
||||||
|
if verbose:
|
||||||
|
print _('Failed to get all details for an entry')
|
||||||
|
print e
|
||||||
|
continue
|
||||||
|
mi = MetaInformation(title, authors)
|
||||||
|
tmpautsort = self.get_author_sort(x, verbose)
|
||||||
|
mi.author_sort = tmpautsort if tmpautsort is not None \
|
||||||
|
else authors_to_sort_string(authors)
|
||||||
|
mi.comments = self.get_description(x, verbose)
|
||||||
|
mi.isbn = self.get_isbn(x, verbose)
|
||||||
|
mi.publisher = self.get_publisher(x, verbose)
|
||||||
|
mi.language = self.get_language(x, verbose)
|
||||||
|
self.append(mi)
|
||||||
|
|
||||||
|
|
||||||
def build_isbn(base_url, opts):
|
def search(title=None, author=None, publisher=None, isbn=None,
|
||||||
return base_url + 'index1=isbn&value1='+opts.isbn
|
max_results=10, verbose=False, keywords=None, key=None):
|
||||||
|
br = browser()
|
||||||
|
entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
|
||||||
|
keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
|
||||||
|
|
||||||
def build_combined(base_url, opts):
|
if entries is None or len(entries) == 0:
|
||||||
query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
|
return None
|
||||||
if e is not None ])
|
|
||||||
query = query.strip()
|
|
||||||
if len(query) == 0:
|
|
||||||
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
|
|
||||||
|
|
||||||
query = re.sub(r'\s+', '+', query)
|
|
||||||
if isinstance(query, unicode):
|
|
||||||
query = query.encode('utf-8')
|
|
||||||
return base_url+'index1=combined&value1='+quote(query, '+')
|
|
||||||
|
|
||||||
|
#List of entry
|
||||||
|
ans = ResultList()
|
||||||
|
ans.populate(entries, verbose)
|
||||||
|
return list(dict((book.isbn, book) for book in ans).values())
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = OptionParser(usage=\
|
import textwrap
|
||||||
_('''
|
parser = OptionParser(textwrap.dedent(\
|
||||||
%prog [options] key
|
_('''\
|
||||||
|
%prog [options] key
|
||||||
|
|
||||||
Fetch metadata for books from isndb.com. You can specify either the
|
Fetch metadata for books from isndb.com. You can specify either the
|
||||||
books ISBN ID or its title and author. If you specify the title and author,
|
books ISBN ID or its title and author. If you specify the title and author,
|
||||||
then more than one book may be returned.
|
then more than one book may be returned.
|
||||||
|
|
||||||
key is the account key you generate after signing up for a free account from isbndb.com.
|
key is the account key you generate after signing up for a free account from isbndb.com.
|
||||||
|
|
||||||
'''))
|
''')))
|
||||||
parser.add_option('-i', '--isbn', default=None, dest='isbn',
|
parser.add_option('-i', '--isbn', default=None, dest='isbn',
|
||||||
help=_('The ISBN ID of the book you want metadata for.'))
|
help=_('The ISBN ID of the book you want metadata for.'))
|
||||||
parser.add_option('-a', '--author', dest='author',
|
parser.add_option('-a', '--author', dest='author',
|
||||||
@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
|
|||||||
default=None, help=_('The title of the book to search for.'))
|
default=None, help=_('The title of the book to search for.'))
|
||||||
parser.add_option('-p', '--publisher', default=None, dest='publisher',
|
parser.add_option('-p', '--publisher', default=None, dest='publisher',
|
||||||
help=_('The publisher of the book to search for.'))
|
help=_('The publisher of the book to search for.'))
|
||||||
parser.add_option('-v', '--verbose', default=False,
|
parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
|
||||||
action='store_true', help=_('Verbose processing'))
|
parser.add_option('-m', '--max-results', default=10,
|
||||||
|
help=_('Maximum number of results to fetch'))
|
||||||
|
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||||
|
help=_('Be more verbose about errors'))
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def create_books(opts, args, timeout=5.):
|
|
||||||
base_url = BASE_URL%dict(key=args[1])
|
|
||||||
if opts.isbn is not None:
|
|
||||||
url = build_isbn(base_url, opts)
|
|
||||||
else:
|
|
||||||
url = build_combined(base_url, opts)
|
|
||||||
|
|
||||||
if opts.verbose:
|
|
||||||
print ('ISBNDB query: '+url)
|
|
||||||
|
|
||||||
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
|
|
||||||
#remove duplicates ISBN
|
|
||||||
return list(dict((book.isbn, book) for book in tans).values())
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
opts, args = parser.parse_args(args)
|
opts, args = parser.parse_args(args)
|
||||||
if len(args) != 2:
|
if len(args) != 2:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
print ('You must supply the isbndb.com key')
|
print
|
||||||
|
print _('You must supply the isbndb.com key')
|
||||||
return 1
|
return 1
|
||||||
|
try:
|
||||||
for book in create_books(opts, args):
|
results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
|
||||||
print unicode(book).encode('utf-8')
|
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
|
||||||
|
except AssertionError:
|
||||||
|
report(True)
|
||||||
|
parser.print_help()
|
||||||
|
return 1
|
||||||
|
if results is None or len(results) == 0:
|
||||||
|
print _('No result found for this search!')
|
||||||
|
return 0
|
||||||
|
for result in results:
|
||||||
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
|
print
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|
||||||
|
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\isbndb-bis.py" -m 5 -a gore -v PWEK5WY4>data.html
|
Loading…
x
Reference in New Issue
Block a user