calibre now searches the Google Books database in addition to isbndb.com for book metadata

This commit is contained in:
Kovid Goyal 2009-03-19 19:02:04 -07:00
parent 7a277a43c5
commit ed0f570ffb
10 changed files with 290 additions and 84 deletions

View File

@ -233,7 +233,7 @@ class MetaInformation(object):
if mi.authors and mi.authors[0] != _('Unknown'):
self.authors = mi.authors
for attr in ('author_sort', 'title_sort', 'comments', 'category',
for attr in ('author_sort', 'title_sort', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer',
@ -249,6 +249,15 @@ class MetaInformation(object):
if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
self.cover_data = mi.cover_data
my_comments = getattr(self, 'comments', '')
other_comments = getattr(mi, 'comments', '')
if not my_comments:
my_comments = ''
if not other_comments:
other_comments = ''
if len(other_comments.strip()) > len(my_comments.strip()):
self.comments = other_comments
def format_series_index(self):
try:
x = float(self.series_index)

View File

@ -0,0 +1,153 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, sys, textwrap
from threading import Thread
from calibre import preferred_encoding
from calibre.utils.config import OptionParser
class FetchGoogle(Thread):
    '''
    Worker thread that runs a Google Books metadata search.

    Once the thread has finished, ``results`` holds whatever the search
    returned (an empty list if it failed), and ``exception`` / ``tb`` hold the
    caught exception and its formatted traceback (both ``None`` on success).
    '''

    # Human readable label for this source; search() returns it next to any
    # error so the caller can say which source failed.
    name = 'Google Books'

    def __init__(self, title, author, publisher, isbn, verbose):
        '''
        Remember the search criteria. They are handed unchanged to the Google
        Books ``search`` function when the thread runs.
        '''
        self.title = title
        self.verbose = verbose
        self.author = author
        self.publisher = publisher
        self.isbn = isbn
        Thread.__init__(self, None)
        self.daemon = True
        self.exception, self.tb = None, None
        # Defined up front so that reading ``results`` can never raise
        # AttributeError, whatever happens inside run().
        self.results = []

    def run(self):
        '''
        Perform the search, recording any failure instead of raising it.
        '''
        try:
            # Imported inside the try block so that a failure to load the
            # backend is reported like any other fetch error rather than
            # killing the thread with ``results`` left undefined.
            from calibre.ebooks.metadata.google_books import search
            self.results = search(self.title, self.author, self.publisher,
                                  self.isbn, max_results=10,
                                  verbose=self.verbose)
        except Exception as e:
            self.results = []
            self.exception = e
            self.tb = traceback.format_exc()
class FetchISBNDB(Thread):
    '''
    Worker thread that runs an isbndb.com metadata search.

    Once the thread has finished, ``results`` holds the books that were
    created (an empty list if the lookup failed), and ``exception`` / ``tb``
    hold the caught exception and its formatted traceback (both ``None`` on
    success).
    '''

    # Human readable label for this source; search() returns it next to any
    # error so the caller can say which source failed.
    name = 'IsbnDB'

    def __init__(self, title, author, publisher, isbn, verbose, key):
        '''
        Remember the search criteria and the isbndb.com access ``key``.
        '''
        self.title = title
        self.author = author
        self.publisher = publisher
        self.isbn = isbn
        # NOTE(review): stored but not forwarded to the isbndb option parser
        # in run() -- confirm whether that is intended.
        self.verbose = verbose
        Thread.__init__(self, None)
        self.daemon = True
        self.exception, self.tb = None, None
        self.key = key
        # Defined up front so that reading ``results`` can never raise
        # AttributeError, whatever happens inside run().
        self.results = []

    def run(self):
        '''
        Build an isbndb command line from the criteria and run the lookup,
        recording any failure instead of raising it.
        '''
        args = ['isbndb']
        if self.isbn:
            # An ISBN identifies the book on its own; the other criteria are
            # not sent.
            args.extend(['--isbn', self.isbn])
        else:
            criteria = (('--title', self.title),
                        ('--author', self.author),
                        ('--publisher', self.publisher))
            for flag, value in criteria:
                if value:
                    args.extend([flag, value])
        # The access key goes last, as the only positional argument.
        args.append(self.key)
        try:
            # Imported inside the try block so that a failure to load the
            # backend is reported like any other fetch error rather than
            # killing the thread with ``results`` left undefined.
            from calibre.ebooks.metadata.isbndb import option_parser, \
                    create_books
            opts, args = option_parser().parse_args(args)
            self.results = create_books(opts, args)
        except Exception as e:
            self.results = []
            self.exception = e
            self.tb = traceback.format_exc()
def result_index(source, result):
    '''
    Return the position in ``source`` of the first entry whose ``isbn`` equals
    ``result.isbn``, or -1 if ``result`` has no ISBN or nothing matches.
    '''
    if result.isbn:
        for position, candidate in enumerate(source):
            if candidate.isbn == result.isbn:
                return position
    return -1
def merge_results(one, two):
    '''
    Fold the entries of ``two`` into ``one``, modifying ``one`` in place.

    An entry whose ISBN is already present in ``one`` is merged into the
    existing entry via ``smart_update``; every other entry is appended.
    '''
    for incoming in two:
        position = result_index(one, incoming)
        if position >= 0:
            one[position].smart_update(incoming)
            continue
        one.append(incoming)
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
           verbose=0):
    '''
    Query the available metadata sources in parallel and merge the answers.

    Google Books is always queried; isbndb.com is queried as well when
    ``isbndb_key`` is given.

    :param title, author, publisher, isbn: Search criteria. At least one must
                                           not be ``None``.
    :param isbndb_key: Access key for isbndb.com, or a false value to skip
                       that source.
    :param verbose: Passed through to the individual fetchers.
    :return: A tuple ``(results, status)``. ``results`` is the merged list of
             matches. ``status`` has one ``(name, exception, traceback)``
             tuple per source; ``exception`` and ``traceback`` are ``None``
             for a source that succeeded.
    :raises ValueError: If no search criterion was supplied.
    '''
    # An explicit check rather than ``assert``, which is stripped when Python
    # runs with -O.
    if title is None and author is None and publisher is None and \
            isbn is None:
        raise ValueError('You must specify at least one of title, author, '
                         'publisher or ISBN')

    fetchers = [FetchGoogle(title, author, publisher, isbn, verbose)]
    if isbndb_key:
        fetchers.append(FetchISBNDB(title, author, publisher, isbn, verbose,
                                    isbndb_key))

    # Run all sources concurrently and wait for every one of them.
    for fetcher in fetchers:
        fetcher.start()
    for fetcher in fetchers:
        fetcher.join()

    # The first fetcher's list is the merge target.
    for fetcher in fetchers[1:]:
        merge_results(fetchers[0].results, fetcher.results)

    def comment_text(mi):
        return mi.comments.strip() if mi.comments else ''

    # NOTE(review): this orders by the comment text itself (descending), not
    # by its length -- confirm that this is the intended ranking.
    results = sorted(fetchers[0].results, key=comment_text, reverse=True)
    return results, [(x.name, x.exception, x.tb) for x in fetchers]
def option_parser():
    '''
    Build the command line option parser for the metadata fetching tool.

    :return: A parser that understands the title/author/publisher/ISBN search
             criteria, the result limit, the isbndb.com key and a verbosity
             counter.
    '''
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]
        Fetch book metadata from online sources. You must specify at least one
        of title, author, publisher or ISBN. If you specify ISBN, the others
        are ignored.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    # Typed as an int so that a value given on the command line has the same
    # type as the default, instead of arriving as a string.
    parser.add_option('-m', '--max-results', default=10, type='int',
                      help='Maximum number of results to fetch')
    parser.add_option('-k', '--isbndb-key',
                      help=('The access key for your ISBNDB.com account. '
                            'Only needed if you want to search isbndb.com'))
    # Each -v increments the verbosity level.
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser
def main(args=sys.argv):
    '''
    Command line entry point: search for metadata and print what was found.

    :param args: Full argument vector, including the program name.
    :return: Process exit code: 1 if no search criterion was given, otherwise
             0 (failures of individual sources are only reported as warnings).
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    # search() rejects a query with no criteria at all, so explain the
    # problem to the user here instead of letting that error escape as a
    # traceback.
    if opts.title is None and opts.author is None and \
            opts.publisher is None and opts.isbn is None:
        parser.print_help()
        print('You must specify at least one of title, author, publisher '
              'or ISBN')
        return 1

    # NOTE(review): opts.max_results is parsed but not forwarded; search()
    # has no parameter for it.
    results, exceptions = search(opts.title, opts.author, opts.publisher,
                                 opts.isbn, opts.isbndb_key, opts.verbose)

    for result in results:
        print(unicode(result).encode(preferred_encoding))
        print('')

    # Report the sources that failed, after the results.
    for name, exception, tb in exceptions:
        if exception is not None:
            print('WARNING: Fetching from %s failed with error:' % (name,))
            print(exception)
            print(tb)

    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -90,7 +90,7 @@ class ResultList(list):
try:
desc = description(entry)
if desc:
return desc[0].text
return 'SUMMARY:\n'+desc[0].text
except:
report(verbose)
@ -200,9 +200,6 @@ def search(title=None, author=None, publisher=None, isbn=None,
ans = ResultList()
ans.populate(entries, br, verbose)
ans.sort(cmp=lambda x, y:cmp(len(x.comments if x.comments else ''),
len(x.comments if x.comments else '')),
reverse=True)
return ans
def option_parser():

View File

@ -4,10 +4,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Interface to isbndb.com. My key HLLXQX2A.
'''
import sys, logging, re, socket
import sys, re, socket
from urllib import urlopen, quote
from calibre import setup_cli_handlers
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
@ -63,9 +62,10 @@ class ISBNDBMetadata(MetaInformation):
try:
self.author_sort = book.find('authors').find('person').string
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
if self.authors:
self.author_sort = authors_to_sort_string(self.authors)
pass
self.publisher = book.find('publishertext').string
summ = book.find('summary')
@ -118,19 +118,15 @@ key is the account key you generate after signing up for a free account from isb
return parser
def create_books(opts, args, logger=None, timeout=5.):
if logger is None:
level = logging.DEBUG if opts.verbose else logging.INFO
logger = logging.getLogger('isbndb')
setup_cli_handlers(logger, level)
def create_books(opts, args, timeout=5.):
base_url = BASE_URL%dict(key=args[1])
if opts.isbn is not None:
url = build_isbn(base_url, opts)
else:
url = build_combined(base_url, opts)
logger.info('ISBNDB query: '+url)
if opts.verbose:
print ('ISBNDB query: '+url)
return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
@ -139,7 +135,7 @@ def main(args=sys.argv):
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print('You must supply the isbndb.com key')
print ('You must supply the isbndb.com key')
return 1
for book in create_books(opts, args):

View File

@ -4,22 +4,74 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
GUI for fetching metadata from servers.
'''
import logging, cStringIO
import time
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, \
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, QThread, \
QAbstractTableModel, QCoreApplication, QTimer
from PyQt4.QtGui import QDialog, QItemSelectionModel
from PyQt4.QtGui import QDialog, QItemSelectionModel, QWidget, QLabel, QMovie
from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
from calibre.gui2 import error_dialog, NONE, info_dialog
from calibre.ebooks.metadata.isbndb import create_books, option_parser, ISBNDBError
from calibre.gui2 import error_dialog, NONE, info_dialog, warning_dialog
from calibre.utils.config import prefs
class Fetcher(QThread):
    '''
    Background thread that runs the metadata search.

    After the thread finishes, ``results`` holds the matches and
    ``exceptions`` holds ``(source name, exception, traceback)`` tuples, in
    the form returned by ``calibre.ebooks.metadata.fetch.search``.
    '''

    def __init__(self, title, author, publisher, isbn, key):
        QThread.__init__(self)
        self.title = title
        self.author = author
        self.publisher = publisher
        self.isbn = isbn
        self.key = key
        # Defined up front so that code reading them after the thread has
        # finished never hits AttributeError, even if run() failed.
        self.results, self.exceptions = [], []

    def run(self):
        '''
        Run the search, recording an unexpected failure as an entry in
        ``exceptions`` instead of letting it escape the thread.
        '''
        try:
            from calibre.ebooks.metadata.fetch import search
            # An empty key is passed as None.
            self.results, self.exceptions = search(self.title, self.author,
                                                   self.publisher, self.isbn,
                                                   self.key or None)
        except Exception as e:
            import traceback
            self.results = []
            self.exceptions = [('Metadata search', e,
                                traceback.format_exc())]
class ProgressIndicator(QWidget):
    '''
    Overlay widget showing an animation with a status message underneath.
    It is hidden until :meth:`start` is called and hidden again by
    :meth:`stop`.
    '''

    def __init__(self, *args):
        QWidget.__init__(self, *args)
        self.setGeometry(0, 0, 300, 350)
        self.movie = QMovie(':/images/jobs-animated.mng')
        self.ml = QLabel(self)
        self.ml.setMovie(self.movie)
        # Start and immediately pause the movie; start() un-pauses it.
        self.movie.start()
        self.movie.setPaused(True)
        self.status = QLabel(self)
        self.status.setWordWrap(True)
        self.status.setAlignment(Qt.AlignHCenter|Qt.AlignTop)
        # font() returns a copy, so the changes only take effect once the
        # modified font is set back on the label.
        font = self.status.font()
        font.setBold(True)
        font.setPointSize(self.font().pointSize()+6)
        self.status.setFont(font)
        self.setVisible(False)

    def start(self, msg=''):
        '''
        Lay the indicator out over the parent widget, show ``msg`` as the
        status text and start the animation.
        '''
        view = self.parent()
        pwidth, pheight = view.size().width(), view.size().height()
        # Full parent width, at most 250 pixels high, vertically centered.
        self.resize(pwidth, min(pheight, 250))
        self.move(0, int((pheight-self.size().height())/2.))
        # Animation horizontally centered at the top...
        self.ml.resize(self.ml.sizeHint())
        self.ml.move(int((self.size().width()-self.ml.size().width())/2.), 0)
        # ...and the status text in the remaining space, 10 pixels below it.
        self.status.resize(self.size().width(),
                self.size().height()-self.ml.size().height()-10)
        self.status.move(0, self.ml.size().height()+10)
        self.status.setText(msg)
        self.setVisible(True)
        self.movie.setPaused(False)

    def stop(self):
        '''
        Pause the animation if it is running and hide the indicator.
        '''
        if self.movie.state() == self.movie.Running:
            self.movie.setPaused(True)
        self.setVisible(False)
class Matches(QAbstractTableModel):
def __init__(self, matches):
self.matches = matches
self.matches.sort(cmp=lambda b, a: cmp(len(a.comments if a.comments else ''), len(b.comments if b.comments else '')))
self.matches.sort(cmp=lambda b, a: \
cmp(len(a.comments if a.comments else ''),
len(b.comments if b.comments else '')))
QAbstractTableModel.__init__(self)
def rowCount(self, *args):
@ -73,22 +125,23 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
Ui_FetchMetadata.__init__(self)
self.setupUi(self)
self.pi = ProgressIndicator(self)
self.timeout = timeout
QObject.connect(self.fetch, SIGNAL('clicked()'), self.fetch_metadata)
self.key.setText(prefs['isbndb_com_key'])
self.setWindowTitle(title if title else 'Unknown')
self.tlabel.setText(self.tlabel.text().arg(title if title else 'Unknown'))
self.setWindowTitle(title if title else _('Unknown'))
self.isbn = isbn
self.title = title
self.author = author.strip()
self.publisher = publisher
self.previous_row = None
self.connect(self.matches, SIGNAL('activated(QModelIndex)'), self.chosen)
key = str(self.key.text())
if key:
QTimer.singleShot(100, self.fetch_metadata)
self.connect(self.matches, SIGNAL('entered(QModelIndex)'),
lambda index:self.matches.setCurrentIndex(index))
self.matches.setMouseTracking(True)
self.fetch_metadata()
def show_summary(self, current, previous):
@ -100,52 +153,57 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
def fetch_metadata(self):
key = str(self.key.text())
if not key:
error_dialog(self, _('Cannot connect'),
_('You must specify a valid access key for isbndb.com'))
return
else:
if key:
prefs['isbndb_com_key'] = key
args = ['isbndb']
else:
key = None
title = author = publisher = isbn = None
if self.isbn:
args.extend(('--isbn', self.isbn))
isbn = self.isbn
if self.title:
args.extend(('--title', self.title))
if self.author and not self.author == 'Unknown':
args.extend(('--author', self.author))
#if self.publisher:
# args.extend(('--publisher', self.publisher))
title = self.title
if self.author and not self.author == _('Unknown'):
author = self.author
self.fetch.setEnabled(False)
self.setCursor(Qt.WaitCursor)
QCoreApplication.instance().processEvents()
try:
args.append(key)
parser = option_parser()
opts, args = parser.parse_args(args)
self.logger = logging.getLogger('Job #'+str(id))
self.logger.setLevel(logging.DEBUG)
self.log_dest = cStringIO.StringIO()
handler = logging.StreamHandler(self.log_dest)
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)s: %(message)s'))
self.logger.addHandler(handler)
self.fetcher = Fetcher(title, author, publisher, isbn, key)
self.fetcher.start()
self.pi.start(_('Finding metadata...'))
self._hangcheck = QTimer(self)
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
self.start_time = time.time()
self._hangcheck.start()
def hangcheck(self):
if not (self.fetcher.isFinished() or time.time() - self.start_time > 75):
return
self._hangcheck.stop()
try:
books = create_books(opts, args, self.logger, self.timeout)
except ISBNDBError, err:
error_dialog(self, _('Error fetching metadata'), str(err)).exec_()
if self.fetcher.isRunning():
error_dialog(self, _('Could not find metadata'),
_('The metadata download seems to have stalled. '
'Try again later.')).exec_()
self.fetcher.terminate()
return
self.model = Matches(self.fetcher.results)
warnings = [(x[0], unicode(x[1])) for x in \
self.fetcher.exceptions if x[1] is not None]
if warnings:
warnings='<br>'.join(['<b>%s</b>: %s'%(name, exc) for name,exc in warnings])
warning_dialog(self, _('Warning'),
'<p>'+_('Could not fetch metadata from:')+\
'<br><br>'+warnings+'</p>').exec_()
if self.model.rowCount() < 1:
info_dialog(self, _('No metadata found'),
_('No metadata found, try adjusting the title and author '
'or the ISBN key.')).exec_()
self.reject()
return
self.model = Matches(books)
if self.model.rowCount() < 1:
info_dialog(self, _('No metadata found'), _('No metadata found, try adjusting the title and author or the ISBN key.')).exec_()
self.reject()
self.matches.setModel(self.model)
QObject.connect(self.matches.selectionModel(), SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
QObject.connect(self.matches.selectionModel(),
SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
self.show_summary)
self.model.reset()
self.matches.selectionModel().select(self.model.index(0, 0),
@ -155,7 +213,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
self.fetch.setEnabled(True)
self.unsetCursor()
self.matches.resizeColumnsToContents()
self.pi.stop()
def selected_book(self):

View File

@ -23,20 +23,13 @@
<item>
<widget class="QLabel" name="tlabel" >
<property name="text" >
<string>Fetching metadata for &lt;b>%1&lt;/b></string>
<string>&lt;p>calibre can find metadata for your books from two locations: &lt;b>Google Books&lt;/b> and &lt;b>isbndb.com&lt;/b>. &lt;p>To use isbndb.com you must sign up for a &lt;a href="http://www.isbndb.com">free account&lt;/a> and enter your access key below.</string>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="label" >
<property name="text" >
<string>Sign up for a free account from &lt;a href="http://www.isbndb.com">ISBNdb.com&lt;/a> to get an access key.</string>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
<property name="wordWrap" >
<bool>true</bool>
</property>
<property name="openExternalLinks" >
<bool>true</bool>

View File

@ -219,6 +219,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
return QVariant(category + ' [%d]'%num)
elif role == Qt.FontRole:
return self.bold_font
elif role == Qt.ForegroundRole and category == _('Scheduled'):
return QVariant(QColor(0, 255, 0))
return NONE
def update_recipe_schedule(self, recipe):

View File

@ -823,7 +823,7 @@ class DeviceBooksModel(BooksModel):
def search(self, text, refinement, reset=True):
if not text:
if not text or not text.strip():
self.map = list(range(len(self.db)))
else:
matches = self.search_engine.parse(text)

View File

@ -90,7 +90,6 @@ class ProgressIndicator(QWidget):
def stop(self):
if self.movie.state() == self.movie.Running:
#self.movie.jumpToFrame(0)
self.movie.setPaused(True)
self.setVisible(False)

View File

@ -53,7 +53,6 @@ entry_points = {
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
@ -69,7 +68,7 @@ entry_points = {
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
'google-books = calibre.ebooks.metadata.google_books:main',
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
],
'gui_scripts' : [
__appname__+' = calibre.gui2.main:main',
@ -196,9 +195,9 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lit.from_any import option_parser as any2lit
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.mobi.from_any import option_parser as any2mobi
from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
from calibre.gui2.main import option_parser as guiop
from calibre.ebooks.metadata.google_books import option_parser as gbop
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
f = open_file('/etc/bash_completion.d/libprs500')
@ -246,7 +245,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
f.write(opts_and_words('google-books', gbop, []))
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))