calibre now searches the Google Books database in addition to isbndb.com for book metadata

This commit is contained in:
Kovid Goyal 2009-03-19 19:02:04 -07:00
parent 7a277a43c5
commit ed0f570ffb
10 changed files with 290 additions and 84 deletions

View File

@ -233,7 +233,7 @@ class MetaInformation(object):
if mi.authors and mi.authors[0] != _('Unknown'): if mi.authors and mi.authors[0] != _('Unknown'):
self.authors = mi.authors self.authors = mi.authors
for attr in ('author_sort', 'title_sort', 'comments', 'category', for attr in ('author_sort', 'title_sort', 'category',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc', 'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer', 'cover', 'language', 'guide', 'book_producer',
@ -249,6 +249,15 @@ class MetaInformation(object):
if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None: if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
self.cover_data = mi.cover_data self.cover_data = mi.cover_data
my_comments = getattr(self, 'comments', '')
other_comments = getattr(mi, 'comments', '')
if not my_comments:
my_comments = ''
if not other_comments:
other_comments = ''
if len(other_comments.strip()) > len(my_comments.strip()):
self.comments = other_comments
def format_series_index(self): def format_series_index(self):
try: try:
x = float(self.series_index) x = float(self.series_index)

View File

@ -0,0 +1,153 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, sys, textwrap
from threading import Thread
from calibre import preferred_encoding
from calibre.utils.config import OptionParser
class FetchGoogle(Thread):
    '''
    Daemon thread that runs a Google Books metadata query.

    Once :meth:`run` has completed, ``results`` holds the value returned by
    the Google Books ``search`` function, or an empty list if that call
    raised. In the failure case ``exception`` is the exception instance and
    ``tb`` the formatted traceback; otherwise both stay ``None``.
    '''

    # Label identifying this metadata source
    name = 'Google Books'

    def __init__(self, title, author, publisher, isbn, verbose):
        Thread.__init__(self, None)
        self.daemon = True
        # Search criteria, handed unchanged to the search function in run()
        self.title, self.author = title, author
        self.publisher, self.isbn = publisher, isbn
        self.verbose = verbose
        # Failure details, filled in by run() only if the query raises
        self.exception = None
        self.tb = None

    def run(self):
        '''Perform the query and record either the results or the failure.'''
        from calibre.ebooks.metadata.google_books import search
        try:
            found = search(self.title, self.author, self.publisher,
                           self.isbn, max_results=10,
                           verbose=self.verbose)
        except Exception:
            # Errors are stored rather than propagated so that the code
            # joining this thread can inspect them afterwards.
            self.results = []
            self.exception = sys.exc_info()[1]
            self.tb = traceback.format_exc()
        else:
            self.results = found
class FetchISBNDB(Thread):
    '''
    Daemon thread that runs an isbndb.com metadata query.

    The query is expressed as a command line for the isbndb module's option
    parser. Once :meth:`run` has completed, ``results`` holds the list
    produced by ``create_books``, or an empty list if parsing or fetching
    raised. In the failure case ``exception`` is the exception instance and
    ``tb`` the formatted traceback; otherwise both stay ``None``.
    '''

    # Label identifying this metadata source
    name = 'IsbnDB'

    def __init__(self, title, author, publisher, isbn, verbose, key):
        Thread.__init__(self, None)
        self.daemon = True
        # Search criteria
        self.title, self.author = title, author
        self.publisher, self.isbn = publisher, isbn
        self.verbose = verbose
        # isbndb.com access key, passed as the positional argument
        self.key = key
        # Failure details, filled in by run() only if the query raises
        self.exception = None
        self.tb = None

    def run(self):
        '''Build the isbndb command line, run it and record the outcome.'''
        from calibre.ebooks.metadata.isbndb import option_parser, create_books
        argv = ['isbndb']
        if self.isbn:
            # An ISBN takes precedence: the other criteria are not sent
            argv += ['--isbn', self.isbn]
        else:
            criteria = (
                ('--title', self.title),
                ('--author', self.author),
                ('--publisher', self.publisher),
            )
            for flag, value in criteria:
                if value:
                    argv += [flag, value]
        argv.append(self.key)
        try:
            opts, positional = option_parser().parse_args(argv)
            self.results = create_books(opts, positional)
        except Exception:
            # Errors are stored rather than propagated so that the code
            # joining this thread can inspect them afterwards.
            self.results = []
            self.exception = sys.exc_info()[1]
            self.tb = traceback.format_exc()
def result_index(source, result):
    '''
    Return the index of the first entry in `source` whose ``isbn`` equals
    ``result.isbn``.

    Returns -1 when `result` has no (truthy) ISBN or when no entry matches.
    '''
    wanted = result.isbn
    if wanted:
        for position, candidate in enumerate(source):
            if candidate.isbn == wanted:
                return position
    return -1
def merge_results(one, two):
    '''
    Fold the entries of `two` into the list `one`, modifying `one` in place.

    An entry of `two` whose ISBN is already present in `one` (as decided by
    ``result_index``) is merged into that existing entry through its
    ``smart_update`` method; every other entry is appended to `one`.
    '''
    for incoming in two:
        position = result_index(one, incoming)
        if position >= 0:
            one[position].smart_update(incoming)
            continue
        one.append(incoming)
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
           verbose=0):
    '''
    Query the available metadata sources in parallel and combine the results.

    Google Books is always queried; isbndb.com is queried as well when
    `isbndb_key` is truthy. Each source runs in its own thread and this
    function blocks until all of them have finished.

    :param title, author, publisher, isbn: Search criteria. At least one of
        them must not be ``None``.
    :param isbndb_key: Access key for isbndb.com, or a false value to skip
        that source.
    :param verbose: Verbosity level forwarded to the fetchers.
    :return: A tuple ``(results, report)``. ``results`` is the merged list of
        matches, the entries with the longest comments first. ``report`` has
        one ``(source name, exception or None, traceback or None)`` tuple per
        source that was queried.
    :raises ValueError: If every search criterion is ``None``.
    '''
    # An explicit check instead of ``assert``: assertions vanish under -O.
    if title is None and author is None and publisher is None and \
            isbn is None:
        raise ValueError('At least one of title, author, publisher or isbn '
                         'must be specified')

    fetchers = [FetchGoogle(title, author, publisher, isbn, verbose)]
    if isbndb_key:
        fetchers.append(FetchISBNDB(title, author, publisher, isbn, verbose,
                                    isbndb_key))

    for fetcher in fetchers:
        fetcher.start()
    for fetcher in fetchers:
        fetcher.join()

    # The first fetcher's list is the base the other sources are merged into
    merged = fetchers[0].results
    for fetcher in fetchers[1:]:
        merge_results(merged, fetcher.results)

    def comment_length(mi):
        # Missing/empty comments rank last
        return len(mi.comments.strip()) if mi.comments else 0

    # Rank by how much descriptive text a match carries (not by the
    # alphabetical order of that text); ties keep their merge order.
    results = sorted(merged, key=comment_length, reverse=True)
    return results, [(x.name, x.exception, x.tb) for x in fetchers]
def option_parser():
    '''
    Create the command line option parser for the metadata fetching tool.

    :return: An ``OptionParser`` accepting the search criteria (title,
        author, publisher, ISBN), a result limit, an optional isbndb.com
        access key and a repeatable verbosity flag.
    '''
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]
        Fetch book metadata from online sources. You must specify at least one
        of title, author, publisher or ISBN. If you specify ISBN, the others
        are ignored.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    # type='int' so that a value given on the command line has the same type
    # as the default instead of arriving as a string
    parser.add_option('-m', '--max-results', default=10, type='int',
                      help='Maximum number of results to fetch')
    parser.add_option('-k', '--isbndb-key',
                      help=('The access key for your ISBNDB.com account. '
                            'Only needed if you want to search isbndb.com'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser
def main(args=sys.argv):
    '''
    Command line entry point: look up metadata and print what was found.

    Every match is printed (encoded with ``preferred_encoding``) followed by
    a blank line. Afterwards a warning, the error and its traceback are
    printed for each source that failed.

    :param args: Command line arguments, parsed with :func:`option_parser`.
    :return: 0 after a search was performed, 1 if no search criteria were
        supplied (the help text is printed in that case).
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    # search() rejects a query without any criteria, so catch that here and
    # show the usage text instead of failing with a traceback.
    criteria = (opts.title, opts.author, opts.publisher, opts.isbn)
    if all(value is None for value in criteria):
        parser.print_help()
        return 1

    # NOTE(review): opts.max_results is parsed but not used; search() has no
    # parameter to receive it.
    results, exceptions = search(opts.title, opts.author, opts.publisher,
                                 opts.isbn, opts.isbndb_key, opts.verbose)

    for result in results:
        print(unicode(result).encode(preferred_encoding))
        print('')

    for name, exception, tb in exceptions:
        if exception is not None:
            print('WARNING: Fetching from %s failed with error:' % name)
            print(exception)
            print(tb)

    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -90,7 +90,7 @@ class ResultList(list):
try: try:
desc = description(entry) desc = description(entry)
if desc: if desc:
return desc[0].text return 'SUMMARY:\n'+desc[0].text
except: except:
report(verbose) report(verbose)
@ -200,9 +200,6 @@ def search(title=None, author=None, publisher=None, isbn=None,
ans = ResultList() ans = ResultList()
ans.populate(entries, br, verbose) ans.populate(entries, br, verbose)
ans.sort(cmp=lambda x, y:cmp(len(x.comments if x.comments else ''),
len(x.comments if x.comments else '')),
reverse=True)
return ans return ans
def option_parser(): def option_parser():

View File

@ -4,10 +4,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Interface to isbndb.com. My key HLLXQX2A. Interface to isbndb.com. My key HLLXQX2A.
''' '''
import sys, logging, re, socket import sys, re, socket
from urllib import urlopen, quote from urllib import urlopen, quote
from calibre import setup_cli_handlers
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
@ -63,9 +62,10 @@ class ISBNDBMetadata(MetaInformation):
try: try:
self.author_sort = book.find('authors').find('person').string self.author_sort = book.find('authors').find('person').string
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except: except:
if self.authors: pass
self.author_sort = authors_to_sort_string(self.authors)
self.publisher = book.find('publishertext').string self.publisher = book.find('publishertext').string
summ = book.find('summary') summ = book.find('summary')
@ -118,19 +118,15 @@ key is the account key you generate after signing up for a free account from isb
return parser return parser
def create_books(opts, args, logger=None, timeout=5.): def create_books(opts, args, timeout=5.):
if logger is None:
level = logging.DEBUG if opts.verbose else logging.INFO
logger = logging.getLogger('isbndb')
setup_cli_handlers(logger, level)
base_url = BASE_URL%dict(key=args[1]) base_url = BASE_URL%dict(key=args[1])
if opts.isbn is not None: if opts.isbn is not None:
url = build_isbn(base_url, opts) url = build_isbn(base_url, opts)
else: else:
url = build_combined(base_url, opts) url = build_combined(base_url, opts)
logger.info('ISBNDB query: '+url) if opts.verbose:
print ('ISBNDB query: '+url)
return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)] return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]

View File

@ -4,22 +4,74 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
GUI for fetching metadata from servers. GUI for fetching metadata from servers.
''' '''
import logging, cStringIO import time
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, \ from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, QThread, \
QAbstractTableModel, QCoreApplication, QTimer QAbstractTableModel, QCoreApplication, QTimer
from PyQt4.QtGui import QDialog, QItemSelectionModel from PyQt4.QtGui import QDialog, QItemSelectionModel, QWidget, QLabel, QMovie
from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
from calibre.gui2 import error_dialog, NONE, info_dialog from calibre.gui2 import error_dialog, NONE, info_dialog, warning_dialog
from calibre.ebooks.metadata.isbndb import create_books, option_parser, ISBNDBError
from calibre.utils.config import prefs from calibre.utils.config import prefs
class Fetcher(QThread):
    '''
    Thread that performs a metadata search without blocking the caller.

    When :meth:`run` has completed, ``results`` and ``exceptions`` hold the
    two values returned by ``calibre.ebooks.metadata.fetch.search``.
    '''

    def __init__(self, title, author, publisher, isbn, key):
        QThread.__init__(self)
        # Search criteria
        self.title, self.author = title, author
        self.publisher, self.isbn = publisher, isbn
        # isbndb.com access key; a false value is passed on as None
        self.key = key

    def run(self):
        '''Run the search and store its outcome on the thread object.'''
        from calibre.ebooks.metadata.fetch import search
        isbndb_key = self.key or None
        found, problems = search(self.title, self.author,
                                 self.publisher, self.isbn,
                                 isbndb_key)
        self.results, self.exceptions = found, problems
class ProgressIndicator(QWidget):
    '''
    Overlay widget that shows an animation with a status message below it.

    The widget starts out hidden. :meth:`start` sizes it relative to its
    parent, sets the message, shows it and runs the animation; :meth:`stop`
    pauses the animation and hides the widget again.
    '''

    def __init__(self, *args):
        QWidget.__init__(self, *args)
        self.setGeometry(0, 0, 300, 350)

        # Animation: started once and then kept paused until start() is called
        self.movie = QMovie(':/images/jobs-animated.mng')
        self.ml = QLabel(self)
        self.ml.setMovie(self.movie)
        self.movie.start()
        self.movie.setPaused(True)

        # Status message shown below the animation
        self.status = QLabel(self)
        self.status.setWordWrap(True)
        self.status.setAlignment(Qt.AlignHCenter|Qt.AlignTop)
        # font() returns a copy, so the changes only take effect once the
        # modified font is handed back with setFont().
        status_font = self.status.font()
        status_font.setBold(True)
        status_font.setPointSize(self.font().pointSize()+6)
        self.status.setFont(status_font)

        self.setVisible(False)

    def start(self, msg=''):
        '''
        Show the indicator over its parent and start the animation.

        :param msg: Text to display in the status label.
        '''
        view = self.parent()
        pwidth, pheight = view.size().width(), view.size().height()
        # Full parent width, at most 250 high, vertically centered
        self.resize(pwidth, min(pheight, 250))
        self.move(0, int((pheight-self.size().height())/2.))
        # Animation horizontally centered at the top
        self.ml.resize(self.ml.sizeHint())
        self.ml.move(int((self.size().width()-self.ml.size().width())/2.), 0)
        # Status label fills the space below the animation (10px gap)
        self.status.resize(self.size().width(), self.size().height()-self.ml.size().height()-10)
        self.status.move(0, self.ml.size().height()+10)
        self.status.setText(msg)
        self.setVisible(True)
        self.movie.setPaused(False)

    def stop(self):
        '''Pause the animation if it is running and hide the indicator.'''
        if self.movie.state() == self.movie.Running:
            self.movie.setPaused(True)
        self.setVisible(False)
class Matches(QAbstractTableModel): class Matches(QAbstractTableModel):
def __init__(self, matches): def __init__(self, matches):
self.matches = matches self.matches = matches
self.matches.sort(cmp=lambda b, a: cmp(len(a.comments if a.comments else ''), len(b.comments if b.comments else ''))) self.matches.sort(cmp=lambda b, a: \
cmp(len(a.comments if a.comments else ''),
len(b.comments if b.comments else '')))
QAbstractTableModel.__init__(self) QAbstractTableModel.__init__(self)
def rowCount(self, *args): def rowCount(self, *args):
@ -73,22 +125,23 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
Ui_FetchMetadata.__init__(self) Ui_FetchMetadata.__init__(self)
self.setupUi(self) self.setupUi(self)
self.pi = ProgressIndicator(self)
self.timeout = timeout self.timeout = timeout
QObject.connect(self.fetch, SIGNAL('clicked()'), self.fetch_metadata) QObject.connect(self.fetch, SIGNAL('clicked()'), self.fetch_metadata)
self.key.setText(prefs['isbndb_com_key']) self.key.setText(prefs['isbndb_com_key'])
self.setWindowTitle(title if title else 'Unknown') self.setWindowTitle(title if title else _('Unknown'))
self.tlabel.setText(self.tlabel.text().arg(title if title else 'Unknown'))
self.isbn = isbn self.isbn = isbn
self.title = title self.title = title
self.author = author.strip() self.author = author.strip()
self.publisher = publisher self.publisher = publisher
self.previous_row = None self.previous_row = None
self.connect(self.matches, SIGNAL('activated(QModelIndex)'), self.chosen) self.connect(self.matches, SIGNAL('activated(QModelIndex)'), self.chosen)
key = str(self.key.text()) self.connect(self.matches, SIGNAL('entered(QModelIndex)'),
if key: lambda index:self.matches.setCurrentIndex(index))
QTimer.singleShot(100, self.fetch_metadata) self.matches.setMouseTracking(True)
self.fetch_metadata()
def show_summary(self, current, previous): def show_summary(self, current, previous):
@ -100,52 +153,57 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
def fetch_metadata(self): def fetch_metadata(self):
key = str(self.key.text()) key = str(self.key.text())
if not key: if key:
error_dialog(self, _('Cannot connect'),
_('You must specify a valid access key for isbndb.com'))
return
else:
prefs['isbndb_com_key'] = key prefs['isbndb_com_key'] = key
else:
args = ['isbndb'] key = None
title = author = publisher = isbn = None
if self.isbn: if self.isbn:
args.extend(('--isbn', self.isbn)) isbn = self.isbn
if self.title: if self.title:
args.extend(('--title', self.title)) title = self.title
if self.author and not self.author == 'Unknown': if self.author and not self.author == _('Unknown'):
args.extend(('--author', self.author)) author = self.author
#if self.publisher:
# args.extend(('--publisher', self.publisher))
self.fetch.setEnabled(False) self.fetch.setEnabled(False)
self.setCursor(Qt.WaitCursor) self.setCursor(Qt.WaitCursor)
QCoreApplication.instance().processEvents() QCoreApplication.instance().processEvents()
try: self.fetcher = Fetcher(title, author, publisher, isbn, key)
args.append(key) self.fetcher.start()
parser = option_parser() self.pi.start(_('Finding metadata...'))
opts, args = parser.parse_args(args) self._hangcheck = QTimer(self)
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
self.logger = logging.getLogger('Job #'+str(id)) self.start_time = time.time()
self.logger.setLevel(logging.DEBUG) self._hangcheck.start()
self.log_dest = cStringIO.StringIO()
handler = logging.StreamHandler(self.log_dest)
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)s: %(message)s'))
self.logger.addHandler(handler)
def hangcheck(self):
if not (self.fetcher.isFinished() or time.time() - self.start_time > 75):
return
self._hangcheck.stop()
try: try:
books = create_books(opts, args, self.logger, self.timeout) if self.fetcher.isRunning():
except ISBNDBError, err: error_dialog(self, _('Could not find metadata'),
error_dialog(self, _('Error fetching metadata'), str(err)).exec_() _('The metadata download seems to have stalled. '
'Try again later.')).exec_()
self.fetcher.terminate()
return
self.model = Matches(self.fetcher.results)
warnings = [(x[0], unicode(x[1])) for x in \
self.fetcher.exceptions if x[1] is not None]
if warnings:
warnings='<br>'.join(['<b>%s</b>: %s'%(name, exc) for name,exc in warnings])
warning_dialog(self, _('Warning'),
'<p>'+_('Could not fetch metadata from:')+\
'<br><br>'+warnings+'</p>').exec_()
if self.model.rowCount() < 1:
info_dialog(self, _('No metadata found'),
_('No metadata found, try adjusting the title and author '
'or the ISBN key.')).exec_()
self.reject()
return return
self.model = Matches(books)
if self.model.rowCount() < 1:
info_dialog(self, _('No metadata found'), _('No metadata found, try adjusting the title and author or the ISBN key.')).exec_()
self.reject()
self.matches.setModel(self.model) self.matches.setModel(self.model)
QObject.connect(self.matches.selectionModel(), SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'), QObject.connect(self.matches.selectionModel(),
SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
self.show_summary) self.show_summary)
self.model.reset() self.model.reset()
self.matches.selectionModel().select(self.model.index(0, 0), self.matches.selectionModel().select(self.model.index(0, 0),
@ -155,7 +213,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
self.fetch.setEnabled(True) self.fetch.setEnabled(True)
self.unsetCursor() self.unsetCursor()
self.matches.resizeColumnsToContents() self.matches.resizeColumnsToContents()
self.pi.stop()
def selected_book(self): def selected_book(self):

View File

@ -23,20 +23,13 @@
<item> <item>
<widget class="QLabel" name="tlabel" > <widget class="QLabel" name="tlabel" >
<property name="text" > <property name="text" >
<string>Fetching metadata for &lt;b>%1&lt;/b></string> <string>&lt;p>calibre can find metadata for your books from two locations: &lt;b>Google Books&lt;/b> and &lt;b>isbndb.com&lt;/b>. &lt;p>To use isbndb.com you must sign up for a &lt;a href="http://www.isbndb.com">free account&lt;/a> and exter you access key below.</string>
</property> </property>
<property name="alignment" > <property name="alignment" >
<set>Qt::AlignCenter</set> <set>Qt::AlignCenter</set>
</property> </property>
</widget> <property name="wordWrap" >
</item> <bool>true</bool>
<item>
<widget class="QLabel" name="label" >
<property name="text" >
<string>Sign up for a free account from &lt;a href="http://www.isbndb.com">ISBNdb.com&lt;/a> to get an access key.</string>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
</property> </property>
<property name="openExternalLinks" > <property name="openExternalLinks" >
<bool>true</bool> <bool>true</bool>

View File

@ -219,6 +219,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
return QVariant(category + ' [%d]'%num) return QVariant(category + ' [%d]'%num)
elif role == Qt.FontRole: elif role == Qt.FontRole:
return self.bold_font return self.bold_font
elif role == Qt.ForegroundRole and category == _('Scheduled'):
return QVariant(QColor(0, 255, 0))
return NONE return NONE
def update_recipe_schedule(self, recipe): def update_recipe_schedule(self, recipe):

View File

@ -823,7 +823,7 @@ class DeviceBooksModel(BooksModel):
def search(self, text, refinement, reset=True): def search(self, text, refinement, reset=True):
if not text: if not text or not text.strip():
self.map = list(range(len(self.db))) self.map = list(range(len(self.db)))
else: else:
matches = self.search_engine.parse(text) matches = self.search_engine.parse(text)

View File

@ -90,7 +90,6 @@ class ProgressIndicator(QWidget):
def stop(self): def stop(self):
if self.movie.state() == self.movie.Running: if self.movie.state() == self.movie.Running:
#self.movie.jumpToFrame(0)
self.movie.setPaused(True) self.movie.setPaused(True)
self.setVisible(False) self.setVisible(False)

View File

@ -53,7 +53,6 @@ entry_points = {
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main', 'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main', 'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main', 'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main', 'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main', 'oeb2mobi = calibre.ebooks.mobi.writer:main',
@ -69,7 +68,7 @@ entry_points = {
'calibre-parallel = calibre.parallel:main', 'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main', 'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' , 'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
'google-books = calibre.ebooks.metadata.google_books:main', 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
], ],
'gui_scripts' : [ 'gui_scripts' : [
__appname__+' = calibre.gui2.main:main', __appname__+' = calibre.gui2.main:main',
@ -196,9 +195,9 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lit.from_any import option_parser as any2lit from calibre.ebooks.lit.from_any import option_parser as any2lit
from calibre.ebooks.epub.from_comic import option_parser as comic2epub from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.mobi.from_any import option_parser as any2mobi from calibre.ebooks.mobi.from_any import option_parser as any2mobi
from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
from calibre.gui2.main import option_parser as guiop from calibre.gui2.main import option_parser as guiop
from calibre.ebooks.metadata.google_books import option_parser as gbop
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt'] 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
f = open_file('/etc/bash_completion.d/libprs500') f = open_file('/etc/bash_completion.d/libprs500')
@ -246,7 +245,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles)) f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles)) f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
f.write(opts_and_words('google-books', gbop, [])) f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf'])) f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml'])) f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt'])) f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))