From ed0f570ffb982cc8b5bf4da0679b6eaf57014507 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 19 Mar 2009 19:02:04 -0700 Subject: [PATCH] calibre now searches the Google Books database in addition to isbndb.com for book metadata --- src/calibre/ebooks/metadata/__init__.py | 11 +- src/calibre/ebooks/metadata/fetch.py | 153 +++++++++++++++++++ src/calibre/ebooks/metadata/google_books.py | 5 +- src/calibre/ebooks/metadata/isbndb.py | 22 ++- src/calibre/gui2/dialogs/fetch_metadata.py | 158 +++++++++++++------- src/calibre/gui2/dialogs/fetch_metadata.ui | 13 +- src/calibre/gui2/dialogs/scheduler.py | 2 + src/calibre/gui2/library.py | 2 +- src/calibre/gui2/viewer/main.py | 1 - src/calibre/linux.py | 7 +- 10 files changed, 290 insertions(+), 84 deletions(-) create mode 100644 src/calibre/ebooks/metadata/fetch.py diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index d9b0514362..20a0d9c608 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -233,7 +233,7 @@ class MetaInformation(object): if mi.authors and mi.authors[0] != _('Unknown'): self.authors = mi.authors - for attr in ('author_sort', 'title_sort', 'comments', 'category', + for attr in ('author_sort', 'title_sort', 'category', 'publisher', 'series', 'series_index', 'rating', 'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover', 'language', 'guide', 'book_producer', @@ -249,6 +249,15 @@ class MetaInformation(object): if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None: self.cover_data = mi.cover_data + my_comments = getattr(self, 'comments', '') + other_comments = getattr(mi, 'comments', '') + if not my_comments: + my_comments = '' + if not other_comments: + other_comments = '' + if len(other_comments.strip()) > len(my_comments.strip()): + self.comments = other_comments + def format_series_index(self): try: x = float(self.series_index) diff --git a/src/calibre/ebooks/metadata/fetch.py 
b/src/calibre/ebooks/metadata/fetch.py new file mode 100644 index 0000000000..682291ae48 --- /dev/null +++ b/src/calibre/ebooks/metadata/fetch.py @@ -0,0 +1,153 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import traceback, sys, textwrap +from threading import Thread + +from calibre import preferred_encoding +from calibre.utils.config import OptionParser + +class FetchGoogle(Thread): + name = 'Google Books' + + def __init__(self, title, author, publisher, isbn, verbose): + self.title = title + self.verbose = verbose + self.author = author + self.publisher = publisher + self.isbn = isbn + Thread.__init__(self, None) + self.daemon = True + self.exception, self.tb = None, None + + def run(self): + from calibre.ebooks.metadata.google_books import search + try: + self.results = search(self.title, self.author, self.publisher, + self.isbn, max_results=10, + verbose=self.verbose) + except Exception, e: + self.results = [] + self.exception = e + self.tb = traceback.format_exc() + + +class FetchISBNDB(Thread): + name = 'IsbnDB' + def __init__(self, title, author, publisher, isbn, verbose, key): + self.title = title + self.author = author + self.publisher = publisher + self.isbn = isbn + self.verbose = verbose + Thread.__init__(self, None) + self.daemon = True + self.exception, self.tb = None, None + self.key = key + + def run(self): + from calibre.ebooks.metadata.isbndb import option_parser, create_books + args = ['isbndb'] + if self.isbn: + args.extend(['--isbn', self.isbn]) + else: + if self.title: + args.extend(['--title', self.title]) + if self.author: + args.extend(['--author', self.author]) + if self.publisher: + args.extend(['--publisher', self.publisher]) + args.append(self.key) + try: + opts, args = option_parser().parse_args(args) + self.results = create_books(opts, args) + except Exception, e: + self.results = [] + self.exception = e + self.tb = 
traceback.format_exc() + +def result_index(source, result): + if not result.isbn: + return -1 + for i, x in enumerate(source): + if x.isbn == result.isbn: + return i + return -1 + +def merge_results(one, two): + for x in two: + idx = result_index(one, x) + if idx < 0: + one.append(x) + else: + one[idx].smart_update(x) + +def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None, + verbose=0): + assert not(title is None and author is None and publisher is None and \ + isbn is None) + fetchers = [FetchGoogle(title, author, publisher, isbn, verbose)] + if isbndb_key: + fetchers.append(FetchISBNDB(title, author, publisher, isbn, verbose, + isbndb_key)) + + + for fetcher in fetchers: + fetcher.start() + for fetcher in fetchers: + fetcher.join() + for fetcher in fetchers[1:]: + merge_results(fetchers[0].results, fetcher.results) + + results = sorted(fetchers[0].results, cmp=lambda x, y : cmp( + (x.comments.strip() if x.comments else ''), + (y.comments.strip() if y.comments else '') + ), reverse=True) + + return results, [(x.name, x.exception, x.tb) for x in fetchers] + + +def option_parser(): + parser = OptionParser(textwrap.dedent( + '''\ + %prog [options] + + Fetch book metadata from online sources. You must specify at least one + of title, author, publisher or ISBN. If you specify ISBN, the others + are ignored. + ''' + )) + parser.add_option('-t', '--title', help='Book title') + parser.add_option('-a', '--author', help='Book author(s)') + parser.add_option('-p', '--publisher', help='Book publisher') + parser.add_option('-i', '--isbn', help='Book ISBN') + parser.add_option('-m', '--max-results', default=10, + help='Maximum number of results to fetch') + parser.add_option('-k', '--isbndb-key', + help=('The access key for your ISBNDB.com account. 
' + 'Only needed if you want to search isbndb.com')) + parser.add_option('-v', '--verbose', default=0, action='count', + help='Be more verbose about errors') + return parser + +def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) + results, exceptions = search(opts.title, opts.author, opts.publisher, + opts.isbn, opts.isbndb_key, opts.verbose) + for result in results: + print unicode(result).encode(preferred_encoding) + print + + for name, exception, tb in exceptions: + if exception is not None: + print 'WARNING: Fetching from', name, 'failed with error:' + print exception + print tb + + return 0 + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py index 261d559127..176692f66c 100644 --- a/src/calibre/ebooks/metadata/google_books.py +++ b/src/calibre/ebooks/metadata/google_books.py @@ -90,7 +90,7 @@ class ResultList(list): try: desc = description(entry) if desc: - return desc[0].text + return 'SUMMARY:\n'+desc[0].text except: report(verbose) @@ -200,9 +200,6 @@ def search(title=None, author=None, publisher=None, isbn=None, ans = ResultList() ans.populate(entries, br, verbose) - ans.sort(cmp=lambda x, y:cmp(len(x.comments if x.comments else ''), - len(x.comments if x.comments else '')), - reverse=True) return ans def option_parser(): diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py index 487a52335b..da4dcc6253 100644 --- a/src/calibre/ebooks/metadata/isbndb.py +++ b/src/calibre/ebooks/metadata/isbndb.py @@ -4,10 +4,9 @@ __copyright__ = '2008, Kovid Goyal ' Interface to isbndb.com. My key HLLXQX2A. 
''' -import sys, logging, re, socket +import sys, re, socket from urllib import urlopen, quote -from calibre import setup_cli_handlers from calibre.utils.config import OptionParser from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup @@ -63,9 +62,10 @@ class ISBNDBMetadata(MetaInformation): try: self.author_sort = book.find('authors').find('person').string + if self.authors and self.author_sort == self.authors[0]: + self.author_sort = None except: - if self.authors: - self.author_sort = authors_to_sort_string(self.authors) + pass self.publisher = book.find('publishertext').string summ = book.find('summary') @@ -118,19 +118,15 @@ key is the account key you generate after signing up for a free account from isb return parser -def create_books(opts, args, logger=None, timeout=5.): - if logger is None: - level = logging.DEBUG if opts.verbose else logging.INFO - logger = logging.getLogger('isbndb') - setup_cli_handlers(logger, level) - +def create_books(opts, args, timeout=5.): base_url = BASE_URL%dict(key=args[1]) if opts.isbn is not None: url = build_isbn(base_url, opts) else: url = build_combined(base_url, opts) - - logger.info('ISBNDB query: '+url) + + if opts.verbose: + print ('ISBNDB query: '+url) return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)] @@ -139,7 +135,7 @@ def main(args=sys.argv): opts, args = parser.parse_args(args) if len(args) != 2: parser.print_help() - print('You must supply the isbndb.com key') + print ('You must supply the isbndb.com key') return 1 for book in create_books(opts, args): diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py index 76a5979f9e..99d454fa7e 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.py +++ b/src/calibre/gui2/dialogs/fetch_metadata.py @@ -4,22 +4,74 @@ __copyright__ = '2008, Kovid Goyal ' GUI for fetching metadata from servers. 
''' -import logging, cStringIO +import time -from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, \ +from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, QThread, \ QAbstractTableModel, QCoreApplication, QTimer -from PyQt4.QtGui import QDialog, QItemSelectionModel +from PyQt4.QtGui import QDialog, QItemSelectionModel, QWidget, QLabel, QMovie from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata -from calibre.gui2 import error_dialog, NONE, info_dialog -from calibre.ebooks.metadata.isbndb import create_books, option_parser, ISBNDBError +from calibre.gui2 import error_dialog, NONE, info_dialog, warning_dialog from calibre.utils.config import prefs +class Fetcher(QThread): + + def __init__(self, title, author, publisher, isbn, key): + QThread.__init__(self) + self.title = title + self.author = author + self.publisher = publisher + self.isbn = isbn + self.key = key + + def run(self): + from calibre.ebooks.metadata.fetch import search + self.results, self.exceptions = search(self.title, self.author, + self.publisher, self.isbn, + self.key if self.key else None) + +class ProgressIndicator(QWidget): + + def __init__(self, *args): + QWidget.__init__(self, *args) + self.setGeometry(0, 0, 300, 350) + self.movie = QMovie(':/images/jobs-animated.mng') + self.ml = QLabel(self) + self.ml.setMovie(self.movie) + self.movie.start() + self.movie.setPaused(True) + self.status = QLabel(self) + self.status.setWordWrap(True) + self.status.setAlignment(Qt.AlignHCenter|Qt.AlignTop) + self.status.font().setBold(True) + self.status.font().setPointSize(self.font().pointSize()+6) + self.setVisible(False) + + def start(self, msg=''): + view = self.parent() + pwidth, pheight = view.size().width(), view.size().height() + self.resize(pwidth, min(pheight, 250)) + self.move(0, (pheight-self.size().height())/2.) 
+ self.ml.resize(self.ml.sizeHint()) + self.ml.move(int((self.size().width()-self.ml.size().width())/2.), 0) + self.status.resize(self.size().width(), self.size().height()-self.ml.size().height()-10) + self.status.move(0, self.ml.size().height()+10) + self.status.setText(msg) + self.setVisible(True) + self.movie.setPaused(False) + + def stop(self): + if self.movie.state() == self.movie.Running: + self.movie.setPaused(True) + self.setVisible(False) + class Matches(QAbstractTableModel): def __init__(self, matches): self.matches = matches - self.matches.sort(cmp=lambda b, a: cmp(len(a.comments if a.comments else ''), len(b.comments if b.comments else ''))) + self.matches.sort(cmp=lambda b, a: \ + cmp(len(a.comments if a.comments else ''), + len(b.comments if b.comments else ''))) QAbstractTableModel.__init__(self) def rowCount(self, *args): @@ -73,22 +125,23 @@ class FetchMetadata(QDialog, Ui_FetchMetadata): Ui_FetchMetadata.__init__(self) self.setupUi(self) + self.pi = ProgressIndicator(self) self.timeout = timeout QObject.connect(self.fetch, SIGNAL('clicked()'), self.fetch_metadata) self.key.setText(prefs['isbndb_com_key']) - self.setWindowTitle(title if title else 'Unknown') - self.tlabel.setText(self.tlabel.text().arg(title if title else 'Unknown')) + self.setWindowTitle(title if title else _('Unknown')) self.isbn = isbn self.title = title self.author = author.strip() self.publisher = publisher self.previous_row = None self.connect(self.matches, SIGNAL('activated(QModelIndex)'), self.chosen) - key = str(self.key.text()) - if key: - QTimer.singleShot(100, self.fetch_metadata) + self.connect(self.matches, SIGNAL('entered(QModelIndex)'), + lambda index:self.matches.setCurrentIndex(index)) + self.matches.setMouseTracking(True) + self.fetch_metadata() def show_summary(self, current, previous): @@ -100,53 +153,58 @@ class FetchMetadata(QDialog, Ui_FetchMetadata): def fetch_metadata(self): key = str(self.key.text()) - if not key: - error_dialog(self, _('Cannot connect'), 
- _('You must specify a valid access key for isbndb.com')) - return - else: + if key: prefs['isbndb_com_key'] = key - - args = ['isbndb'] + else: + key = None + title = author = publisher = isbn = None if self.isbn: - args.extend(('--isbn', self.isbn)) + isbn = self.isbn if self.title: - args.extend(('--title', self.title)) - if self.author and not self.author == 'Unknown': - args.extend(('--author', self.author)) - #if self.publisher: - # args.extend(('--publisher', self.publisher)) - + title = self.title + if self.author and not self.author == _('Unknown'): + author = self.author self.fetch.setEnabled(False) self.setCursor(Qt.WaitCursor) QCoreApplication.instance().processEvents() + self.fetcher = Fetcher(title, author, publisher, isbn, key) + self.fetcher.start() + self.pi.start(_('Finding metadata...')) + self._hangcheck = QTimer(self) + self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck) + self.start_time = time.time() + self._hangcheck.start() + + def hangcheck(self): + if not (self.fetcher.isFinished() or time.time() - self.start_time > 75): + return + self._hangcheck.stop() try: - args.append(key) - parser = option_parser() - opts, args = parser.parse_args(args) - - self.logger = logging.getLogger('Job #'+str(id)) - self.logger.setLevel(logging.DEBUG) - self.log_dest = cStringIO.StringIO() - handler = logging.StreamHandler(self.log_dest) - handler.setLevel(logging.DEBUG) - handler.setFormatter(logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)s: %(message)s')) - self.logger.addHandler(handler) - - try: - books = create_books(opts, args, self.logger, self.timeout) - except ISBNDBError, err: - error_dialog(self, _('Error fetching metadata'), str(err)).exec_() + if self.fetcher.isRunning(): + error_dialog(self, _('Could not find metadata'), + _('The metadata download seems to have stalled. 
' + 'Try again later.')).exec_() + self.fetcher.terminate() + return + self.model = Matches(self.fetcher.results) + warnings = [(x[0], unicode(x[1])) for x in \ + self.fetcher.exceptions if x[1] is not None] + if warnings: + warnings='
'.join(['%s: %s'%(name, exc) for name,exc in warnings]) + warning_dialog(self, _('Warning'), + '

'+_('Could not fetch metadata from:')+\ + '

'+warnings+'

').exec_() + if self.model.rowCount() < 1: + info_dialog(self, _('No metadata found'), + _('No metadata found, try adjusting the title and author ' + 'or the ISBN key.')).exec_() + self.reject() return - self.model = Matches(books) - if self.model.rowCount() < 1: - info_dialog(self, _('No metadata found'), _('No metadata found, try adjusting the title and author or the ISBN key.')).exec_() - self.reject() - self.matches.setModel(self.model) - QObject.connect(self.matches.selectionModel(), SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'), - self.show_summary) + QObject.connect(self.matches.selectionModel(), + SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'), + self.show_summary) self.model.reset() self.matches.selectionModel().select(self.model.index(0, 0), QItemSelectionModel.Select | QItemSelectionModel.Rows) @@ -155,9 +213,9 @@ class FetchMetadata(QDialog, Ui_FetchMetadata): self.fetch.setEnabled(True) self.unsetCursor() self.matches.resizeColumnsToContents() + self.pi.stop() + - - def selected_book(self): try: return self.matches.model().matches[self.matches.currentIndex().row()] diff --git a/src/calibre/gui2/dialogs/fetch_metadata.ui b/src/calibre/gui2/dialogs/fetch_metadata.ui index 8e5747778f..9adc1fa5e5 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.ui +++ b/src/calibre/gui2/dialogs/fetch_metadata.ui @@ -23,20 +23,13 @@ - Fetching metadata for <b>%1</b> + <p>calibre can find metadata for your books from two locations: <b>Google Books</b> and <b>isbndb.com</b>. <p>To use isbndb.com you must sign up for a <a href="http://www.isbndb.com">free account</a> and exter you access key below. Qt::AlignCenter - - - - - - Sign up for a free account from <a href="http://www.isbndb.com">ISBNdb.com</a> to get an access key. 
- - - Qt::AlignCenter + + true true diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py index 2e4a296407..ea30f0e97b 100644 --- a/src/calibre/gui2/dialogs/scheduler.py +++ b/src/calibre/gui2/dialogs/scheduler.py @@ -219,6 +219,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser): return QVariant(category + ' [%d]'%num) elif role == Qt.FontRole: return self.bold_font + elif role == Qt.ForegroundRole and category == _('Scheduled'): + return QVariant(QColor(0, 255, 0)) return NONE def update_recipe_schedule(self, recipe): diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py index 77606b19ef..f12f560af5 100644 --- a/src/calibre/gui2/library.py +++ b/src/calibre/gui2/library.py @@ -823,7 +823,7 @@ class DeviceBooksModel(BooksModel): def search(self, text, refinement, reset=True): - if not text: + if not text or not text.strip(): self.map = list(range(len(self.db))) else: matches = self.search_engine.parse(text) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 14cfbc80a1..5360cfd453 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -90,7 +90,6 @@ class ProgressIndicator(QWidget): def stop(self): if self.movie.state() == self.movie.Running: - #self.movie.jumpToFrame(0) self.movie.setPaused(True) self.setVisible(False) diff --git a/src/calibre/linux.py b/src/calibre/linux.py index e43336f238..949bb3160b 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -53,7 +53,6 @@ entry_points = { 'lrf2lrs = calibre.ebooks.lrf.lrfparser:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', 'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main', - 'isbndb = calibre.ebooks.metadata.isbndb:main', 'librarything = calibre.ebooks.metadata.library_thing:main', 'mobi2oeb = calibre.ebooks.mobi.reader:main', 'oeb2mobi = calibre.ebooks.mobi.writer:main', @@ -69,7 +68,7 @@ entry_points = { 'calibre-parallel = calibre.parallel:main', 
'calibre-customize = calibre.customize.ui:main', 'pdftrim = calibre.ebooks.pdf.pdftrim:main' , - 'google-books = calibre.ebooks.metadata.google_books:main', + 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main', ], 'gui_scripts' : [ __appname__+' = calibre.gui2.main:main', @@ -196,9 +195,9 @@ def setup_completion(fatal_errors): from calibre.ebooks.lit.from_any import option_parser as any2lit from calibre.ebooks.epub.from_comic import option_parser as comic2epub from calibre.ebooks.mobi.from_any import option_parser as any2mobi + from calibre.ebooks.metadata.fetch import option_parser as fem_op from calibre.ebooks.mobi.writer import option_parser as oeb2mobi from calibre.gui2.main import option_parser as guiop - from calibre.ebooks.metadata.google_books import option_parser as gbop any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt'] f = open_file('/etc/bash_completion.d/libprs500') @@ -246,7 +245,7 @@ def setup_completion(fatal_errors): f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles)) f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles)) - f.write(opts_and_words('google-books', gbop, [])) + f.write(opts_and_words('fetch-ebook-metadata', fem_op, [])) f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf'])) f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml'])) f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))