From 945a8e3ae8f40ca2e33d9f09b6637605ec4ac9eb Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 2 Apr 2012 18:24:15 +0530
Subject: [PATCH] More granular progress reporting during bulk metadata download

---
 Review amendments to the original commit: Notifier gained a docstring and
 its two bare "except:" clauses were narrowed to EnvironmentError (listdir)
 and ValueError (int); nothing else in the change was altered.

 src/calibre/gui2/metadata/bulk_download.py | 129 ++++++++++++++-------
 1 file changed, 87 insertions(+), 42 deletions(-)

diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py
index 0f7097a4e6..0f2f5ae9be 100644
--- a/src/calibre/gui2/metadata/bulk_download.py
+++ b/src/calibre/gui2/metadata/bulk_download.py
@@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
 
 import os, time, shutil
 from functools import partial
+from threading import Thread
 
 from PyQt4.Qt import (QIcon, QDialog,
         QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)
@@ -169,6 +170,47 @@ class HeartBeat(object):
             self.last_time = time.time()
         return True
 
+class Notifier(Thread):
+    '''
+    Daemon thread that reports per-book progress for a bulk download.
+
+    Each pass lists ``tdir`` for ``<book_id>.log`` files and, the first time
+    an id that is also in ``title_map`` shows up, puts a ``(fraction, msg)``
+    tuple on ``notifications``; passes are one second apart. Set
+    ``keep_going`` to False to let the loop exit at its next check.
+    '''
+
+    def __init__(self, notifications, title_map, tdir, total):
+        Thread.__init__(self)
+        self.daemon = True
+        self.notifications, self.title_map = notifications, title_map
+        self.tdir, self.total = tdir, total
+        self.seen = set()
+        self.keep_going = True
+
+    def run(self):
+        while self.keep_going:
+            try:
+                names = os.listdir(self.tdir)
+            except EnvironmentError:
+                # Best effort: if tdir cannot be listed right now, just try
+                # again on the next pass
+                pass
+            else:
+                for x in names:
+                    if x.endswith('.log'):
+                        try:
+                            book_id = int(x.partition('.')[0])
+                        except ValueError:
+                            # The part before the first dot is not a book id
+                            continue
+                        if book_id not in self.seen and book_id in self.title_map:
+                            self.seen.add(book_id)
+                            self.notifications.put((
+                                float(len(self.seen))/self.total,
+                                _('Processed %s')%self.title_map[book_id]))
+            time.sleep(1)
+
 def download(all_ids, tf, db, do_identify, covers, ensure_fields,
         log=None, abort=None, notifications=None):
     batch_size = 10
@@ -184,49 +226,52 @@ def download(all_ids, tf, db, do_identify, covers, ensure_fields,
     all_failed = True
     aborted = False
     count = 0
+    notifier = Notifier(notifications, title_map, tdir, len(all_ids))
+    notifier.start()
+
+    try:
+        for ids in batches:
+            if abort.is_set():
+                log.error('Aborting...')
+                break
+            metadata = {i:db.get_metadata(i, index_is_id=True,
+                get_user_categories=False) for i in ids}
+            for i in ids:
+                title_map[i] = metadata[i].title
+                lm_map[i] = metadata[i].last_modified
+            metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
+                    metadata.iteritems()}
+            try:
+                ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
+                        (do_identify, covers, metadata, ensure_fields),
+                        cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
+            except WorkerError as e:
+                if e.orig_tb:
+                    raise Exception('Failed to download metadata. Original '
+                            'traceback: \n\n'+e.orig_tb)
+                raise
+            count += batch_size
+
+            fids, fcovs, allf = ret['result']
+            if not allf:
+                all_failed = False
+            failed_ids = failed_ids.union(fids)
+            failed_covers = failed_covers.union(fcovs)
+            ans = ans.union(set(ids) - fids)
+            for book_id in ids:
+                lp = os.path.join(tdir, '%d.log'%book_id)
+                if os.path.exists(lp):
+                    with open(tf, 'ab') as dest, open(lp, 'rb') as src:
+                        dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
+                            '#'*20+'\n').encode('utf-8'))
+                        shutil.copyfileobj(src, dest)
 
-    for ids in batches:
         if abort.is_set():
-            log.error('Aborting...')
-            break
-        metadata = {i:db.get_metadata(i, index_is_id=True,
-            get_user_categories=False) for i in ids}
-        for i in ids:
-            title_map[i] = metadata[i].title
-            lm_map[i] = metadata[i].last_modified
-        metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
-                metadata.iteritems()}
-        try:
-            ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
-                    (do_identify, covers, metadata, ensure_fields),
-                    cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
-        except WorkerError as e:
-            if e.orig_tb:
-                raise Exception('Failed to download metadata. Original '
-                        'traceback: \n\n'+e.orig_tb)
-            raise
-        count += batch_size
-        notifications.put((count/len(ids),
-            _('Downloaded %(num)d of %(tot)d')%dict(
-                num=count, tot=len(all_ids))))
+            aborted = True
+        log('Download complete, with %d failures'%len(failed_ids))
+        return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
+                lm_map, all_failed)
+    finally:
+        notifier.keep_going = False
 
-        fids, fcovs, allf = ret['result']
-        if not allf:
-            all_failed = False
-        failed_ids = failed_ids.union(fids)
-        failed_covers = failed_covers.union(fcovs)
-        ans = ans.union(set(ids) - fids)
-        for book_id in ids:
-            lp = os.path.join(tdir, '%d.log'%book_id)
-            if os.path.exists(lp):
-                with open(tf, 'ab') as dest, open(lp, 'rb') as src:
-                    dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
-                        '#'*20+'\n').encode('utf-8'))
-                    shutil.copyfileobj(src, dest)
-
-    if abort.is_set():
-        aborted = True
-    log('Download complete, with %d failures'%len(failed_ids))
-    return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
-            lm_map, all_failed)
 