More granular progress reporting during bulk metadata download

Kovid Goyal 2012-04-02 18:24:15 +05:30
parent e3873e4254
commit 945a8e3ae8


@@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
 
 import os, time, shutil
 from functools import partial
+from threading import Thread
 
 from PyQt4.Qt import (QIcon, QDialog,
         QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)
@@ -169,6 +170,36 @@ class HeartBeat(object):
             self.last_time = time.time()
         return True
 
+class Notifier(Thread):
+
+    def __init__(self, notifications, title_map, tdir, total):
+        Thread.__init__(self)
+        self.daemon = True
+        self.notifications, self.title_map = notifications, title_map
+        self.tdir, self.total = tdir, total
+        self.seen = set()
+        self.keep_going = True
+
+    def run(self):
+        while self.keep_going:
+            try:
+                names = os.listdir(self.tdir)
+            except:
+                pass
+            else:
+                for x in names:
+                    if x.endswith('.log'):
+                        try:
+                            book_id = int(x.partition('.')[0])
+                        except:
+                            continue
+                        if book_id not in self.seen and book_id in self.title_map:
+                            self.seen.add(book_id)
+                            self.notifications.put((
+                                float(len(self.seen))/self.total,
+                                _('Processed %s')%self.title_map[book_id]))
+            time.sleep(1)
+
 def download(all_ids, tf, db, do_identify, covers, ensure_fields,
         log=None, abort=None, notifications=None):
     batch_size = 10
@@ -184,49 +215,52 @@ def download(all_ids, tf, db, do_identify, covers, ensure_fields,
     all_failed = True
     aborted = False
     count = 0
+    notifier = Notifier(notifications, title_map, tdir, len(all_ids))
+    notifier.start()
 
-    for ids in batches:
-        if abort.is_set():
-            log.error('Aborting...')
-            break
-        metadata = {i:db.get_metadata(i, index_is_id=True,
-            get_user_categories=False) for i in ids}
-        for i in ids:
-            title_map[i] = metadata[i].title
-            lm_map[i] = metadata[i].last_modified
-        metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
-                metadata.iteritems()}
-        try:
-            ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
-                    (do_identify, covers, metadata, ensure_fields),
-                    cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
-        except WorkerError as e:
-            if e.orig_tb:
-                raise Exception('Failed to download metadata. Original '
-                        'traceback: \n\n'+e.orig_tb)
-            raise
-        count += batch_size
-        notifications.put((count/len(ids),
-            _('Downloaded %(num)d of %(tot)d')%dict(
-                num=count, tot=len(all_ids))))
-        fids, fcovs, allf = ret['result']
-        if not allf:
-            all_failed = False
-        failed_ids = failed_ids.union(fids)
-        failed_covers = failed_covers.union(fcovs)
-        ans = ans.union(set(ids) - fids)
-        for book_id in ids:
-            lp = os.path.join(tdir, '%d.log'%book_id)
-            if os.path.exists(lp):
-                with open(tf, 'ab') as dest, open(lp, 'rb') as src:
-                    dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
-                        '#'*20+'\n').encode('utf-8'))
-                    shutil.copyfileobj(src, dest)
-
-    if abort.is_set():
-        aborted = True
-    log('Download complete, with %d failures'%len(failed_ids))
-    return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
-            lm_map, all_failed)
+    try:
+        for ids in batches:
+            if abort.is_set():
+                log.error('Aborting...')
+                break
+            metadata = {i:db.get_metadata(i, index_is_id=True,
+                get_user_categories=False) for i in ids}
+            for i in ids:
+                title_map[i] = metadata[i].title
+                lm_map[i] = metadata[i].last_modified
+            metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
+                    metadata.iteritems()}
+            try:
+                ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
+                        (do_identify, covers, metadata, ensure_fields),
+                        cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
+            except WorkerError as e:
+                if e.orig_tb:
+                    raise Exception('Failed to download metadata. Original '
+                            'traceback: \n\n'+e.orig_tb)
+                raise
+            count += batch_size
+            fids, fcovs, allf = ret['result']
+            if not allf:
+                all_failed = False
+            failed_ids = failed_ids.union(fids)
+            failed_covers = failed_covers.union(fcovs)
+            ans = ans.union(set(ids) - fids)
+            for book_id in ids:
+                lp = os.path.join(tdir, '%d.log'%book_id)
+                if os.path.exists(lp):
+                    with open(tf, 'ab') as dest, open(lp, 'rb') as src:
+                        dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
+                            '#'*20+'\n').encode('utf-8'))
+                        shutil.copyfileobj(src, dest)
+
+        if abort.is_set():
+            aborted = True
+        log('Download complete, with %d failures'%len(failed_ids))
+        return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
+                lm_map, all_failed)
+    finally:
+        notifier.keep_going = False
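
The new progress path is file-based: the forked worker drops a '<book_id>.log' file into the shared temporary directory for each book it finishes, and the Notifier daemon thread polls that directory once a second, pushing a (fraction_done, message) tuple onto the notifications queue for every log it has not seen before. The finally: block stops the thread whether the batch loop completes, aborts, or raises. Below is a minimal, self-contained sketch of the same polling scheme; it is not part of the commit, uses made-up names (poll_progress, titles), and is written for Python 3, whereas the commit itself targets Python 2 / PyQt4.

    # Illustrative sketch only: reproduces the Notifier polling scheme with
    # stand-in names. Assumes Python 3; the commit above targets Python 2.
    import os, time, tempfile, threading, queue

    def poll_progress(tdir, title_map, total, notifications, stop):
        # Stand-in for Notifier.run(): watch tdir for per-book
        # '<book_id>.log' files and report each new one exactly once.
        seen = set()
        while not stop.is_set():
            for name in os.listdir(tdir):
                if not name.endswith('.log'):
                    continue
                try:
                    book_id = int(name.partition('.')[0])
                except ValueError:
                    continue
                if book_id not in seen and book_id in title_map:
                    seen.add(book_id)
                    notifications.put((len(seen) / total,
                                       'Processed %s' % title_map[book_id]))
            time.sleep(1)

    titles = {1: 'Book One', 2: 'Book Two'}  # book_id -> title, as in title_map
    notifications = queue.Queue()
    stop = threading.Event()
    with tempfile.TemporaryDirectory() as tdir:
        t = threading.Thread(target=poll_progress,
                             args=(tdir, titles, len(titles), notifications, stop),
                             daemon=True)
        t.start()
        for book_id in titles:  # stand-in for the worker writing its log files
            open(os.path.join(tdir, '%d.log' % book_id), 'w').close()
            time.sleep(1.5)
        stop.set()
        t.join(timeout=2)

    while not notifications.empty():
        print(notifications.get())  # e.g. (0.5, 'Processed Book One')

Because the poller only watches the filesystem, it reports per-book progress while a batch is still running inside fork_job(), which is what makes the reporting "more granular" than the old per-batch notifications.put() call that this commit removes.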