Run single metadata downloads in a worker process as well, to work around memory leaks in third-party plugins

This commit is contained in:
Kovid Goyal 2012-04-05 18:40:06 +05:30
parent 270d36f59f
commit fbddf37b80
4 changed files with 171 additions and 28 deletions

View File

@ -112,6 +112,18 @@ def get_cached_cover_urls(mi):
if url: if url:
yield (p, url) yield (p, url)
def dump_caches():
    """Collect the cache dumps of every 'identify' metadata plugin.

    Returns a dict mapping each plugin's name to whatever that plugin's
    own dump_caches() method returns.
    """
    from calibre.customize.ui import metadata_plugins
    dumped = {}
    for plugin in metadata_plugins(['identify']):
        dumped[plugin.name] = plugin.dump_caches()
    return dumped
def load_caches(dump):
    """Feed previously dumped caches back into the 'identify' plugins.

    ``dump`` maps plugin names to cache dumps. Plugins that have no entry
    in ``dump``, or whose entry is empty/falsy, are left untouched.
    """
    from calibre.customize.ui import metadata_plugins
    # Materialise the plugin list before touching any plugin
    for plugin in list(metadata_plugins(['identify'])):
        cached = dump.get(plugin.name, None)
        if not cached:
            continue
        plugin.load_caches(cached)
def cap_author_token(token): def cap_author_token(token):
lt = lower(token) lt = lower(token)
if lt in ('von', 'de', 'el', 'van', 'le'): if lt in ('von', 'de', 'el', 'van', 'le'):
@ -293,6 +305,16 @@ class Source(Plugin):
with self.cache_lock: with self.cache_lock:
return self._identifier_to_cover_url_cache.get(id_, None) return self._identifier_to_cover_url_cache.get(id_, None)
def dump_caches(self):
    """Return copies of this plugin's two lookup caches.

    Both copies are taken while holding ``self.cache_lock``. The result is
    a dict with the keys 'isbn_to_identifier' and 'identifier_to_cover'.
    """
    with self.cache_lock:
        isbn_map = self._isbn_to_identifier_cache.copy()
        cover_map = self._identifier_to_cover_url_cache.copy()
    return {'isbn_to_identifier': isbn_map, 'identifier_to_cover': cover_map}
def load_caches(self, dump):
    """Merge a dump produced by dump_caches() into this plugin's caches.

    ``dump`` must contain the keys 'isbn_to_identifier' and
    'identifier_to_cover'; existing cache entries are updated in place
    while holding ``self.cache_lock``.
    """
    with self.cache_lock:
        isbn_entries = dump['isbn_to_identifier']
        self._isbn_to_identifier_cache.update(isbn_entries)
        cover_entries = dump['identifier_to_cover']
        self._identifier_to_cover_url_cache.update(cover_entries)
# }}} # }}}
# Utility functions {{{ # Utility functions {{{

View File

@ -8,14 +8,17 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os
from threading import Event from threading import Event, Thread
from Queue import Queue, Empty
from io import BytesIO from io import BytesIO
from calibre.utils.date import as_utc from calibre.utils.date import as_utc
from calibre.ebooks.metadata.sources.identify import identify, msprefs from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.covers import download_cover from calibre.ebooks.metadata.sources.covers import (download_cover,
run_download)
from calibre.ebooks.metadata.sources.base import dump_caches, load_caches
from calibre.utils.logging import GUILog from calibre.utils.logging import GUILog
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
@ -93,3 +96,31 @@ def main(do_identify, covers, metadata, ensure_fields):
return failed_ids, failed_covers, all_failed return failed_ids, failed_covers, all_failed
def single_identify(title, authors, identifiers):
    """Run a single identify query (intended to be run in a worker process).

    Returns a 3-tuple: the results serialised with metadata_to_opf(), the
    plugin caches as returned by dump_caches(), and the dump of the log
    that was used for the run.
    """
    log = GUILog()
    abort = Event()
    matches = identify(log, abort, title=title, authors=authors,
            identifiers=identifiers)
    serialized = [metadata_to_opf(match) for match in matches]
    caches = dump_caches()
    return serialized, caches, log.dump()
def single_covers(title, authors, identifiers, caches):
    """Download covers and write each one into the current directory.

    ``caches`` is first loaded into the plugins via load_caches().
    run_download() is then started in a daemon thread and every result it
    puts on the queue is written to a file named
    ``<plugin name>,,<width>,,<height>,,<fmt>.cover``. After the file has
    been fully written, a directory with the same name plus ``.done`` is
    created as a completion marker.

    Returns the dump of the log used for the download.
    """
    load_caches(caches)
    log = GUILog()
    results = Queue()
    worker = Thread(target=run_download, args=(log, results, Event()),
            kwargs=dict(title=title, authors=authors, identifiers=identifiers))
    worker.daemon = True
    worker.start()

    def save(result):
        # Write one cover, then create the marker that signals it is complete
        plugin, width, height, fmt, data = result
        name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt)
        with open(name, 'wb') as f:
            f.write(data)
        os.mkdir(name+'.done')

    while worker.is_alive():
        try:
            result = results.get(True, 1)
        except Empty:
            continue
        save(result)

    # The worker may have queued further covers just before exiting; the
    # loop above stops as soon as the thread is dead, so drain what is left
    # instead of silently dropping it.
    while True:
        try:
            result = results.get_nowait()
        except Empty:
            break
        save(result)

    return log.dump()

View File

@ -8,11 +8,16 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
DEBUG_DIALOG = False DEBUG_DIALOG = False
# Test: turn fields off, error
# handling
# Do some testing on Windows, as its filesystem model is different
# Imports {{{ # Imports {{{
import os, time
from threading import Thread, Event from threading import Thread, Event
from operator import attrgetter from operator import attrgetter
from Queue import Queue, Empty from Queue import Queue, Empty
from io import BytesIO
from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox, QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
@ -24,16 +29,17 @@ from PyQt4.QtWebKit import QWebView
from calibre.customize.ui import metadata_plugins from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.utils.logging import GUILog as Log from calibre.utils.logging import GUILog as Log
from calibre.ebooks.metadata.sources.identify import (identify, from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
urls_from_identifiers)
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import OPF
from calibre.gui2 import error_dialog, NONE, rating_font from calibre.gui2 import error_dialog, NONE, rating_font
from calibre.utils.date import (utcnow, fromordinal, format_date, from calibre.utils.date import (utcnow, fromordinal, format_date,
UNDEFINED_DATE, as_utc) UNDEFINED_DATE, as_utc)
from calibre.library.comments import comments_to_html from calibre.library.comments import comments_to_html
from calibre import force_unicode from calibre import force_unicode
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ptempfile import TemporaryDirectory
# }}} # }}}
class RichTextDelegate(QStyledItemDelegate): # {{{ class RichTextDelegate(QStyledItemDelegate): # {{{
@ -357,7 +363,7 @@ class Comments(QWebView): # {{{
class IdentifyWorker(Thread): # {{{ class IdentifyWorker(Thread): # {{{
def __init__(self, log, abort, title, authors, identifiers): def __init__(self, log, abort, title, authors, identifiers, caches):
Thread.__init__(self) Thread.__init__(self)
self.daemon = True self.daemon = True
@ -367,6 +373,7 @@ class IdentifyWorker(Thread): # {{{
self.results = [] self.results = []
self.error = None self.error = None
self.caches = caches
def sample_results(self): def sample_results(self):
m1 = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald']) m1 = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald'])
@ -390,25 +397,36 @@ class IdentifyWorker(Thread): # {{{
if DEBUG_DIALOG: if DEBUG_DIALOG:
self.results = self.sample_results() self.results = self.sample_results()
else: else:
self.results = identify(self.log, self.abort, title=self.title, res = fork_job(
authors=self.authors, identifiers=self.identifiers) 'calibre.ebooks.metadata.sources.worker',
'single_identify', (self.title, self.authors,
self.identifiers), no_output=True, abort=self.abort)
self.results, caches, log_dump = res['result']
self.results = [OPF(BytesIO(r), basedir=os.getcwdu(),
populate_spine=False).to_book_metadata() for r in self.results]
self.caches.update(caches)
self.log.load(log_dump)
for i, result in enumerate(self.results): for i, result in enumerate(self.results):
result.gui_rank = i result.gui_rank = i
except WorkerError as e:
self.error = force_unicode(e.orig_tb)
except: except:
import traceback import traceback
self.error = force_unicode(traceback.format_exc()) self.error = force_unicode(traceback.format_exc())
# }}} # }}}
class IdentifyWidget(QWidget): # {{{ class IdentifyWidget(QWidget): # {{{
rejected = pyqtSignal() rejected = pyqtSignal()
results_found = pyqtSignal() results_found = pyqtSignal()
book_selected = pyqtSignal(object) book_selected = pyqtSignal(object, object)
def __init__(self, log, parent=None): def __init__(self, log, parent=None):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.log = log self.log = log
self.abort = Event() self.abort = Event()
self.caches = {}
self.l = l = QGridLayout() self.l = l = QGridLayout()
self.setLayout(l) self.setLayout(l)
@ -421,7 +439,7 @@ class IdentifyWidget(QWidget): # {{{
l.addWidget(self.top, 0, 0) l.addWidget(self.top, 0, 0)
self.results_view = ResultsView(self) self.results_view = ResultsView(self)
self.results_view.book_selected.connect(self.book_selected.emit) self.results_view.book_selected.connect(self.emit_book_selected)
self.get_result = self.results_view.get_result self.get_result = self.results_view.get_result
l.addWidget(self.results_view, 1, 0) l.addWidget(self.results_view, 1, 0)
@ -455,6 +473,9 @@ class IdentifyWidget(QWidget): # {{{
</script> </script>
''') ''')
def emit_book_selected(self, book):
    """Emit book_selected with ``book`` and this widget's caches dict."""
    caches = self.caches
    self.book_selected.emit(book, caches)
def start(self, title=None, authors=None, identifiers={}): def start(self, title=None, authors=None, identifiers={}):
self.log.clear() self.log.clear()
self.log('Starting download') self.log('Starting download')
@ -470,7 +491,7 @@ class IdentifyWidget(QWidget): # {{{
self.log(unicode(self.query.text())) self.log(unicode(self.query.text()))
self.worker = IdentifyWorker(self.log, self.abort, title, self.worker = IdentifyWorker(self.log, self.abort, title,
authors, identifiers) authors, identifiers, self.caches)
self.worker.start() self.worker.start()
@ -513,20 +534,20 @@ class IdentifyWidget(QWidget): # {{{
class CoverWorker(Thread): # {{{ class CoverWorker(Thread): # {{{
def __init__(self, log, abort, title, authors, identifiers): def __init__(self, log, abort, title, authors, identifiers, caches):
Thread.__init__(self) Thread.__init__(self)
self.daemon = True self.daemon = True
self.log, self.abort = log, abort self.log, self.abort = log, abort
self.title, self.authors, self.identifiers = (title, authors, self.title, self.authors, self.identifiers = (title, authors,
identifiers) identifiers)
self.caches = caches
self.rq = Queue() self.rq = Queue()
self.error = None self.error = None
def fake_run(self): def fake_run(self):
images = ['donate.png', 'config.png', 'column.png', 'eject.png', ] images = ['donate.png', 'config.png', 'column.png', 'eject.png', ]
import time
time.sleep(2) time.sleep(2)
for pl, im in zip(metadata_plugins(['cover']), images): for pl, im in zip(metadata_plugins(['cover']), images):
self.rq.put((pl, 1, 1, 'png', I(im, data=True))) self.rq.put((pl, 1, 1, 'png', I(im, data=True)))
@ -536,12 +557,56 @@ class CoverWorker(Thread): # {{{
if DEBUG_DIALOG: if DEBUG_DIALOG:
self.fake_run() self.fake_run()
else: else:
from calibre.ebooks.metadata.sources.covers import run_download self.run_fork()
run_download(self.log, self.rq, self.abort, title=self.title, except WorkerError as e:
authors=self.authors, identifiers=self.identifiers) self.error = force_unicode(e.orig_tb)
except: except:
import traceback import traceback
self.error = force_unicode(traceback.format_exc()) self.error = force_unicode(traceback.format_exc())
def run_fork(self):
    """Run the cover download via fork_job(), inside a temporary directory.

    A daemon thread running monitor_tdir() watches the temporary directory
    while the job runs. When the job returns, its result is appended to
    self.log. The monitor thread is always told to stop and is joined,
    even if the job fails.
    """
    with TemporaryDirectory('_single_metadata_download') as tdir:
        self.keep_going = True
        monitor = Thread(target=self.monitor_tdir, args=(tdir,))
        monitor.daemon = True
        monitor.start()
        try:
            job_args = (self.title, self.authors, self.identifiers,
                    self.caches)
            res = fork_job('calibre.ebooks.metadata.sources.worker',
                    'single_covers', job_args, cwd=tdir, no_output=True,
                    abort=self.abort)
            self.log.append_dump(res['result'])
        finally:
            # Stop the monitor loop and wait for its final scan to finish
            self.keep_going = False
            monitor.join()
def scan_once(self, tdir, seen):
    """Queue every finished cover in ``tdir`` that has not been handled yet.

    A cover is a file named ``<plugin>,,<width>,,<height>,,<fmt>.cover``
    and counts as finished once ``<file name>.done`` also exists. Each
    finished cover is put on ``self.rq`` as the tuple
    (plugin_name, width, height, fmt, data) and its file name is added to
    ``seen`` so later scans skip it.

    A file whose name cannot be parsed is also added to ``seen``: its name
    will not change, so retrying would only print the same traceback on
    every scan. A file that cannot be read is left out of ``seen`` so the
    next scan retries it.
    """
    import traceback
    for x in os.listdir(tdir):
        if x in seen or not x.endswith('.cover'):
            continue
        if not os.path.exists(os.path.join(tdir, x+'.done')):
            # Still being written
            continue
        name = x.rpartition('.')[0]
        try:
            plugin_name, width, height, fmt = name.split(',,')
            width, height = int(width), int(height)
        except ValueError:
            # Wrong number of fields or non-numeric dimensions: permanent
            traceback.print_exc()
            seen.add(x)
            continue
        try:
            with open(os.path.join(tdir, x), 'rb') as f:
                data = f.read()
        except Exception:
            # Best effort: report the failure and retry on the next scan
            traceback.print_exc()
            continue
        seen.add(x)
        self.rq.put((plugin_name, width, height, fmt, data))
def monitor_tdir(self, tdir):
    """Scan ``tdir`` with scan_once() once a second until told to stop.

    The loop runs for as long as ``self.keep_going`` is true. A single set
    of already-handled names is shared between all the scans.
    """
    handled = set()
    while True:
        if not self.keep_going:
            break
        time.sleep(1)
        self.scan_once(tdir, handled)
    # One last scan after the download process has ended
    self.scan_once(tdir, handled)
# }}} # }}}
class CoversModel(QAbstractListModel): # {{{ class CoversModel(QAbstractListModel): # {{{
@ -620,16 +685,19 @@ class CoversModel(QAbstractListModel): # {{{
idx = self.plugin_map.get(plugin, 0) idx = self.plugin_map.get(plugin, 0)
return self.index(idx) return self.index(idx)
def update_result(self, plugin, width, height, data): def update_result(self, plugin_name, width, height, data):
try: idx = None
idx = self.plugin_map[plugin] for plugin, i in self.plugin_map.iteritems():
except: if plugin.name == plugin_name:
idx = i
break
if idx is None:
return return
pmap = QPixmap() pmap = QPixmap()
pmap.loadFromData(data) pmap.loadFromData(data)
if pmap.isNull(): if pmap.isNull():
return return
self.covers[idx] = self.get_item(plugin.name, pmap, waiting=False) self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False)
self.dataChanged.emit(self.index(idx), self.index(idx)) self.dataChanged.emit(self.index(idx), self.index(idx))
def cover_pixmap(self, index): def cover_pixmap(self, index):
@ -709,7 +777,7 @@ class CoversWidget(QWidget): # {{{
def reset_covers(self): def reset_covers(self):
self.covers_view.reset_covers() self.covers_view.reset_covers()
def start(self, book, current_cover, title, authors): def start(self, book, current_cover, title, authors, caches):
self.continue_processing = True self.continue_processing = True
self.abort.clear() self.abort.clear()
self.book, self.current_cover = book, current_cover self.book, self.current_cover = book, current_cover
@ -721,7 +789,7 @@ class CoversWidget(QWidget): # {{{
self.covers_view.start() self.covers_view.start()
self.worker = CoverWorker(self.log, self.abort, self.title, self.worker = CoverWorker(self.log, self.abort, self.title,
self.authors, book.identifiers) self.authors, book.identifiers, caches)
self.worker.start() self.worker.start()
QTimer.singleShot(50, self.check) QTimer.singleShot(50, self.check)
self.covers_view.setFocus(Qt.OtherFocusReason) self.covers_view.setFocus(Qt.OtherFocusReason)
@ -766,8 +834,8 @@ class CoversWidget(QWidget): # {{{
def process_result(self, result): def process_result(self, result):
if not self.continue_processing: if not self.continue_processing:
return return
plugin, width, height, fmt, data = result plugin_name, width, height, fmt, data = result
self.covers_view.model().update_result(plugin, width, height, data) self.covers_view.model().update_result(plugin_name, width, height, data)
def cleanup(self): def cleanup(self):
self.covers_view.delegate.stop_animation() self.covers_view.delegate.stop_animation()
@ -894,7 +962,7 @@ class FullFetch(QDialog): # {{{
def view_log(self): def view_log(self):
self._lv = LogViewer(self.log, self) self._lv = LogViewer(self.log, self)
def book_selected(self, book): def book_selected(self, book, caches):
self.next_button.setVisible(False) self.next_button.setVisible(False)
self.ok_button.setVisible(True) self.ok_button.setVisible(True)
self.prev_button.setVisible(True) self.prev_button.setVisible(True)
@ -902,7 +970,7 @@ class FullFetch(QDialog): # {{{
self.stack.setCurrentIndex(1) self.stack.setCurrentIndex(1)
self.log('\n\n') self.log('\n\n')
self.covers_widget.start(book, self.current_cover, self.covers_widget.start(book, self.current_cover,
self.title, self.authors) self.title, self.authors, caches)
def back_clicked(self): def back_clicked(self):
self.next_button.setVisible(True) self.next_button.setVisible(True)
@ -993,7 +1061,7 @@ class CoverFetch(QDialog): # {{{
book = Metadata(title, authors) book = Metadata(title, authors)
book.identifiers = identifiers book.identifiers = identifiers
self.covers_widget.start(book, self.current_cover, self.covers_widget.start(book, self.current_cover,
title, authors) title, authors, {})
return self.exec_() return self.exec_()
def view_log(self): def view_log(self):

View File

@ -122,6 +122,18 @@ class UnicodeHTMLStream(HTMLStream):
end = self.normal if self.data else u'' end = self.normal if self.data else u''
return u''.join(self.data) + end return u''.join(self.data) + end
def dump(self):
    """Return this stream's state as the list [data, plain_text, last_col].

    The lists are returned as-is, not copied.
    """
    state = [self.data, self.plain_text]
    state.append(self.last_col)
    return state
def load(self, dump):
    """Replace this stream's state with ``dump``.

    ``dump`` must unpack into exactly three items: data, plain_text and
    last_col, in that order.
    """
    data, plain_text, last_col = dump
    self.data = data
    self.plain_text = plain_text
    self.last_col = last_col
def append_dump(self, dump):
    """Append a dumped stream state to this stream.

    The dumped data and plain_text items are added to the end of the
    existing lists; last_col is overwritten with the dumped value.
    """
    extra_data, extra_plain, last_col = dump
    for target, extra in ((self.data, extra_data),
                          (self.plain_text, extra_plain)):
        target.extend(extra)
    self.last_col = last_col
class Log(object): class Log(object):
@ -186,4 +198,14 @@ class GUILog(ThreadSafeLog):
def plain_text(self): def plain_text(self):
return u''.join(self.outputs[0].plain_text) return u''.join(self.outputs[0].plain_text)
def dump(self):
    """Return the dump of this log's first output stream."""
    stream = self.outputs[0]
    return stream.dump()
def load(self, dump):
    """Load ``dump`` into this log's first output stream.

    Returns whatever the stream's load() returns.
    """
    stream = self.outputs[0]
    return stream.load(dump)
def append_dump(self, dump):
    """Append ``dump`` to this log's first output stream.

    Returns whatever the stream's append_dump() returns.
    """
    stream = self.outputs[0]
    return stream.append_dump(dump)
default_log = Log() default_log = Log()