Run single metadata downloads in a worker process as well, to work around memory leaks in third-party plugins

This commit is contained in:
Kovid Goyal 2012-04-05 18:40:06 +05:30
parent 270d36f59f
commit fbddf37b80
4 changed files with 171 additions and 28 deletions

View File

@ -112,6 +112,18 @@ def get_cached_cover_urls(mi):
if url:
yield (p, url)
def dump_caches():
    '''
    Collect the caches of every 'identify' metadata plugin.

    :return: A dict mapping each plugin's name to whatever that plugin's
             own ``dump_caches()`` returns.
    '''
    from calibre.customize.ui import metadata_plugins
    snapshot = {}
    for plugin in metadata_plugins(['identify']):
        snapshot[plugin.name] = plugin.dump_caches()
    return snapshot
def load_caches(dump):
    '''
    Feed previously dumped caches back into the 'identify' metadata plugins.

    :param dump: A dict keyed by plugin name. Plugins that have no entry
                 (or an empty one) are left untouched.
    '''
    from calibre.customize.ui import metadata_plugins
    for plugin in list(metadata_plugins(['identify'])):
        plugin_dump = dump.get(plugin.name, None)
        if not plugin_dump:
            # Nothing was recorded for this plugin
            continue
        plugin.load_caches(plugin_dump)
def cap_author_token(token):
lt = lower(token)
if lt in ('von', 'de', 'el', 'van', 'le'):
@ -293,6 +305,16 @@ class Source(Plugin):
with self.cache_lock:
return self._identifier_to_cover_url_cache.get(id_, None)
def dump_caches(self):
    '''
    Return copies of this plugin's two lookup caches.

    The copies are taken while holding ``cache_lock``.

    :return: A dict with the keys ``'isbn_to_identifier'`` and
             ``'identifier_to_cover'``.
    '''
    with self.cache_lock:
        isbn_snapshot = self._isbn_to_identifier_cache.copy()
        cover_snapshot = self._identifier_to_cover_url_cache.copy()
        return {
            'isbn_to_identifier': isbn_snapshot,
            'identifier_to_cover': cover_snapshot,
        }
def load_caches(self, dump):
    '''
    Merge a dump, in the format produced by :meth:`dump_caches`, into this
    plugin's caches.

    Existing entries are kept; entries present in ``dump`` overwrite
    entries with the same key. The merge happens while holding
    ``cache_lock``.

    :param dump: A dict with the keys ``'isbn_to_identifier'`` and
                 ``'identifier_to_cover'``.
    '''
    with self.cache_lock:
        isbn_cache = self._isbn_to_identifier_cache
        isbn_cache.update(dump['isbn_to_identifier'])
        cover_cache = self._identifier_to_cover_url_cache
        cover_cache.update(dump['identifier_to_cover'])
# }}}
# Utility functions {{{

View File

@ -8,14 +8,17 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from threading import Event
from threading import Event, Thread
from Queue import Queue, Empty
from io import BytesIO
from calibre.utils.date import as_utc
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.sources.covers import (download_cover,
run_download)
from calibre.ebooks.metadata.sources.base import dump_caches, load_caches
from calibre.utils.logging import GUILog
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
@ -93,3 +96,31 @@ def main(do_identify, covers, metadata, ensure_fields):
return failed_ids, failed_covers, all_failed
def single_identify(title, authors, identifiers):
    '''
    Run a single identify query and return its outcome in a form that
    consists only of serialized data.

    :return: A 3-tuple ``(opfs, caches, log_dump)``: the results
             converted with ``metadata_to_opf``, the output of
             ``dump_caches()`` and the dump of the log used for the query.
    '''
    log = GUILog()
    abort = Event()
    found = identify(log, abort, title=title, authors=authors,
            identifiers=identifiers)
    opfs = []
    for mi in found:
        opfs.append(metadata_to_opf(mi))
    caches = dump_caches()
    return opfs, caches, log.dump()
def single_covers(title, authors, identifiers, caches):
    '''
    Download covers in a background thread and write every result into
    the current working directory as it arrives.

    Each result is stored in a file named
    ``<plugin name>,,<width>,,<height>,,<fmt>.cover``. After the file has
    been completely written, a directory with the same name plus
    ``.done`` is created next to it to mark the file as complete.

    :param caches: Cache dump that is passed to ``load_caches()`` before
                   the download starts.
    :return: The dump of the log used for the download.
    '''
    load_caches(caches)
    log = GUILog()
    results = Queue()
    worker = Thread(target=run_download, args=(log, results, Event()),
            kwargs=dict(title=title, authors=authors, identifiers=identifiers))
    worker.daemon = True
    worker.start()

    def save(result):
        # Write the data first and create the marker last, so that the
        # marker never exists for a partially written file
        plugin, width, height, fmt, data = result
        name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt)
        with open(name, 'wb') as f:
            f.write(data)
        os.mkdir(name+'.done')

    while worker.is_alive():
        try:
            result = results.get(True, 1)
        except Empty:
            continue
        save(result)

    # The worker may have queued further results just before it exited.
    # Without this final drain they would be silently dropped.
    while True:
        try:
            result = results.get_nowait()
        except Empty:
            break
        save(result)

    return log.dump()

View File

@ -8,11 +8,16 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
DEBUG_DIALOG = False
# Test: turn fields off, error
# handling
# Do some testing in windows as filesystem model is different
# Imports {{{
import os, time
from threading import Thread, Event
from operator import attrgetter
from Queue import Queue, Empty
from io import BytesIO
from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
@ -24,16 +29,17 @@ from PyQt4.QtWebKit import QWebView
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.logging import GUILog as Log
from calibre.ebooks.metadata.sources.identify import (identify,
urls_from_identifiers)
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import OPF
from calibre.gui2 import error_dialog, NONE, rating_font
from calibre.utils.date import (utcnow, fromordinal, format_date,
UNDEFINED_DATE, as_utc)
from calibre.library.comments import comments_to_html
from calibre import force_unicode
from calibre.utils.config import tweaks
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ptempfile import TemporaryDirectory
# }}}
class RichTextDelegate(QStyledItemDelegate): # {{{
@ -357,7 +363,7 @@ class Comments(QWebView): # {{{
class IdentifyWorker(Thread): # {{{
def __init__(self, log, abort, title, authors, identifiers):
def __init__(self, log, abort, title, authors, identifiers, caches):
Thread.__init__(self)
self.daemon = True
@ -367,6 +373,7 @@ class IdentifyWorker(Thread): # {{{
self.results = []
self.error = None
self.caches = caches
def sample_results(self):
m1 = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald'])
@ -390,25 +397,36 @@ class IdentifyWorker(Thread): # {{{
if DEBUG_DIALOG:
self.results = self.sample_results()
else:
self.results = identify(self.log, self.abort, title=self.title,
authors=self.authors, identifiers=self.identifiers)
res = fork_job(
'calibre.ebooks.metadata.sources.worker',
'single_identify', (self.title, self.authors,
self.identifiers), no_output=True, abort=self.abort)
self.results, caches, log_dump = res['result']
self.results = [OPF(BytesIO(r), basedir=os.getcwdu(),
populate_spine=False).to_book_metadata() for r in self.results]
self.caches.update(caches)
self.log.load(log_dump)
for i, result in enumerate(self.results):
result.gui_rank = i
except WorkerError as e:
self.error = force_unicode(e.orig_tb)
except:
import traceback
self.error = force_unicode(traceback.format_exc())
# }}}
class IdentifyWidget(QWidget): # {{{
rejected = pyqtSignal()
results_found = pyqtSignal()
book_selected = pyqtSignal(object)
book_selected = pyqtSignal(object, object)
def __init__(self, log, parent=None):
QWidget.__init__(self, parent)
self.log = log
self.abort = Event()
self.caches = {}
self.l = l = QGridLayout()
self.setLayout(l)
@ -421,7 +439,7 @@ class IdentifyWidget(QWidget): # {{{
l.addWidget(self.top, 0, 0)
self.results_view = ResultsView(self)
self.results_view.book_selected.connect(self.book_selected.emit)
self.results_view.book_selected.connect(self.emit_book_selected)
self.get_result = self.results_view.get_result
l.addWidget(self.results_view, 1, 0)
@ -455,6 +473,9 @@ class IdentifyWidget(QWidget): # {{{
</script>
''')
def emit_book_selected(self, book):
    '''
    Re-emit the selection of ``book`` through this widget's
    ``book_selected`` signal, adding this widget's ``caches`` as the
    second argument.
    '''
    caches = self.caches
    self.book_selected.emit(book, caches)
def start(self, title=None, authors=None, identifiers={}):
self.log.clear()
self.log('Starting download')
@ -470,7 +491,7 @@ class IdentifyWidget(QWidget): # {{{
self.log(unicode(self.query.text()))
self.worker = IdentifyWorker(self.log, self.abort, title,
authors, identifiers)
authors, identifiers, self.caches)
self.worker.start()
@ -513,20 +534,20 @@ class IdentifyWidget(QWidget): # {{{
class CoverWorker(Thread): # {{{
def __init__(self, log, abort, title, authors, identifiers):
def __init__(self, log, abort, title, authors, identifiers, caches):
Thread.__init__(self)
self.daemon = True
self.log, self.abort = log, abort
self.title, self.authors, self.identifiers = (title, authors,
identifiers)
self.caches = caches
self.rq = Queue()
self.error = None
def fake_run(self):
images = ['donate.png', 'config.png', 'column.png', 'eject.png', ]
import time
time.sleep(2)
for pl, im in zip(metadata_plugins(['cover']), images):
self.rq.put((pl, 1, 1, 'png', I(im, data=True)))
@ -536,12 +557,56 @@ class CoverWorker(Thread): # {{{
if DEBUG_DIALOG:
self.fake_run()
else:
from calibre.ebooks.metadata.sources.covers import run_download
run_download(self.log, self.rq, self.abort, title=self.title,
authors=self.authors, identifiers=self.identifiers)
self.run_fork()
except WorkerError as e:
self.error = force_unicode(e.orig_tb)
except:
import traceback
self.error = force_unicode(traceback.format_exc())
def run_fork(self):
    '''
    Run the cover download via ``fork_job`` with a temporary directory as
    its working directory.

    While the job runs, a helper thread executes :meth:`monitor_tdir` on
    that directory. Once the job has returned (or raised), the helper
    thread is told to stop and is joined before the temporary directory
    context is left. The log dump returned by the job is appended to
    ``self.log``.
    '''
    with TemporaryDirectory('_single_metadata_download') as tdir:
        self.keep_going = True
        monitor = Thread(target=self.monitor_tdir, args=(tdir,))
        monitor.daemon = True
        monitor.start()
        job_args = (self.title, self.authors, self.identifiers, self.caches)
        try:
            job = fork_job('calibre.ebooks.metadata.sources.worker',
                    'single_covers', job_args,
                    cwd=tdir, no_output=True, abort=self.abort)
            self.log.append_dump(job['result'])
        finally:
            # Always stop the monitor, even when the job failed
            self.keep_going = False
            monitor.join()
def scan_once(self, tdir, seen):
    '''
    Look through ``tdir`` once and queue every finished cover that has
    not been queued before.

    A cover is considered finished when a file ``<name>.cover`` exists
    together with an entry ``<name>.cover.done``. The part of the file
    name before ``.cover`` is expected to have the form
    ``<plugin name>,,<width>,,<height>,,<fmt>``.

    :param tdir: Directory to scan.
    :param seen: Set of file names that were already queued. Names of
                 newly queued files are added to it.

    For every new cover the tuple
    ``(plugin_name, width, height, fmt, data)`` is put on ``self.rq``.
    Files that cannot be parsed or read are reported with a traceback and
    are not added to ``seen``.
    '''
    for x in os.listdir(tdir):
        if x in seen:
            continue
        is_finished_cover = x.endswith('.cover') and os.path.exists(
                os.path.join(tdir, x+'.done'))
        if not is_finished_cover:
            continue
        name = x.rpartition('.')[0]
        try:
            # Split from the right so that a plugin name which itself
            # contains ',,' does not break the parsing
            plugin_name, width, height, fmt = name.rsplit(',,', 3)
            width, height = int(width), int(height)
            with open(os.path.join(tdir, x), 'rb') as f:
                data = f.read()
        except Exception:
            # Best effort: report the problem and carry on with the
            # remaining files
            import traceback
            traceback.print_exc()
        else:
            seen.add(x)
            self.rq.put((plugin_name, width, height, fmt, data))
def monitor_tdir(self, tdir):
    '''
    Call :meth:`scan_once` on ``tdir`` once per second for as long as
    ``self.keep_going`` is true, then scan one final time.
    '''
    already_queued = set()
    while True:
        if not self.keep_going:
            break
        time.sleep(1)
        self.scan_once(tdir, already_queued)
    # Pick up anything that was written between the last periodic scan
    # and the end of the download process
    self.scan_once(tdir, already_queued)
# }}}
class CoversModel(QAbstractListModel): # {{{
@ -620,16 +685,19 @@ class CoversModel(QAbstractListModel): # {{{
idx = self.plugin_map.get(plugin, 0)
return self.index(idx)
def update_result(self, plugin, width, height, data):
try:
idx = self.plugin_map[plugin]
except:
def update_result(self, plugin_name, width, height, data):
idx = None
for plugin, i in self.plugin_map.iteritems():
if plugin.name == plugin_name:
idx = i
break
if idx is None:
return
pmap = QPixmap()
pmap.loadFromData(data)
if pmap.isNull():
return
self.covers[idx] = self.get_item(plugin.name, pmap, waiting=False)
self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False)
self.dataChanged.emit(self.index(idx), self.index(idx))
def cover_pixmap(self, index):
@ -709,7 +777,7 @@ class CoversWidget(QWidget): # {{{
def reset_covers(self):
self.covers_view.reset_covers()
def start(self, book, current_cover, title, authors):
def start(self, book, current_cover, title, authors, caches):
self.continue_processing = True
self.abort.clear()
self.book, self.current_cover = book, current_cover
@ -721,7 +789,7 @@ class CoversWidget(QWidget): # {{{
self.covers_view.start()
self.worker = CoverWorker(self.log, self.abort, self.title,
self.authors, book.identifiers)
self.authors, book.identifiers, caches)
self.worker.start()
QTimer.singleShot(50, self.check)
self.covers_view.setFocus(Qt.OtherFocusReason)
@ -766,8 +834,8 @@ class CoversWidget(QWidget): # {{{
def process_result(self, result):
if not self.continue_processing:
return
plugin, width, height, fmt, data = result
self.covers_view.model().update_result(plugin, width, height, data)
plugin_name, width, height, fmt, data = result
self.covers_view.model().update_result(plugin_name, width, height, data)
def cleanup(self):
self.covers_view.delegate.stop_animation()
@ -894,7 +962,7 @@ class FullFetch(QDialog): # {{{
def view_log(self):
self._lv = LogViewer(self.log, self)
def book_selected(self, book):
def book_selected(self, book, caches):
self.next_button.setVisible(False)
self.ok_button.setVisible(True)
self.prev_button.setVisible(True)
@ -902,7 +970,7 @@ class FullFetch(QDialog): # {{{
self.stack.setCurrentIndex(1)
self.log('\n\n')
self.covers_widget.start(book, self.current_cover,
self.title, self.authors)
self.title, self.authors, caches)
def back_clicked(self):
self.next_button.setVisible(True)
@ -993,7 +1061,7 @@ class CoverFetch(QDialog): # {{{
book = Metadata(title, authors)
book.identifiers = identifiers
self.covers_widget.start(book, self.current_cover,
title, authors)
title, authors, {})
return self.exec_()
def view_log(self):

View File

@ -122,6 +122,18 @@ class UnicodeHTMLStream(HTMLStream):
end = self.normal if self.data else u''
return u''.join(self.data) + end
def dump(self):
    '''
    Return the state of this stream as the list
    ``[data, plain_text, last_col]``. The contained objects are the
    stream's own, not copies.
    '''
    state = [self.data, self.plain_text]
    state.append(self.last_col)
    return state
def load(self, dump):
    '''
    Replace the state of this stream with ``dump``, a sequence of the
    form ``(data, plain_text, last_col)`` as returned by :meth:`dump`.
    '''
    data, plain_text, last_col = dump
    self.data = data
    self.plain_text = plain_text
    self.last_col = last_col
def append_dump(self, dump):
    '''
    Append the contents of ``dump``, a sequence of the form
    ``(data, plain_text, last_col)``, to this stream.

    ``data`` and ``plain_text`` are extended in place; ``last_col`` is
    replaced by the value from ``dump``.
    '''
    extra_data, extra_plain_text, new_last_col = dump
    for target, extra in ((self.data, extra_data),
                          (self.plain_text, extra_plain_text)):
        target.extend(extra)
    self.last_col = new_last_col
class Log(object):
@ -186,4 +198,14 @@ class GUILog(ThreadSafeLog):
def plain_text(self):
return u''.join(self.outputs[0].plain_text)
def dump(self):
    '''
    Return the dump of the first output of this log.
    '''
    stream = self.outputs[0]
    return stream.dump()
def load(self, dump):
    '''
    Load ``dump`` into the first output of this log and return whatever
    that output's ``load()`` returns.
    '''
    stream = self.outputs[0]
    return stream.load(dump)
def append_dump(self, dump):
    '''
    Append ``dump`` to the first output of this log and return whatever
    that output's ``append_dump()`` returns.
    '''
    stream = self.outputs[0]
    return stream.append_dump(dump)
default_log = Log()