mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Run single metadata downloads in a worker process as well, to work around memory leaks in third-party plugins
This commit is contained in:
parent
270d36f59f
commit
fbddf37b80
@ -112,6 +112,18 @@ def get_cached_cover_urls(mi):
|
||||
if url:
|
||||
yield (p, url)
|
||||
|
||||
def dump_caches():
    '''
    Collect the caches of all metadata plugins that support ``identify``.

    :return: A dict mapping each plugin's ``name`` to the value returned by
        that plugin's own ``dump_caches()`` method.
    '''
    from calibre.customize.ui import metadata_plugins
    dumped = {}
    for plugin in metadata_plugins(['identify']):
        dumped[plugin.name] = plugin.dump_caches()
    return dumped
|
||||
|
||||
def load_caches(dump):
    '''
    Feed previously dumped caches back into the ``identify`` plugins.

    :param dump: A dict keyed by plugin name, as produced by the module
        level ``dump_caches()``. Plugins with no entry (or an empty
        entry) are left untouched.
    '''
    from calibre.customize.ui import metadata_plugins
    # Materialize the plugin list before touching any plugin
    available = list(metadata_plugins(['identify']))
    for plugin in available:
        saved = dump.get(plugin.name, None)
        if not saved:
            continue
        plugin.load_caches(saved)
|
||||
|
||||
def cap_author_token(token):
|
||||
lt = lower(token)
|
||||
if lt in ('von', 'de', 'el', 'van', 'le'):
|
||||
@ -293,6 +305,16 @@ class Source(Plugin):
|
||||
with self.cache_lock:
|
||||
return self._identifier_to_cover_url_cache.get(id_, None)
|
||||
|
||||
def dump_caches(self):
    '''
    Return shallow copies of this plugin's two lookup caches.

    :return: A dict with the keys ``isbn_to_identifier`` and
        ``identifier_to_cover``. The copies are taken while holding
        ``self.cache_lock``.
    '''
    with self.cache_lock:
        isbn_map = self._isbn_to_identifier_cache.copy()
        cover_map = self._identifier_to_cover_url_cache.copy()
        return {'isbn_to_identifier':isbn_map,
                'identifier_to_cover':cover_map}
|
||||
|
||||
def load_caches(self, dump):
    '''
    Merge a dump produced by :meth:`dump_caches` into this plugin's caches.

    :param dump: A dict with the keys ``isbn_to_identifier`` and
        ``identifier_to_cover``. Existing cache entries are kept; entries
        from ``dump`` are added or overwrite entries with the same key.
    '''
    with self.cache_lock:
        isbn_entries = dump['isbn_to_identifier']
        self._isbn_to_identifier_cache.update(isbn_entries)
        cover_entries = dump['identifier_to_cover']
        self._identifier_to_cover_url_cache.update(cover_entries)
|
||||
|
||||
# }}}
|
||||
|
||||
# Utility functions {{{
|
||||
|
@ -8,14 +8,17 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from threading import Event
|
||||
from threading import Event, Thread
|
||||
from Queue import Queue, Empty
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.utils.date import as_utc
|
||||
from calibre.ebooks.metadata.sources.identify import identify, msprefs
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
from calibre.ebooks.metadata.sources.covers import download_cover
|
||||
from calibre.ebooks.metadata.sources.covers import (download_cover,
|
||||
run_download)
|
||||
from calibre.ebooks.metadata.sources.base import dump_caches, load_caches
|
||||
from calibre.utils.logging import GUILog
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
|
||||
|
||||
@ -93,3 +96,31 @@ def main(do_identify, covers, metadata, ensure_fields):
|
||||
|
||||
return failed_ids, failed_covers, all_failed
|
||||
|
||||
def single_identify(title, authors, identifiers):
    '''
    Run a single identify query and return results in a transferable form.

    :return: A 3-tuple ``(opfs, caches, log_dump)`` where ``opfs`` is the
        list of ``metadata_to_opf()`` outputs (one per identify result),
        ``caches`` is the value of ``dump_caches()`` taken after the
        query, and ``log_dump`` is the dump of the log used for the query.
    '''
    log = GUILog()
    abort = Event()  # Fresh event, never set in this function
    matches = identify(log, abort, title=title, authors=authors,
            identifiers=identifiers)
    opfs = []
    for mi in matches:
        opfs.append(metadata_to_opf(mi))
    caches = dump_caches()
    return opfs, caches, log.dump()
|
||||
|
||||
def single_covers(title, authors, identifiers, caches):
    '''
    Download covers and write each one to a file in the current directory.

    ``run_download`` is run in a daemon thread and its results are read
    from a queue. Every result is written to a file named
    ``<plugin name>,,<width>,,<height>,,<fmt>.cover`` and, once the file is
    completely written, a directory with the same name plus ``.done`` is
    created next to it as a completion marker.

    :param caches: Plugin caches, passed to ``load_caches()`` before the
        download starts.
    :return: The dump of the log used for the download.
    '''
    load_caches(caches)
    log = GUILog()
    results = Queue()
    worker = Thread(target=run_download, args=(log, results, Event()),
            kwargs=dict(title=title, authors=authors, identifiers=identifiers))
    worker.daemon = True
    worker.start()

    def save(result):
        # Write the cover first, then create the marker directory, so that
        # the marker is only ever visible for a fully written file
        plugin, width, height, fmt, data = result
        name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt)
        with open(name, 'wb') as f:
            f.write(data)
        os.mkdir(name+'.done')

    while worker.is_alive():
        try:
            result = results.get(True, 1)
        except Empty:
            continue
        save(result)

    # The download thread can queue results and exit between two iterations
    # of the loop above, so anything still left in the queue must be
    # written out as well, otherwise those covers are silently lost
    while True:
        try:
            result = results.get_nowait()
        except Empty:
            break
        save(result)

    return log.dump()
|
||||
|
||||
|
||||
|
@ -8,11 +8,16 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
DEBUG_DIALOG = False
|
||||
# Test: turn fields off, error
|
||||
# handling
|
||||
# Do some testing in windows as filesystem model is different
|
||||
|
||||
# Imports {{{
|
||||
import os, time
|
||||
from threading import Thread, Event
|
||||
from operator import attrgetter
|
||||
from Queue import Queue, Empty
|
||||
from io import BytesIO
|
||||
|
||||
from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
|
||||
QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
|
||||
@ -24,16 +29,17 @@ from PyQt4.QtWebKit import QWebView
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.logging import GUILog as Log
|
||||
from calibre.ebooks.metadata.sources.identify import (identify,
|
||||
urls_from_identifiers)
|
||||
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.gui2 import error_dialog, NONE, rating_font
|
||||
from calibre.utils.date import (utcnow, fromordinal, format_date,
|
||||
UNDEFINED_DATE, as_utc)
|
||||
from calibre.library.comments import comments_to_html
|
||||
from calibre import force_unicode
|
||||
from calibre.utils.config import tweaks
|
||||
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
# }}}
|
||||
|
||||
class RichTextDelegate(QStyledItemDelegate): # {{{
|
||||
@ -357,7 +363,7 @@ class Comments(QWebView): # {{{
|
||||
|
||||
class IdentifyWorker(Thread): # {{{
|
||||
|
||||
def __init__(self, log, abort, title, authors, identifiers):
|
||||
def __init__(self, log, abort, title, authors, identifiers, caches):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
|
||||
@ -367,6 +373,7 @@ class IdentifyWorker(Thread): # {{{
|
||||
|
||||
self.results = []
|
||||
self.error = None
|
||||
self.caches = caches
|
||||
|
||||
def sample_results(self):
|
||||
m1 = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald'])
|
||||
@ -390,25 +397,36 @@ class IdentifyWorker(Thread): # {{{
|
||||
if DEBUG_DIALOG:
|
||||
self.results = self.sample_results()
|
||||
else:
|
||||
self.results = identify(self.log, self.abort, title=self.title,
|
||||
authors=self.authors, identifiers=self.identifiers)
|
||||
res = fork_job(
|
||||
'calibre.ebooks.metadata.sources.worker',
|
||||
'single_identify', (self.title, self.authors,
|
||||
self.identifiers), no_output=True, abort=self.abort)
|
||||
self.results, caches, log_dump = res['result']
|
||||
self.results = [OPF(BytesIO(r), basedir=os.getcwdu(),
|
||||
populate_spine=False).to_book_metadata() for r in self.results]
|
||||
self.caches.update(caches)
|
||||
self.log.load(log_dump)
|
||||
for i, result in enumerate(self.results):
|
||||
result.gui_rank = i
|
||||
except WorkerError as e:
|
||||
self.error = force_unicode(e.orig_tb)
|
||||
except:
|
||||
import traceback
|
||||
self.error = force_unicode(traceback.format_exc())
|
||||
|
||||
# }}}
|
||||
|
||||
class IdentifyWidget(QWidget): # {{{
|
||||
|
||||
rejected = pyqtSignal()
|
||||
results_found = pyqtSignal()
|
||||
book_selected = pyqtSignal(object)
|
||||
book_selected = pyqtSignal(object, object)
|
||||
|
||||
def __init__(self, log, parent=None):
|
||||
QWidget.__init__(self, parent)
|
||||
self.log = log
|
||||
self.abort = Event()
|
||||
self.caches = {}
|
||||
|
||||
self.l = l = QGridLayout()
|
||||
self.setLayout(l)
|
||||
@ -421,7 +439,7 @@ class IdentifyWidget(QWidget): # {{{
|
||||
l.addWidget(self.top, 0, 0)
|
||||
|
||||
self.results_view = ResultsView(self)
|
||||
self.results_view.book_selected.connect(self.book_selected.emit)
|
||||
self.results_view.book_selected.connect(self.emit_book_selected)
|
||||
self.get_result = self.results_view.get_result
|
||||
l.addWidget(self.results_view, 1, 0)
|
||||
|
||||
@ -455,6 +473,9 @@ class IdentifyWidget(QWidget): # {{{
|
||||
</script>
|
||||
''')
|
||||
|
||||
def emit_book_selected(self, book):
    '''
    Emit ``book_selected`` with the chosen book and this widget's caches.

    :param book: The book object to forward as the first signal argument.
    '''
    payload = (book, self.caches)
    self.book_selected.emit(*payload)
|
||||
|
||||
def start(self, title=None, authors=None, identifiers={}):
|
||||
self.log.clear()
|
||||
self.log('Starting download')
|
||||
@ -470,7 +491,7 @@ class IdentifyWidget(QWidget): # {{{
|
||||
self.log(unicode(self.query.text()))
|
||||
|
||||
self.worker = IdentifyWorker(self.log, self.abort, title,
|
||||
authors, identifiers)
|
||||
authors, identifiers, self.caches)
|
||||
|
||||
self.worker.start()
|
||||
|
||||
@ -513,20 +534,20 @@ class IdentifyWidget(QWidget): # {{{
|
||||
|
||||
class CoverWorker(Thread): # {{{
|
||||
|
||||
def __init__(self, log, abort, title, authors, identifiers):
|
||||
def __init__(self, log, abort, title, authors, identifiers, caches):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
|
||||
self.log, self.abort = log, abort
|
||||
self.title, self.authors, self.identifiers = (title, authors,
|
||||
identifiers)
|
||||
self.caches = caches
|
||||
|
||||
self.rq = Queue()
|
||||
self.error = None
|
||||
|
||||
def fake_run(self):
|
||||
images = ['donate.png', 'config.png', 'column.png', 'eject.png', ]
|
||||
import time
|
||||
time.sleep(2)
|
||||
for pl, im in zip(metadata_plugins(['cover']), images):
|
||||
self.rq.put((pl, 1, 1, 'png', I(im, data=True)))
|
||||
@ -536,12 +557,56 @@ class CoverWorker(Thread): # {{{
|
||||
if DEBUG_DIALOG:
|
||||
self.fake_run()
|
||||
else:
|
||||
from calibre.ebooks.metadata.sources.covers import run_download
|
||||
run_download(self.log, self.rq, self.abort, title=self.title,
|
||||
authors=self.authors, identifiers=self.identifiers)
|
||||
self.run_fork()
|
||||
except WorkerError as e:
|
||||
self.error = force_unicode(e.orig_tb)
|
||||
except:
|
||||
import traceback
|
||||
self.error = force_unicode(traceback.format_exc())
|
||||
|
||||
def run_fork(self):
    '''
    Run the cover download via ``fork_job`` with a temporary directory as
    its working directory, while a helper thread runs
    :meth:`monitor_tdir` on that directory.

    The result of the job is appended to ``self.log`` with
    ``append_dump``. The monitoring thread is always told to stop and is
    joined before the temporary directory context is left, even when the
    job raises.
    '''
    with TemporaryDirectory('_single_metadata_download') as tdir:
        self.keep_going = True
        monitor = Thread(target=self.monitor_tdir, args=(tdir,))
        monitor.daemon = True
        monitor.start()

        try:
            job_args = (self.title, self.authors, self.identifiers,
                    self.caches)
            res = fork_job('calibre.ebooks.metadata.sources.worker',
                    'single_covers', job_args,
                    cwd=tdir, no_output=True, abort=self.abort)
            self.log.append_dump(res['result'])
        finally:
            # Signal the monitor loop to finish and wait for its last scan
            self.keep_going = False
            monitor.join()
|
||||
|
||||
def scan_once(self, tdir, seen):
    '''
    Look through ``tdir`` once for finished cover files and queue them.

    A file counts as finished when its name ends with ``.cover`` and an
    entry named ``<file name>.done`` exists beside it. The part of the
    name before ``.cover`` is split on ``,,`` into plugin name, width,
    height and format.

    :param tdir: Directory to scan.
    :param seen: Set of file names already queued; successfully processed
        names are added to it. Files that fail to parse or read are not
        added, so they are tried again on the next scan.
    '''
    for entry in os.listdir(tdir):
        if entry in seen or not entry.endswith('.cover'):
            continue
        if not os.path.exists(os.path.join(tdir, entry+'.done')):
            continue
        stem = entry.rpartition('.')[0]
        try:
            plugin_name, width, height, fmt = stem.split(',,')
            width, height = int(width), int(height)
            with open(os.path.join(tdir, entry), 'rb') as f:
                data = f.read()
        except:
            import traceback
            traceback.print_exc()
            continue
        seen.add(entry)
        self.rq.put((plugin_name, width, height, fmt, data))
|
||||
|
||||
def monitor_tdir(self, tdir):
    '''
    Call :meth:`scan_once` on ``tdir`` about once a second for as long as
    ``self.keep_going`` is true, then once more after it becomes false.

    :param tdir: Directory passed through to :meth:`scan_once`.
    '''
    processed = set()
    while True:
        if not self.keep_going:
            break
        time.sleep(1)
        self.scan_once(tdir, processed)
    # One last scan after the download process has ended
    self.scan_once(tdir, processed)
|
||||
|
||||
# }}}
|
||||
|
||||
class CoversModel(QAbstractListModel): # {{{
|
||||
@ -620,16 +685,19 @@ class CoversModel(QAbstractListModel): # {{{
|
||||
idx = self.plugin_map.get(plugin, 0)
|
||||
return self.index(idx)
|
||||
|
||||
def update_result(self, plugin, width, height, data):
|
||||
try:
|
||||
idx = self.plugin_map[plugin]
|
||||
except:
|
||||
def update_result(self, plugin_name, width, height, data):
|
||||
idx = None
|
||||
for plugin, i in self.plugin_map.iteritems():
|
||||
if plugin.name == plugin_name:
|
||||
idx = i
|
||||
break
|
||||
if idx is None:
|
||||
return
|
||||
pmap = QPixmap()
|
||||
pmap.loadFromData(data)
|
||||
if pmap.isNull():
|
||||
return
|
||||
self.covers[idx] = self.get_item(plugin.name, pmap, waiting=False)
|
||||
self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False)
|
||||
self.dataChanged.emit(self.index(idx), self.index(idx))
|
||||
|
||||
def cover_pixmap(self, index):
|
||||
@ -709,7 +777,7 @@ class CoversWidget(QWidget): # {{{
|
||||
def reset_covers(self):
|
||||
self.covers_view.reset_covers()
|
||||
|
||||
def start(self, book, current_cover, title, authors):
|
||||
def start(self, book, current_cover, title, authors, caches):
|
||||
self.continue_processing = True
|
||||
self.abort.clear()
|
||||
self.book, self.current_cover = book, current_cover
|
||||
@ -721,7 +789,7 @@ class CoversWidget(QWidget): # {{{
|
||||
self.covers_view.start()
|
||||
|
||||
self.worker = CoverWorker(self.log, self.abort, self.title,
|
||||
self.authors, book.identifiers)
|
||||
self.authors, book.identifiers, caches)
|
||||
self.worker.start()
|
||||
QTimer.singleShot(50, self.check)
|
||||
self.covers_view.setFocus(Qt.OtherFocusReason)
|
||||
@ -766,8 +834,8 @@ class CoversWidget(QWidget): # {{{
|
||||
def process_result(self, result):
|
||||
if not self.continue_processing:
|
||||
return
|
||||
plugin, width, height, fmt, data = result
|
||||
self.covers_view.model().update_result(plugin, width, height, data)
|
||||
plugin_name, width, height, fmt, data = result
|
||||
self.covers_view.model().update_result(plugin_name, width, height, data)
|
||||
|
||||
def cleanup(self):
|
||||
self.covers_view.delegate.stop_animation()
|
||||
@ -894,7 +962,7 @@ class FullFetch(QDialog): # {{{
|
||||
def view_log(self):
|
||||
self._lv = LogViewer(self.log, self)
|
||||
|
||||
def book_selected(self, book):
|
||||
def book_selected(self, book, caches):
|
||||
self.next_button.setVisible(False)
|
||||
self.ok_button.setVisible(True)
|
||||
self.prev_button.setVisible(True)
|
||||
@ -902,7 +970,7 @@ class FullFetch(QDialog): # {{{
|
||||
self.stack.setCurrentIndex(1)
|
||||
self.log('\n\n')
|
||||
self.covers_widget.start(book, self.current_cover,
|
||||
self.title, self.authors)
|
||||
self.title, self.authors, caches)
|
||||
|
||||
def back_clicked(self):
|
||||
self.next_button.setVisible(True)
|
||||
@ -993,7 +1061,7 @@ class CoverFetch(QDialog): # {{{
|
||||
book = Metadata(title, authors)
|
||||
book.identifiers = identifiers
|
||||
self.covers_widget.start(book, self.current_cover,
|
||||
title, authors)
|
||||
title, authors, {})
|
||||
return self.exec_()
|
||||
|
||||
def view_log(self):
|
||||
|
@ -122,6 +122,18 @@ class UnicodeHTMLStream(HTMLStream):
|
||||
end = self.normal if self.data else u''
|
||||
return u''.join(self.data) + end
|
||||
|
||||
def dump(self):
    '''
    Return the state of this stream as the list
    ``[data, plain_text, last_col]``. The contained objects are the
    stream's own, not copies.
    '''
    state = [self.data, self.plain_text]
    state.append(self.last_col)
    return state
|
||||
|
||||
def load(self, dump):
    '''
    Replace the state of this stream with a previously dumped one.

    :param dump: A 3-item sequence ``(data, plain_text, last_col)``.
    '''
    data, plain_text, last_col = dump
    self.data = data
    self.plain_text = plain_text
    self.last_col = last_col
|
||||
|
||||
def append_dump(self, dump):
    '''
    Append a previously dumped state to the current state of this stream.

    :param dump: A 3-item sequence ``(data, plain_text, last_col)``. The
        first two items extend ``self.data`` and ``self.plain_text``; the
        third replaces ``self.last_col``.
    '''
    extra_data, extra_plain, last_col = dump
    self.data.extend(extra_data)
    self.plain_text.extend(extra_plain)
    self.last_col = last_col
|
||||
|
||||
|
||||
class Log(object):
|
||||
|
||||
@ -186,4 +198,14 @@ class GUILog(ThreadSafeLog):
|
||||
def plain_text(self):
|
||||
return u''.join(self.outputs[0].plain_text)
|
||||
|
||||
def dump(self):
    '''
    Return the dump of this log's first output stream.
    '''
    stream = self.outputs[0]
    return stream.dump()
|
||||
|
||||
def load(self, dump):
    '''
    Load ``dump`` into this log's first output stream and return whatever
    that stream's ``load()`` returns.
    '''
    stream = self.outputs[0]
    return stream.load(dump)
|
||||
|
||||
def append_dump(self, dump):
    '''
    Append ``dump`` to this log's first output stream and return whatever
    that stream's ``append_dump()`` returns.
    '''
    stream = self.outputs[0]
    return stream.append_dump(dump)
|
||||
|
||||
|
||||
default_log = Log()
|
||||
|
Loading…
x
Reference in New Issue
Block a user