E-book viewer: Run input plugins in a worker process

Prevents bugs in input plugins from causing memory leaks when opening
multiple books in the same viewer process.
This commit is contained in:
Kovid Goyal 2016-01-13 12:04:09 +05:30
parent bec98698c4
commit e1226bbf17
3 changed files with 52 additions and 43 deletions

View File

@ -227,7 +227,6 @@ class EPUBInput(InputFormatPlugin):
self.removed_cover = self.rationalize_cover(opf, log) self.removed_cover = self.rationalize_cover(opf, log)
self.optimize_opf_parsing = opf
for x in opf.itermanifest(): for x in opf.itermanifest():
if x.get('media-type', '') == 'application/x-dtbook+xml': if x.get('media-type', '') == 'application/x-dtbook+xml':
raise ValueError( raise ValueError(

View File

@ -45,6 +45,46 @@ def write_oebbook(oeb, path):
if f.endswith('.opf'): if f.endswith('.opf'):
return f return f
def extract_book(pathtoebook, tdir, log=None, view_kepub=False, processed=False, only_input_plugin=False):
    ''' Run the input plugin for ``pathtoebook`` with ``tdir`` as the output
    directory and return ``(book_format, pathtoopf, input_fmt)``.

    :param pathtoebook: Path to the book file; its extension supplies the
        default ``book_format``.
    :param tdir: Directory handed to the Plumber, the input plugin and
        write_oebbook().
    :param log: Log object to use; ``default_log`` is used when this is falsy.
    :param view_kepub: Passed through unchanged to the Plumber.
    :param processed: When true (and ``only_input_plugin`` is false) the input
        plugin's result is always run through create_oebbook().
    :param only_input_plugin: When true, create_oebbook() is never run.
    :return: A tuple of the upper-cased extension of ``pathtoebook`` (or
        ``'KF8'`` / ``'KF8:joint'`` when the plugin sets ``is_kf8``), the
        final ``pathtoopf`` value, and ``plumber.input_fmt``.
    '''
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    from calibre.utils.logging import default_log

    if not log:
        log = default_log

    plumber = Plumber(pathtoebook, tdir, log, view_kepub=view_kepub)
    plumber.setup_options()

    if pathtoebook.lower().endswith('.opf'):
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True
    plumber.input_plugin.for_viewer = True

    # NOTE(review): the flattened source does not show where the ``with``
    # block ends; everything up to the final write_oebbook() call is kept
    # inside it here -- confirm against the repository.
    with plumber.input_plugin, open(plumber.input, 'rb') as stream:
        pathtoopf = plumber.input_plugin(
            stream, plumber.opts, plumber.input_fmt, log, {}, tdir)

        # Run the HTML preprocess/parsing from the conversion pipeline as
        # well, either on request or for pdb/pdf/rb input that did not
        # already produce an object with a ``manifest`` attribute.
        if not only_input_plugin and (
                processed or (
                    plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(pathtoopf, 'manifest'))):
            if hasattr(pathtoopf, 'manifest'):
                pathtoopf = write_oebbook(pathtoopf, tdir)
            pathtoopf = create_oebbook(log, pathtoopf, plumber.opts)

        # Anything that still has a ``manifest`` attribute is written out
        # with write_oebbook(), whose return value replaces it.
        if hasattr(pathtoopf, 'manifest'):
            pathtoopf = write_oebbook(pathtoopf, tdir)

    if getattr(plumber.input_plugin, 'is_kf8', False):
        joint = getattr(plumber.input_plugin, 'mobi_is_joint', False)
        book_format = 'KF8' + (':joint' if joint else '')
    else:
        book_format = os.path.splitext(pathtoebook)[1][1:].upper()

    return book_format, pathtoopf, plumber.input_fmt
def run_extract_book(*args, **kwargs):
    ''' Call extract_book() through fork_job() and return its result.

    All positional and keyword arguments are forwarded unchanged as the
    job's ``args`` and ``kwargs``. The return value is the ``'result'`` entry
    of whatever fork_job() returns.
    '''
    from calibre.utils.ipc.simple_worker import fork_job

    job = fork_job(
        'calibre.ebooks.oeb.iterator.book', 'extract_book',
        args=args, kwargs=kwargs, timeout=3000, no_output=True)
    return job['result']
class EbookIterator(BookmarksMixin): class EbookIterator(BookmarksMixin):
CHARACTERS_PER_PAGE = 1000 CHARACTERS_PER_PAGE = 1000
@ -93,45 +133,12 @@ class EbookIterator(BookmarksMixin):
''' Convert an ebook file into an exploded OEB book suitable for ''' Convert an ebook file into an exploded OEB book suitable for
display in viewers/preprocessing etc. ''' display in viewers/preprocessing etc. '''
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
self.delete_on_exit = [] self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter') self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__() self.base = self._tdir.__enter__()
plumber = Plumber(self.pathtoebook, self.base, self.log, view_kepub=view_kepub) self.book_format, self.pathtoopf, input_fmt = run_extract_book(
plumber.setup_options() self.pathtoebook, self.base, only_input_plugin=only_input_plugin, view_kepub=view_kepub, processed=processed)
if self.pathtoebook.lower().endswith('.opf'): self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
with plumber.input_plugin, open(plumber.input, 'rb') as inf:
self.pathtoopf = plumber.input_plugin(inf,
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if not only_input_plugin:
# Run the HTML preprocess/parsing from the conversion pipeline as
# well
if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
not hasattr(self.pathtoopf, 'manifest')):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
self.book_format = 'KF8' + fs
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language self.language = self.opf.language
if self.language: if self.language:
self.language = self.language.lower() self.language = self.language.lower()
@ -140,7 +147,7 @@ class EbookIterator(BookmarksMixin):
self.spine = [] self.spine = []
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links, Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
run_char_count=run_char_count, from_epub=self.book_format == 'EPUB') run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'} is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
for i in ordered: for i in ordered:
spath = i.path spath = i.path
mt = None mt = None
@ -220,5 +227,3 @@ class EbookIterator(BookmarksMixin):
os.remove(x) os.remove(x)
except: except:
pass pass

View File

@ -17,7 +17,6 @@ from calibre.gui2 import (
Application, ORG_NAME, APP_UID, choose_files, info_dialog, error_dialog, Application, ORG_NAME, APP_UID, choose_files, info_dialog, error_dialog,
open_url, setup_gui_option_parser) open_url, setup_gui_option_parser)
from calibre.ebooks.oeb.iterator.book import EbookIterator from calibre.ebooks.oeb.iterator.book import EbookIterator
from calibre.ebooks import DRMError
from calibre.constants import islinux, filesystem_encoding from calibre.constants import islinux, filesystem_encoding
from calibre.utils.config import Config, StringConfig, JSONConfig from calibre.utils.config import Config, StringConfig, JSONConfig
from calibre.customize.ui import available_input_formats from calibre.customize.ui import available_input_formats
@ -33,6 +32,7 @@ dprefs.defaults['word_lookups'] = {}
class Worker(Thread): class Worker(Thread):
def run(self): def run(self):
from calibre.utils.ipc.simple_worker import WorkerError
try: try:
Thread.run(self) Thread.run(self)
self.exception = self.traceback = None self.exception = self.traceback = None
@ -41,6 +41,9 @@ class Worker(Thread):
'This ebook is corrupted and cannot be opened. If you ' 'This ebook is corrupted and cannot be opened. If you '
'downloaded it from somewhere, try downloading it again.') 'downloaded it from somewhere, try downloading it again.')
self.traceback = '' self.traceback = ''
except WorkerError as err:
self.exception = Exception(_('Failed to read book, {0} click "Show Details" for more information').format(self.path_to_ebook))
self.traceback = err.orig_tb
except Exception as err: except Exception as err:
self.exception = err self.exception = err
self.traceback = traceback.format_exc() self.traceback = traceback.format_exc()
@ -863,19 +866,21 @@ class EbookViewer(MainWindow):
self.history.clear() self.history.clear()
self.open_progress_indicator(_('Loading ebook...')) self.open_progress_indicator(_('Loading ebook...'))
worker = Worker(target=partial(self.iterator.__enter__, view_kepub=True)) worker = Worker(target=partial(self.iterator.__enter__, view_kepub=True))
worker.path_to_ebook = pathtoebook
worker.start() worker.start()
while worker.isAlive(): while worker.isAlive():
worker.join(0.1) worker.join(0.1)
QApplication.processEvents() QApplication.processEvents()
if worker.exception is not None: if worker.exception is not None:
if isinstance(worker.exception, DRMError): tb = worker.traceback
if tb.strip().splitlines()[-1].startswith('DRMError:'):
from calibre.gui2.dialogs.drm_error import DRMErrorMessage from calibre.gui2.dialogs.drm_error import DRMErrorMessage
DRMErrorMessage(self).exec_() DRMErrorMessage(self).exec_()
else: else:
r = getattr(worker.exception, 'reason', worker.exception) r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'), error_dialog(self, _('Could not open ebook'),
as_unicode(r) or _('Unknown error'), as_unicode(r) or _('Unknown error'),
det_msg=worker.traceback, show=True) det_msg=tb, show=True)
self.close_progress_indicator() self.close_progress_indicator()
else: else:
self.metadata.show_opf(self.iterator.opf, self.metadata.show_opf(self.iterator.opf,