EPUB Input: Handle EPUB files with components encoded in an encoding other than UTF-8 correctly, though why anyone would do that is a mystery. Fixes #7196 (Non UTF-8 in ePub)

This commit is contained in:
Kovid Goyal 2010-10-17 15:34:29 -06:00
parent c0609cd4cd
commit 5d2772eb73
3 changed files with 8 additions and 1 deletion

View File

@@ -120,6 +120,11 @@ class InputFormatPlugin(Plugin):
#: to make its output suitable for viewing
for_viewer = False
#: The encoding that this input plugin creates files in. A value of
#: None means that the encoding is undefined and must be
#: detected individually
output_encoding = 'utf-8'
#: Options shared by all Input format plugins. Do not override
#: in sub-classes. Use :attr:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`.

View File

@@ -838,7 +838,8 @@ OptionRecommendation(name='timestamp',
self.opts_to_mi(self.user_metadata)
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin)
self.input_plugin,
encoding=self.input_plugin.output_encoding)
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
self.opts.is_image_collection = self.input_plugin.is_image_collection
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)

View File

@@ -16,6 +16,7 @@ class EPUBInput(InputFormatPlugin):
author = 'Kovid Goyal'
description = 'Convert EPUB files (.epub) to HTML'
file_types = set(['epub'])
output_encoding = None
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])