From 24d8f758e7fa94c17cb749853b3ae32a59c645ef Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Mar 2011 22:08:48 -0700 Subject: [PATCH] CHM Input: If an input encoding is specified, use it rather than trying to detect the encoding of the text in the CHM file. Fixes #9173 ("Input character encoding" is useless for chm file.) --- src/calibre/ebooks/chm/input.py | 5 +++-- src/calibre/ebooks/chm/reader.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py index 89efa2b4d1..f55a76d67e 100644 --- a/src/calibre/ebooks/chm/input.py +++ b/src/calibre/ebooks/chm/input.py @@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin): def _chmtohtml(self, output_dir, chm_path, no_images, log): from calibre.ebooks.chm.reader import CHMReader log.debug('Opening CHM file') - rdr = CHMReader(chm_path, log) + rdr = CHMReader(chm_path, log, self.opts) log.debug('Extracting CHM to %s' % output_dir) rdr.extract_content(output_dir) self._chm_reader = rdr @@ -32,13 +32,13 @@ class CHMInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): from calibre.ebooks.chm.metadata import get_metadata_from_reader from calibre.customize.ui import plugin_for_input_format + self.opts = options log.debug('Processing CHM...') with TemporaryDirectory('_chm2oeb') as tdir: html_input = plugin_for_input_format('html') for opt in html_input.options: setattr(options, opt.option.name, opt.recommended_value) - options.input_encoding = 'utf-8' no_images = False #options.no_images chm_name = stream.name #chm_data = stream.read() @@ -54,6 +54,7 @@ class CHMInput(InputFormatPlugin): odi = options.debug_pipeline options.debug_pipeline = None + options.input_encoding = 'utf-8' # try a custom conversion: #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata) # try using html converter: diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index fe8b4fdbde..34d228ef3b 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -40,13 +40,14 @@ class CHMError(Exception): pass class CHMReader(CHMFile): - def __init__(self, input, log): + def __init__(self, input, log, opts): CHMFile.__init__(self) if isinstance(input, unicode): input = input.encode(filesystem_encoding) if not self.LoadCHM(input): raise CHMError("Unable to open CHM file '%s'"%(input,)) self.log = log + self.opts = opts self._sourcechm = input self._contents = None self._playorder = 0 @@ -151,6 +152,8 @@ class CHMReader(CHMFile): break def _reformat(self, data, htmlpath): + if self.opts.input_encoding: + data = data.decode(self.opts.input_encoding) try: data = xml_to_unicode(data, strip_encoding_pats=True)[0] soup = BeautifulSoup(data)