CHM Input: If an input encoding is specified, use it rather than trying to detect the encoding of the text in the CHM file. Fixes #9173 ("Input character encoding" is useless for chm file.)

This commit is contained in:
Kovid Goyal 2011-03-07 22:08:48 -07:00
parent 0285c22a84
commit 24d8f758e7
2 changed files with 7 additions and 3 deletions

View File

@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
def _chmtohtml(self, output_dir, chm_path, no_images, log):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log)
rdr = CHMReader(chm_path, log, self.opts)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir)
self._chm_reader = rdr
@ -32,13 +32,13 @@ class CHMInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.chm.metadata import get_metadata_from_reader
from calibre.customize.ui import plugin_for_input_format
self.opts = options
log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir:
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
no_images = False #options.no_images
chm_name = stream.name
#chm_data = stream.read()
@ -54,6 +54,7 @@ class CHMInput(InputFormatPlugin):
odi = options.debug_pipeline
options.debug_pipeline = None
options.input_encoding = 'utf-8'
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:

View File

@ -40,13 +40,14 @@ class CHMError(Exception):
pass
class CHMReader(CHMFile):
def __init__(self, input, log):
def __init__(self, input, log, opts):
CHMFile.__init__(self)
if isinstance(input, unicode):
input = input.encode(filesystem_encoding)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self.opts = opts
self._sourcechm = input
self._contents = None
self._playorder = 0
@ -151,6 +152,8 @@ class CHMReader(CHMFile):
break
def _reformat(self, data, htmlpath):
if self.opts.input_encoding:
data = data.decode(self.opts.input_encoding)
try:
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
soup = BeautifulSoup(data)