mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: If an input encoding is specified, use it rather than trying to detect the encoding of the text in the CHM file. Fixes #9173 ("Input character encoding" is useless for chm file.)
This commit is contained in:
parent
0285c22a84
commit
24d8f758e7
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
||||
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
||||
from calibre.ebooks.chm.reader import CHMReader
|
||||
log.debug('Opening CHM file')
|
||||
rdr = CHMReader(chm_path, log)
|
||||
rdr = CHMReader(chm_path, log, self.opts)
|
||||
log.debug('Extracting CHM to %s' % output_dir)
|
||||
rdr.extract_content(output_dir)
|
||||
self._chm_reader = rdr
|
||||
@ -32,13 +32,13 @@ class CHMInput(InputFormatPlugin):
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.chm.metadata import get_metadata_from_reader
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
self.opts = options
|
||||
|
||||
log.debug('Processing CHM...')
|
||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||
html_input = plugin_for_input_format('html')
|
||||
for opt in html_input.options:
|
||||
setattr(options, opt.option.name, opt.recommended_value)
|
||||
options.input_encoding = 'utf-8'
|
||||
no_images = False #options.no_images
|
||||
chm_name = stream.name
|
||||
#chm_data = stream.read()
|
||||
@ -54,6 +54,7 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
odi = options.debug_pipeline
|
||||
options.debug_pipeline = None
|
||||
options.input_encoding = 'utf-8'
|
||||
# try a custom conversion:
|
||||
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
||||
# try using html converter:
|
||||
|
@ -40,13 +40,14 @@ class CHMError(Exception):
|
||||
pass
|
||||
|
||||
class CHMReader(CHMFile):
|
||||
def __init__(self, input, log):
|
||||
def __init__(self, input, log, opts):
|
||||
CHMFile.__init__(self)
|
||||
if isinstance(input, unicode):
|
||||
input = input.encode(filesystem_encoding)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self.opts = opts
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
@ -151,6 +152,8 @@ class CHMReader(CHMFile):
|
||||
break
|
||||
|
||||
def _reformat(self, data, htmlpath):
|
||||
if self.opts.input_encoding:
|
||||
data = data.decode(self.opts.input_encoding)
|
||||
try:
|
||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||
soup = BeautifulSoup(data)
|
||||
|
Loading…
x
Reference in New Issue
Block a user