mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: If an input encoding is specified, use it rather than trying to detect the encoding of the text in the CHM file. Fixes #9173 ("Input character encoding" is useless for chm file.)
This commit is contained in:
parent
0285c22a84
commit
24d8f758e7
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
||||||
from calibre.ebooks.chm.reader import CHMReader
|
from calibre.ebooks.chm.reader import CHMReader
|
||||||
log.debug('Opening CHM file')
|
log.debug('Opening CHM file')
|
||||||
rdr = CHMReader(chm_path, log)
|
rdr = CHMReader(chm_path, log, self.opts)
|
||||||
log.debug('Extracting CHM to %s' % output_dir)
|
log.debug('Extracting CHM to %s' % output_dir)
|
||||||
rdr.extract_content(output_dir)
|
rdr.extract_content(output_dir)
|
||||||
self._chm_reader = rdr
|
self._chm_reader = rdr
|
||||||
@ -32,13 +32,13 @@ class CHMInput(InputFormatPlugin):
|
|||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
from calibre.ebooks.chm.metadata import get_metadata_from_reader
|
from calibre.ebooks.chm.metadata import get_metadata_from_reader
|
||||||
from calibre.customize.ui import plugin_for_input_format
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
|
self.opts = options
|
||||||
|
|
||||||
log.debug('Processing CHM...')
|
log.debug('Processing CHM...')
|
||||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||||
html_input = plugin_for_input_format('html')
|
html_input = plugin_for_input_format('html')
|
||||||
for opt in html_input.options:
|
for opt in html_input.options:
|
||||||
setattr(options, opt.option.name, opt.recommended_value)
|
setattr(options, opt.option.name, opt.recommended_value)
|
||||||
options.input_encoding = 'utf-8'
|
|
||||||
no_images = False #options.no_images
|
no_images = False #options.no_images
|
||||||
chm_name = stream.name
|
chm_name = stream.name
|
||||||
#chm_data = stream.read()
|
#chm_data = stream.read()
|
||||||
@ -54,6 +54,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
odi = options.debug_pipeline
|
odi = options.debug_pipeline
|
||||||
options.debug_pipeline = None
|
options.debug_pipeline = None
|
||||||
|
options.input_encoding = 'utf-8'
|
||||||
# try a custom conversion:
|
# try a custom conversion:
|
||||||
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
||||||
# try using html converter:
|
# try using html converter:
|
||||||
|
@ -40,13 +40,14 @@ class CHMError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
class CHMReader(CHMFile):
|
class CHMReader(CHMFile):
|
||||||
def __init__(self, input, log):
|
def __init__(self, input, log, opts):
|
||||||
CHMFile.__init__(self)
|
CHMFile.__init__(self)
|
||||||
if isinstance(input, unicode):
|
if isinstance(input, unicode):
|
||||||
input = input.encode(filesystem_encoding)
|
input = input.encode(filesystem_encoding)
|
||||||
if not self.LoadCHM(input):
|
if not self.LoadCHM(input):
|
||||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||||
self.log = log
|
self.log = log
|
||||||
|
self.opts = opts
|
||||||
self._sourcechm = input
|
self._sourcechm = input
|
||||||
self._contents = None
|
self._contents = None
|
||||||
self._playorder = 0
|
self._playorder = 0
|
||||||
@ -151,6 +152,8 @@ class CHMReader(CHMFile):
|
|||||||
break
|
break
|
||||||
|
|
||||||
def _reformat(self, data, htmlpath):
|
def _reformat(self, data, htmlpath):
|
||||||
|
if self.opts.input_encoding:
|
||||||
|
data = data.decode(self.opts.input_encoding)
|
||||||
try:
|
try:
|
||||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||||
soup = BeautifulSoup(data)
|
soup = BeautifulSoup(data)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user