CHM Input: Fix incorrect decoding for CHM files whose hhc file is also a content file. Fixes #1151721 (Private bug)

This commit is contained in:
Kovid Goyal 2013-03-07 22:58:52 +05:30
parent 55030cc0b0
commit d46af974bc
2 changed files with 10 additions and 5 deletions

View File

@@ -53,6 +53,7 @@ class CHMReader(CHMFile):
self._playorder = 0
self._metadata = False
self._extracted = False
self.re_encoded_files = set()
# location of '.hhc' file, which is the CHM TOC.
if self.topics is None:
@@ -147,8 +148,8 @@ class CHMReader(CHMFile):
f.write(data)
self._extracted = True
files = [x for x in os.listdir(output_dir) if
os.path.isfile(os.path.join(output_dir, x))]
files = [y for y in os.listdir(output_dir) if
os.path.isfile(os.path.join(output_dir, y))]
if self.hhc_path not in files:
for f in files:
if f.lower() == self.hhc_path.lower():
@@ -249,7 +250,9 @@ class CHMReader(CHMFile):
pass
# do not prettify, it would reformat the <pre> tags!
try:
return str(soup)
ans = str(soup)
self.re_encoded_files.add(os.path.abspath(htmlpath))
return ans
except RuntimeError:
return data

View File

@@ -25,7 +25,6 @@ class CHMInput(InputFormatPlugin):
self._chm_reader = rdr
return rdr.hhc_path
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.chm.metadata import get_metadata_from_reader
from calibre.customize.ui import plugin_for_input_format
@@ -63,7 +62,10 @@ class CHMInput(InputFormatPlugin):
options.debug_pipeline = None
options.input_encoding = 'utf-8'
htmlpath, toc = self._create_html_root(mainpath, log, encoding)
uenc = encoding
if os.path.abspath(mainpath) in self._chm_reader.re_encoded_files:
uenc = 'utf-8'
htmlpath, toc = self._create_html_root(mainpath, log, uenc)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
if toc.count() > 1: