CHM Input: Do not choke on CHM files with non-ASCII internal filenames on Windows. Fixes #917696 (Several problems in handling CHM file)

This commit is contained in:
Kovid Goyal 2012-01-17 22:59:46 +05:30
parent b36b552ec6
commit ca2ccdcff7
2 changed files with 15 additions and 2 deletions

View File

@ -11,6 +11,7 @@ from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
from calibre.constants import filesystem_encoding
class CHMInput(InputFormatPlugin):
@ -36,6 +37,8 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir:
if not isinstance(tdir, unicode):
tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re
import os, re, codecs
from calibre import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
@ -99,8 +99,17 @@ class CHMReader(CHMFile):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
try:
x = self.GetEncoding()
codecs.lookup(x)
enc = x
except:
enc = None
for path in self.Contents():
lpath = os.path.join(output_dir, path)
fpath = path
if not isinstance(path, unicode) and enc:
fpath = path.decode(enc)
lpath = os.path.join(output_dir, fpath)
self._ensure_dir(lpath)
try:
data = self.GetFile(path)
@ -123,6 +132,7 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))