diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py index 820178408c..e3c3deba78 100644 --- a/src/calibre/ebooks/chm/input.py +++ b/src/calibre/ebooks/chm/input.py @@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin): log.debug('stream.name=%s' % stream.name) mainname = self._chmtohtml(tdir, chm_name, no_images, log) mainpath = os.path.join(tdir, mainname) - #raw_input() metadata = get_metadata_from_reader(self._chm_reader) @@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin): log.debug('Found %d section nodes' % len(chapters)) htmlpath = os.path.splitext(hhcpath)[0] + ".html" f = open(htmlpath, 'wb') - f.write('\n') - if chapters: + f.write('\n') path0 = chapters[0][1] subpath = os.path.dirname(path0) @@ -158,7 +156,9 @@ class CHMInput(InputFormatPlugin): url = url.encode('utf-8') f.write(url) - f.write("") + f.write("") + else: + f.write(hhcdata) f.close() return htmlpath diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index d0a81e8e7f..67a2d36607 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -8,7 +8,7 @@ import os, re from mimetypes import guess_type as guess_mimetype from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString -from calibre.constants import iswindows +from calibre.constants import iswindows, filesystem_encoding from calibre.utils.chm.chm import CHMFile from calibre.utils.chm.chmlib import ( CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL, @@ -78,6 +78,8 @@ class CHMError(Exception): class CHMReader(CHMFile): def __init__(self, input, log): CHMFile.__init__(self) + if isinstance(input, unicode): + input = input.encode(filesystem_encoding) if not self.LoadCHM(input): raise CHMError("Unable to open CHM file '%s'"%(input,)) self.log = log @@ -91,7 +93,6 @@ class CHMReader(CHMFile): self.root, ext = os.path.splitext(self.topics.lstrip('/')) self.hhc_path = self.root + ".hhc" - def _parse_toc(self, ul, basedir=os.getcwdu()): toc = TOC(play_order=self._playorder, base_path=basedir, text='') self._playorder += 1 @@ -152,6 +153,8 @@ class CHMReader(CHMFile): if f.lower() == self.hhc_path.lower(): self.hhc_path = f break + if self.hhc_path not in files and files: + self.hhc_path = files[0] def _reformat(self, data): try: @@ -159,7 +162,7 @@ class CHMReader(CHMFile): soup = BeautifulSoup(data) except ValueError: # hit some strange encoding problems... - print "Unable to parse html for cleaning, leaving it :(" + self.log.exception("Unable to parse html for cleaning, leaving it") return data # nuke javascript... [s.extract() for s in soup('script')]