Fix #6087 (Error running conversion)

This commit is contained in:
Kovid Goyal 2010-07-09 11:42:47 -06:00
parent 104ddac228
commit 29afb523cd
2 changed files with 11 additions and 8 deletions

View File

@@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin):
log.debug('stream.name=%s' % stream.name) log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log) mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname) mainpath = os.path.join(tdir, mainname)
#raw_input()
metadata = get_metadata_from_reader(self._chm_reader) metadata = get_metadata_from_reader(self._chm_reader)
@@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin):
log.debug('Found %d section nodes' % len(chapters)) log.debug('Found %d section nodes' % len(chapters))
htmlpath = os.path.splitext(hhcpath)[0] + ".html" htmlpath = os.path.splitext(hhcpath)[0] + ".html"
f = open(htmlpath, 'wb') f = open(htmlpath, 'wb')
f.write('<html><head><meta http-equiv="Content-type"'
' content="text/html;charset=UTF-8" /></head><body>\n')
if chapters: if chapters:
f.write('<html><head><meta http-equiv="Content-type"'
' content="text/html;charset=UTF-8" /></head><body>\n')
path0 = chapters[0][1] path0 = chapters[0][1]
subpath = os.path.dirname(path0) subpath = os.path.dirname(path0)
@@ -158,7 +156,9 @@ class CHMInput(InputFormatPlugin):
url = url.encode('utf-8') url = url.encode('utf-8')
f.write(url) f.write(url)
f.write("</body></html>") f.write("</body></html>")
else:
f.write(hhcdata)
f.close() f.close()
return htmlpath return htmlpath

View File

@@ -8,7 +8,7 @@ import os, re
from mimetypes import guess_type as guess_mimetype from mimetypes import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
from calibre.constants import iswindows from calibre.constants import iswindows, filesystem_encoding
from calibre.utils.chm.chm import CHMFile from calibre.utils.chm.chm import CHMFile
from calibre.utils.chm.chmlib import ( from calibre.utils.chm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL, CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -78,6 +78,8 @@ class CHMError(Exception):
class CHMReader(CHMFile): class CHMReader(CHMFile):
def __init__(self, input, log): def __init__(self, input, log):
CHMFile.__init__(self) CHMFile.__init__(self)
if isinstance(input, unicode):
input = input.encode(filesystem_encoding)
if not self.LoadCHM(input): if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,)) raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log self.log = log
@@ -91,7 +93,6 @@ class CHMReader(CHMFile):
self.root, ext = os.path.splitext(self.topics.lstrip('/')) self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc" self.hhc_path = self.root + ".hhc"
def _parse_toc(self, ul, basedir=os.getcwdu()): def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='') toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1 self._playorder += 1
@@ -152,6 +153,8 @@ class CHMReader(CHMFile):
if f.lower() == self.hhc_path.lower(): if f.lower() == self.hhc_path.lower():
self.hhc_path = f self.hhc_path = f
break break
if self.hhc_path not in files and files:
self.hhc_path = files[0]
def _reformat(self, data): def _reformat(self, data):
try: try:
@@ -159,7 +162,7 @@ class CHMReader(CHMFile):
soup = BeautifulSoup(data) soup = BeautifulSoup(data)
except ValueError: except ValueError:
# hit some strange encoding problems... # hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :(" self.log.exception("Unable to parse html for cleaning, leaving it")
return data return data
# nuke javascript... # nuke javascript...
[s.extract() for s in soup('script')] [s.extract() for s in soup('script')]