mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: Handle CHM files that contain files with URL-unsafe filenames. Fixes #1100610 (Error in "Table of contents" when convert CHM to epub)
This commit is contained in:
parent
b9581e5905
commit
79823fbf61
@ -58,9 +58,9 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
metadata = get_metadata_from_reader(self._chm_reader)
|
metadata = get_metadata_from_reader(self._chm_reader)
|
||||||
self._chm_reader.CloseCHM()
|
self._chm_reader.CloseCHM()
|
||||||
#print tdir
|
# print tdir, mainpath
|
||||||
#from calibre import ipython
|
# from calibre import ipython
|
||||||
#ipython()
|
# ipython()
|
||||||
|
|
||||||
options.debug_pipeline = None
|
options.debug_pipeline = None
|
||||||
options.input_encoding = 'utf-8'
|
options.input_encoding = 'utf-8'
|
||||||
@ -143,6 +143,8 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def _create_html_root(self, hhcpath, log):
|
def _create_html_root(self, hhcpath, log):
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
from urllib import unquote as _unquote
|
||||||
|
from calibre.ebooks.oeb.base import urlquote
|
||||||
hhcdata = self._read_file(hhcpath)
|
hhcdata = self._read_file(hhcpath)
|
||||||
hhcroot = html.fromstring(hhcdata)
|
hhcroot = html.fromstring(hhcdata)
|
||||||
chapters = self._process_nodes(hhcroot)
|
chapters = self._process_nodes(hhcroot)
|
||||||
@ -152,23 +154,41 @@ class CHMInput(InputFormatPlugin):
|
|||||||
#print "============================="
|
#print "============================="
|
||||||
log.debug('Found %d section nodes' % len(chapters))
|
log.debug('Found %d section nodes' % len(chapters))
|
||||||
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
|
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
|
||||||
|
base = os.path.dirname(os.path.abspath(htmlpath))
|
||||||
|
|
||||||
|
def unquote(x):
|
||||||
|
if isinstance(x, unicode):
|
||||||
|
x = x.encode('utf-8')
|
||||||
|
return _unquote(x).decode('utf-8')
|
||||||
|
|
||||||
|
def unquote_path(x):
|
||||||
|
y = unquote(x)
|
||||||
|
if (not os.path.exists(os.path.join(base, x)) and
|
||||||
|
os.path.exists(os.path.join(base, y))):
|
||||||
|
x = y
|
||||||
|
return x
|
||||||
|
|
||||||
with open(htmlpath, 'wb') as f:
|
with open(htmlpath, 'wb') as f:
|
||||||
if chapters:
|
if chapters:
|
||||||
f.write('<html><head><meta http-equiv="Content-type"'
|
f.write('<html><head><meta http-equiv="Content-type"'
|
||||||
' content="text/html;charset=UTF-8" /></head><body>\n')
|
' content="text/html;charset=UTF-8" /></head><body>\n')
|
||||||
path0 = chapters[0][1]
|
path0 = chapters[0][1]
|
||||||
|
path0 = unquote_path(path0)
|
||||||
subpath = os.path.dirname(path0)
|
subpath = os.path.dirname(path0)
|
||||||
base = os.path.dirname(f.name)
|
base = os.path.dirname(f.name)
|
||||||
|
|
||||||
for chapter in chapters:
|
for chapter in chapters:
|
||||||
title = chapter[0]
|
title = chapter[0]
|
||||||
rsrcname = os.path.basename(chapter[1])
|
raw = unquote_path(chapter[1])
|
||||||
|
rsrcname = os.path.basename(raw)
|
||||||
rsrcpath = os.path.join(subpath, rsrcname)
|
rsrcpath = os.path.join(subpath, rsrcname)
|
||||||
if (not os.path.exists(os.path.join(base, rsrcpath)) and
|
if (not os.path.exists(os.path.join(base, rsrcpath)) and
|
||||||
os.path.exists(os.path.join(base, chapter[1]))):
|
os.path.exists(os.path.join(base, raw))):
|
||||||
rsrcpath = chapter[1]
|
rsrcpath = raw
|
||||||
|
|
||||||
# title should already be url encoded
|
# title should already be url encoded
|
||||||
|
if '%' not in rsrcpath:
|
||||||
|
rsrcpath = urlquote(rsrcpath)
|
||||||
url = "<br /><a href=" + rsrcpath + ">" + title + " </a>\n"
|
url = "<br /><a href=" + rsrcpath + ">" + title + " </a>\n"
|
||||||
if isinstance(url, unicode):
|
if isinstance(url, unicode):
|
||||||
url = url.encode('utf-8')
|
url = url.encode('utf-8')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user