Misc CHM Input fixes

Mostly to deal with chm files with broken filename encoding
This commit is contained in:
Kovid Goyal 2019-07-30 19:31:06 +05:30
parent c2062a8b3b
commit 4da23549bf
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 15 additions and 6 deletions

View File

@@ -97,9 +97,14 @@ class CHMReader(CHMFile):
 return toc
 def ResolveObject(self, path):
+opath = path
 if not isinstance(path, bytes):
 path = path.encode(self.chm_encoding)
-return CHMFile.ResolveObject(self, path)
+ans = CHMFile.ResolveObject(self, path)
+if ans[0] != chmlib.CHM_RESOLVE_SUCCESS and not isinstance(opath, bytes):
+path = opath.encode('utf-8')
+ans = CHMFile.ResolveObject(self, path)
+return ans
 def GetFile(self, path):
 # have to have abs paths for ResolveObject, but Contents() deliberately
@@ -280,7 +285,11 @@ class CHMReader(CHMFile):
 paths = []
 def get_paths(chm, ui, ctx):
-path = as_unicode(ui.path, self.chm_encoding)
+try:
+path = as_unicode(ui.path, self.chm_encoding)
+except UnicodeDecodeError:
+path = as_unicode(ui.path, 'utf-8')
 # skip directories
 # note this path refers to the internal CHM structure
 if path[-1] != '/':

View File

@@ -10,7 +10,7 @@ import os
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import filesystem_encoding
-from polyglot.builtins import unicode_type
+from polyglot.builtins import unicode_type, as_bytes
 class CHMInput(InputFormatPlugin):
@@ -170,7 +170,7 @@ class CHMInput(InputFormatPlugin):
 pretty_print=True)
 f.write(raw)
 else:
-f.write(hhcdata)
+f.write(as_bytes(hhcdata))
 return htmlpath, toc
 def _read_file(self, name):
@@ -180,7 +180,7 @@ class CHMInput(InputFormatPlugin):
 def add_node(self, node, toc, ancestor_map):
 from calibre.ebooks.chm.reader import match_string
-if match_string(node.attrib['type'], 'text/sitemap'):
+if match_string(node.attrib.get('type', ''), 'text/sitemap'):
 p = node.xpath('ancestor::ul[1]/ancestor::li[1]/object[1]')
 parent = p[0] if p else None
 toc = ancestor_map.get(parent, toc)

View File

@@ -506,7 +506,7 @@ class CHMFile:
 if not self.topics:
 self.topics = self.GetString(text, toc_index)
 if not self.topics.startswith(b"/"):
-self.topics = "b/" + self.topics
+self.topics = b"/" + self.topics
 if not self.index:
 self.index = self.GetString(text, idx_index)