mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Misc CHM Input fixes
Mostly to deal with CHM files with broken filename encoding
This commit is contained in:
parent
c2062a8b3b
commit
4da23549bf
@ -97,9 +97,14 @@ class CHMReader(CHMFile):
|
|||||||
return toc
|
return toc
|
||||||
|
|
||||||
def ResolveObject(self, path):
|
def ResolveObject(self, path):
|
||||||
|
opath = path
|
||||||
if not isinstance(path, bytes):
|
if not isinstance(path, bytes):
|
||||||
path = path.encode(self.chm_encoding)
|
path = path.encode(self.chm_encoding)
|
||||||
return CHMFile.ResolveObject(self, path)
|
ans = CHMFile.ResolveObject(self, path)
|
||||||
|
if ans[0] != chmlib.CHM_RESOLVE_SUCCESS and not isinstance(opath, bytes):
|
||||||
|
path = opath.encode('utf-8')
|
||||||
|
ans = CHMFile.ResolveObject(self, path)
|
||||||
|
return ans
|
||||||
|
|
||||||
def GetFile(self, path):
|
def GetFile(self, path):
|
||||||
# have to have abs paths for ResolveObject, but Contents() deliberately
|
# have to have abs paths for ResolveObject, but Contents() deliberately
|
||||||
@ -280,7 +285,11 @@ class CHMReader(CHMFile):
|
|||||||
paths = []
|
paths = []
|
||||||
|
|
||||||
def get_paths(chm, ui, ctx):
|
def get_paths(chm, ui, ctx):
|
||||||
path = as_unicode(ui.path, self.chm_encoding)
|
try:
|
||||||
|
path = as_unicode(ui.path, self.chm_encoding)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
path = as_unicode(ui.path, 'utf-8')
|
||||||
|
|
||||||
# skip directories
|
# skip directories
|
||||||
# note this path refers to the internal CHM structure
|
# note this path refers to the internal CHM structure
|
||||||
if path[-1] != '/':
|
if path[-1] != '/':
|
||||||
|
@ -10,7 +10,7 @@ import os
|
|||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.constants import filesystem_encoding
|
from calibre.constants import filesystem_encoding
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type, as_bytes
|
||||||
|
|
||||||
|
|
||||||
class CHMInput(InputFormatPlugin):
|
class CHMInput(InputFormatPlugin):
|
||||||
@ -170,7 +170,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
pretty_print=True)
|
pretty_print=True)
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
else:
|
else:
|
||||||
f.write(hhcdata)
|
f.write(as_bytes(hhcdata))
|
||||||
return htmlpath, toc
|
return htmlpath, toc
|
||||||
|
|
||||||
def _read_file(self, name):
|
def _read_file(self, name):
|
||||||
@ -180,7 +180,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def add_node(self, node, toc, ancestor_map):
|
def add_node(self, node, toc, ancestor_map):
|
||||||
from calibre.ebooks.chm.reader import match_string
|
from calibre.ebooks.chm.reader import match_string
|
||||||
if match_string(node.attrib['type'], 'text/sitemap'):
|
if match_string(node.attrib.get('type', ''), 'text/sitemap'):
|
||||||
p = node.xpath('ancestor::ul[1]/ancestor::li[1]/object[1]')
|
p = node.xpath('ancestor::ul[1]/ancestor::li[1]/object[1]')
|
||||||
parent = p[0] if p else None
|
parent = p[0] if p else None
|
||||||
toc = ancestor_map.get(parent, toc)
|
toc = ancestor_map.get(parent, toc)
|
||||||
|
@ -506,7 +506,7 @@ class CHMFile:
|
|||||||
if not self.topics:
|
if not self.topics:
|
||||||
self.topics = self.GetString(text, toc_index)
|
self.topics = self.GetString(text, toc_index)
|
||||||
if not self.topics.startswith(b"/"):
|
if not self.topics.startswith(b"/"):
|
||||||
self.topics = "b/" + self.topics
|
self.topics = b"/" + self.topics
|
||||||
|
|
||||||
if not self.index:
|
if not self.index:
|
||||||
self.index = self.GetString(text, idx_index)
|
self.index = self.GetString(text, idx_index)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user