mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: Fix a regression that broke processing of some CHM files
Fixes #1831511 [Can't open chm file](https://bugs.launchpad.net/calibre/+bug/1831511). The bug was introduced by the addition of unicode_literals. Also fix CHM input on Python 3.
This commit is contained in:
parent
a7fd7a0aee
commit
3f903cbdd1
@ -84,7 +84,7 @@
|
||||
"windows_libraries": "ChmLib",
|
||||
"inc_dirs": "!chmlib_inc_dirs",
|
||||
"lib_dirs": "!chmlib_lib_dirs",
|
||||
"defines": "SWIG_COBJECT_TYPES"
|
||||
"defines": "SWIG_COBJECT_TYPES SWIG_PYTHON_STRICT_BYTE_CHAR"
|
||||
},
|
||||
{
|
||||
"name": "lzx",
|
||||
|
@ -5,7 +5,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||
|
||||
import os, re, codecs
|
||||
import os, re
|
||||
|
||||
from calibre import guess_type as guess_mimetype
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
|
||||
@ -15,7 +15,7 @@ from calibre.utils.chm.chm import CHMFile
|
||||
from calibre.constants import plugins
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from polyglot.builtins import unicode_type, getcwd
|
||||
from polyglot.builtins import unicode_type, getcwd, as_unicode
|
||||
|
||||
|
||||
chmlib, chmlib_err = plugins['chmlib']
|
||||
@ -56,12 +56,17 @@ class CHMReader(CHMFile):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self.input_encoding = input_encoding
|
||||
self.chm_encoding = self.get_encoding() or 'cp1252'
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
self._metadata = False
|
||||
self._extracted = False
|
||||
self.re_encoded_files = set()
|
||||
if self.home:
|
||||
self.home = as_unicode(self.home, self.chm_encoding)
|
||||
if self.topics:
|
||||
self.topics = as_unicode(self.topics, self.chm_encoding)
|
||||
|
||||
# location of '.hhc' file, which is the CHM TOC.
|
||||
if self.topics is None:
|
||||
@ -91,6 +96,11 @@ class CHMReader(CHMFile):
|
||||
# print toc
|
||||
return toc
|
||||
|
||||
def ResolveObject(self, path):
|
||||
if not isinstance(path, bytes):
|
||||
path = path.encode(self.chm_encoding)
|
||||
return CHMFile.ResolveObject(self, path)
|
||||
|
||||
def GetFile(self, path):
|
||||
# have to have abs paths for ResolveObject, but Contents() deliberately
|
||||
# makes them relative. So we don't have to worry, re-add the leading /.
|
||||
@ -107,12 +117,7 @@ class CHMReader(CHMFile):
|
||||
|
||||
def ExtractFiles(self, output_dir=getcwd(), debug_dump=False):
|
||||
html_files = set()
|
||||
try:
|
||||
x = self.get_encoding()
|
||||
codecs.lookup(x)
|
||||
enc = x
|
||||
except:
|
||||
enc = 'cp1252'
|
||||
enc = self.chm_encoding
|
||||
for path in self.Contents():
|
||||
fpath = path
|
||||
if not isinstance(path, unicode_type):
|
||||
@ -275,11 +280,12 @@ class CHMReader(CHMFile):
|
||||
paths = []
|
||||
|
||||
def get_paths(chm, ui, ctx):
|
||||
path = as_unicode(ui.path, self.chm_encoding)
|
||||
# skip directories
|
||||
# note this path refers to the internal CHM structure
|
||||
if ui.path[-1] != '/':
|
||||
if path[-1] != '/':
|
||||
# and make paths relative
|
||||
paths.append(ui.path.lstrip('/'))
|
||||
paths.append(path.lstrip('/'))
|
||||
chmlib.chm_enumerate(self.file, chmlib.CHM_ENUMERATE_NORMAL, get_paths, None)
|
||||
self._contents = paths
|
||||
return self._contents
|
||||
|
@ -30,7 +30,7 @@ import codecs
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from calibre.constants import plugins
|
||||
from calibre.constants import plugins, filesystem_encoding
|
||||
from polyglot.builtins import long_type
|
||||
|
||||
chmlib, chmlib_err = plugins['chmlib']
|
||||
@ -226,7 +226,10 @@ class CHMFile:
|
||||
if self.filename is not None:
|
||||
self.CloseCHM()
|
||||
|
||||
self.file = chmlib.chm_open(archiveName)
|
||||
path = archiveName
|
||||
if not isinstance(path, bytes):
|
||||
path = path.encode(filesystem_encoding)
|
||||
self.file = chmlib.chm_open(path)
|
||||
if self.file is None:
|
||||
return 0
|
||||
|
||||
@ -382,11 +385,8 @@ class CHMFile:
|
||||
The UnitInfo is used to retrieve the document contents
|
||||
'''
|
||||
if self.file:
|
||||
# path = os.path.abspath(document)
|
||||
path = document
|
||||
return chmlib.chm_resolve_object(self.file, path)
|
||||
else:
|
||||
return (1, None)
|
||||
return chmlib.chm_resolve_object(self.file, document)
|
||||
return 1, None
|
||||
|
||||
def RetrieveObject(self, ui, start=-1, length=-1):
|
||||
'''Retrieves the contents of a document.
|
||||
@ -442,7 +442,7 @@ class CHMFile:
|
||||
if ans:
|
||||
try:
|
||||
codecs.lookup(ans)
|
||||
except:
|
||||
except Exception:
|
||||
ans = None
|
||||
return ans
|
||||
|
||||
|
@ -3827,18 +3827,18 @@ int dummy_enumerator (struct chmFile *h,
|
||||
if (arglist) {
|
||||
result = PyEval_CallObject(my_callback, arglist);
|
||||
Py_DECREF(arglist);
|
||||
Py_DECREF(result);
|
||||
|
||||
Py_DECREF(py_h);
|
||||
Py_DECREF(py_ui);
|
||||
|
||||
if (result == NULL) {
|
||||
return 0; /* Pass error back */
|
||||
} else {
|
||||
return 1;
|
||||
PyErr_Print();
|
||||
return 0; /* Pass error back */
|
||||
}
|
||||
} else
|
||||
return 0;
|
||||
Py_DECREF(result);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -60,20 +60,20 @@ int dummy_enumerator (struct chmFile *h,
|
||||
/* Time to call the callback */
|
||||
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
|
||||
if (arglist) {
|
||||
result = PyEval_CallObject(my_callback, arglist);
|
||||
Py_DECREF(arglist);
|
||||
Py_DECREF(result);
|
||||
|
||||
Py_DECREF(py_h);
|
||||
Py_DECREF(py_ui);
|
||||
|
||||
if (result == NULL) {
|
||||
return 0; /* Pass error back */
|
||||
} else {
|
||||
result = PyEval_CallObject(my_callback, arglist);
|
||||
Py_DECREF(arglist);
|
||||
|
||||
Py_DECREF(py_h);
|
||||
Py_DECREF(py_ui);
|
||||
|
||||
if (result == NULL) {
|
||||
PyErr_Print();
|
||||
return 0; /* Pass error back */
|
||||
}
|
||||
Py_DECREF(result);
|
||||
return 1;
|
||||
}
|
||||
} else
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
%}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user