mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: Fix a regression that broke processing of some CHM files
Fixes #1831511 [Can't open chm file](https://bugs.launchpad.net/calibre/+bug/1831511). The bug was introduced by the addition of unicode_literals. Also fixes CHM input on Python 3.
This commit is contained in:
parent
a7fd7a0aee
commit
3f903cbdd1
@ -84,7 +84,7 @@
|
|||||||
"windows_libraries": "ChmLib",
|
"windows_libraries": "ChmLib",
|
||||||
"inc_dirs": "!chmlib_inc_dirs",
|
"inc_dirs": "!chmlib_inc_dirs",
|
||||||
"lib_dirs": "!chmlib_lib_dirs",
|
"lib_dirs": "!chmlib_lib_dirs",
|
||||||
"defines": "SWIG_COBJECT_TYPES"
|
"defines": "SWIG_COBJECT_TYPES SWIG_PYTHON_STRICT_BYTE_CHAR"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "lzx",
|
"name": "lzx",
|
||||||
|
@ -5,7 +5,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||||
|
|
||||||
import os, re, codecs
|
import os, re
|
||||||
|
|
||||||
from calibre import guess_type as guess_mimetype
|
from calibre import guess_type as guess_mimetype
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
|
||||||
@ -15,7 +15,7 @@ from calibre.utils.chm.chm import CHMFile
|
|||||||
from calibre.constants import plugins
|
from calibre.constants import plugins
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from polyglot.builtins import unicode_type, getcwd
|
from polyglot.builtins import unicode_type, getcwd, as_unicode
|
||||||
|
|
||||||
|
|
||||||
chmlib, chmlib_err = plugins['chmlib']
|
chmlib, chmlib_err = plugins['chmlib']
|
||||||
@ -56,12 +56,17 @@ class CHMReader(CHMFile):
|
|||||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||||
self.log = log
|
self.log = log
|
||||||
self.input_encoding = input_encoding
|
self.input_encoding = input_encoding
|
||||||
|
self.chm_encoding = self.get_encoding() or 'cp1252'
|
||||||
self._sourcechm = input
|
self._sourcechm = input
|
||||||
self._contents = None
|
self._contents = None
|
||||||
self._playorder = 0
|
self._playorder = 0
|
||||||
self._metadata = False
|
self._metadata = False
|
||||||
self._extracted = False
|
self._extracted = False
|
||||||
self.re_encoded_files = set()
|
self.re_encoded_files = set()
|
||||||
|
if self.home:
|
||||||
|
self.home = as_unicode(self.home, self.chm_encoding)
|
||||||
|
if self.topics:
|
||||||
|
self.topics = as_unicode(self.topics, self.chm_encoding)
|
||||||
|
|
||||||
# location of '.hhc' file, which is the CHM TOC.
|
# location of '.hhc' file, which is the CHM TOC.
|
||||||
if self.topics is None:
|
if self.topics is None:
|
||||||
@ -91,6 +96,11 @@ class CHMReader(CHMFile):
|
|||||||
# print toc
|
# print toc
|
||||||
return toc
|
return toc
|
||||||
|
|
||||||
|
def ResolveObject(self, path):
|
||||||
|
if not isinstance(path, bytes):
|
||||||
|
path = path.encode(self.chm_encoding)
|
||||||
|
return CHMFile.ResolveObject(self, path)
|
||||||
|
|
||||||
def GetFile(self, path):
|
def GetFile(self, path):
|
||||||
# have to have abs paths for ResolveObject, but Contents() deliberately
|
# have to have abs paths for ResolveObject, but Contents() deliberately
|
||||||
# makes them relative. So we don't have to worry, re-add the leading /.
|
# makes them relative. So we don't have to worry, re-add the leading /.
|
||||||
@ -107,12 +117,7 @@ class CHMReader(CHMFile):
|
|||||||
|
|
||||||
def ExtractFiles(self, output_dir=getcwd(), debug_dump=False):
|
def ExtractFiles(self, output_dir=getcwd(), debug_dump=False):
|
||||||
html_files = set()
|
html_files = set()
|
||||||
try:
|
enc = self.chm_encoding
|
||||||
x = self.get_encoding()
|
|
||||||
codecs.lookup(x)
|
|
||||||
enc = x
|
|
||||||
except:
|
|
||||||
enc = 'cp1252'
|
|
||||||
for path in self.Contents():
|
for path in self.Contents():
|
||||||
fpath = path
|
fpath = path
|
||||||
if not isinstance(path, unicode_type):
|
if not isinstance(path, unicode_type):
|
||||||
@ -275,11 +280,12 @@ class CHMReader(CHMFile):
|
|||||||
paths = []
|
paths = []
|
||||||
|
|
||||||
def get_paths(chm, ui, ctx):
|
def get_paths(chm, ui, ctx):
|
||||||
|
path = as_unicode(ui.path, self.chm_encoding)
|
||||||
# skip directories
|
# skip directories
|
||||||
# note this path refers to the internal CHM structure
|
# note this path refers to the internal CHM structure
|
||||||
if ui.path[-1] != '/':
|
if path[-1] != '/':
|
||||||
# and make paths relative
|
# and make paths relative
|
||||||
paths.append(ui.path.lstrip('/'))
|
paths.append(path.lstrip('/'))
|
||||||
chmlib.chm_enumerate(self.file, chmlib.CHM_ENUMERATE_NORMAL, get_paths, None)
|
chmlib.chm_enumerate(self.file, chmlib.CHM_ENUMERATE_NORMAL, get_paths, None)
|
||||||
self._contents = paths
|
self._contents = paths
|
||||||
return self._contents
|
return self._contents
|
||||||
|
@ -30,7 +30,7 @@ import codecs
|
|||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from calibre.constants import plugins
|
from calibre.constants import plugins, filesystem_encoding
|
||||||
from polyglot.builtins import long_type
|
from polyglot.builtins import long_type
|
||||||
|
|
||||||
chmlib, chmlib_err = plugins['chmlib']
|
chmlib, chmlib_err = plugins['chmlib']
|
||||||
@ -226,7 +226,10 @@ class CHMFile:
|
|||||||
if self.filename is not None:
|
if self.filename is not None:
|
||||||
self.CloseCHM()
|
self.CloseCHM()
|
||||||
|
|
||||||
self.file = chmlib.chm_open(archiveName)
|
path = archiveName
|
||||||
|
if not isinstance(path, bytes):
|
||||||
|
path = path.encode(filesystem_encoding)
|
||||||
|
self.file = chmlib.chm_open(path)
|
||||||
if self.file is None:
|
if self.file is None:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@ -382,11 +385,8 @@ class CHMFile:
|
|||||||
The UnitInfo is used to retrieve the document contents
|
The UnitInfo is used to retrieve the document contents
|
||||||
'''
|
'''
|
||||||
if self.file:
|
if self.file:
|
||||||
# path = os.path.abspath(document)
|
return chmlib.chm_resolve_object(self.file, document)
|
||||||
path = document
|
return 1, None
|
||||||
return chmlib.chm_resolve_object(self.file, path)
|
|
||||||
else:
|
|
||||||
return (1, None)
|
|
||||||
|
|
||||||
def RetrieveObject(self, ui, start=-1, length=-1):
|
def RetrieveObject(self, ui, start=-1, length=-1):
|
||||||
'''Retrieves the contents of a document.
|
'''Retrieves the contents of a document.
|
||||||
@ -442,7 +442,7 @@ class CHMFile:
|
|||||||
if ans:
|
if ans:
|
||||||
try:
|
try:
|
||||||
codecs.lookup(ans)
|
codecs.lookup(ans)
|
||||||
except:
|
except Exception:
|
||||||
ans = None
|
ans = None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -3827,18 +3827,18 @@ int dummy_enumerator (struct chmFile *h,
|
|||||||
if (arglist) {
|
if (arglist) {
|
||||||
result = PyEval_CallObject(my_callback, arglist);
|
result = PyEval_CallObject(my_callback, arglist);
|
||||||
Py_DECREF(arglist);
|
Py_DECREF(arglist);
|
||||||
Py_DECREF(result);
|
|
||||||
|
|
||||||
Py_DECREF(py_h);
|
Py_DECREF(py_h);
|
||||||
Py_DECREF(py_ui);
|
Py_DECREF(py_ui);
|
||||||
|
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
return 0; /* Pass error back */
|
PyErr_Print();
|
||||||
} else {
|
return 0; /* Pass error back */
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
} else
|
Py_DECREF(result);
|
||||||
return 0;
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -60,20 +60,20 @@ int dummy_enumerator (struct chmFile *h,
|
|||||||
/* Time to call the callback */
|
/* Time to call the callback */
|
||||||
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
|
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
|
||||||
if (arglist) {
|
if (arglist) {
|
||||||
result = PyEval_CallObject(my_callback, arglist);
|
result = PyEval_CallObject(my_callback, arglist);
|
||||||
Py_DECREF(arglist);
|
Py_DECREF(arglist);
|
||||||
Py_DECREF(result);
|
|
||||||
|
|
||||||
Py_DECREF(py_h);
|
Py_DECREF(py_h);
|
||||||
Py_DECREF(py_ui);
|
Py_DECREF(py_ui);
|
||||||
|
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
return 0; /* Pass error back */
|
PyErr_Print();
|
||||||
} else {
|
return 0; /* Pass error back */
|
||||||
|
}
|
||||||
|
Py_DECREF(result);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
} else
|
return 0;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user