CHM Input: Fix a regression that broke processing of some CHM files

Fixes #1831511 [Can't open chm file](https://bugs.launchpad.net/calibre/+bug/1831511)

The bug was introduced by the addition of unicode_literals.
Also fix CHM input on Python 3.
This commit is contained in:
Kovid Goyal 2019-06-05 09:08:17 +05:30
parent a7fd7a0aee
commit 3f903cbdd1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 44 additions and 38 deletions

View File

@ -84,7 +84,7 @@
"windows_libraries": "ChmLib",
"inc_dirs": "!chmlib_inc_dirs",
"lib_dirs": "!chmlib_lib_dirs",
"defines": "SWIG_COBJECT_TYPES"
"defines": "SWIG_COBJECT_TYPES SWIG_PYTHON_STRICT_BYTE_CHAR"
},
{
"name": "lzx",

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re, codecs
import os, re
from calibre import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
@ -15,7 +15,7 @@ from calibre.utils.chm.chm import CHMFile
from calibre.constants import plugins
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type, getcwd
from polyglot.builtins import unicode_type, getcwd, as_unicode
chmlib, chmlib_err = plugins['chmlib']
@ -56,12 +56,17 @@ class CHMReader(CHMFile):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self.input_encoding = input_encoding
self.chm_encoding = self.get_encoding() or 'cp1252'
self._sourcechm = input
self._contents = None
self._playorder = 0
self._metadata = False
self._extracted = False
self.re_encoded_files = set()
if self.home:
self.home = as_unicode(self.home, self.chm_encoding)
if self.topics:
self.topics = as_unicode(self.topics, self.chm_encoding)
# location of '.hhc' file, which is the CHM TOC.
if self.topics is None:
@ -91,6 +96,11 @@ class CHMReader(CHMFile):
# print toc
return toc
def ResolveObject(self, path):
    """Resolve *path* inside the CHM archive via the base class.

    A path that is not already ``bytes`` is encoded with
    ``self.chm_encoding`` before being handed to
    ``CHMFile.ResolveObject``; a ``bytes`` path is passed through
    untouched. The return value is whatever the base implementation
    returns.
    """
    # The base-class lookup is always given a byte string.
    raw_path = path if isinstance(path, bytes) else path.encode(self.chm_encoding)
    return CHMFile.ResolveObject(self, raw_path)
def GetFile(self, path):
# have to have abs paths for ResolveObject, but Contents() deliberately
# makes them relative. So we don't have to worry, re-add the leading /.
@ -107,12 +117,7 @@ class CHMReader(CHMFile):
def ExtractFiles(self, output_dir=getcwd(), debug_dump=False):
html_files = set()
try:
x = self.get_encoding()
codecs.lookup(x)
enc = x
except:
enc = 'cp1252'
enc = self.chm_encoding
for path in self.Contents():
fpath = path
if not isinstance(path, unicode_type):
@ -275,11 +280,12 @@ class CHMReader(CHMFile):
paths = []
def get_paths(chm, ui, ctx):
path = as_unicode(ui.path, self.chm_encoding)
# skip directories
# note this path refers to the internal CHM structure
if ui.path[-1] != '/':
if path[-1] != '/':
# and make paths relative
paths.append(ui.path.lstrip('/'))
paths.append(path.lstrip('/'))
chmlib.chm_enumerate(self.file, chmlib.CHM_ENUMERATE_NORMAL, get_paths, None)
self._contents = paths
return self._contents

View File

@ -30,7 +30,7 @@ import codecs
import struct
import sys
from calibre.constants import plugins
from calibre.constants import plugins, filesystem_encoding
from polyglot.builtins import long_type
chmlib, chmlib_err = plugins['chmlib']
@ -226,7 +226,10 @@ class CHMFile:
if self.filename is not None:
self.CloseCHM()
self.file = chmlib.chm_open(archiveName)
path = archiveName
if not isinstance(path, bytes):
path = path.encode(filesystem_encoding)
self.file = chmlib.chm_open(path)
if self.file is None:
return 0
@ -382,11 +385,8 @@ class CHMFile:
The UnitInfo is used to retrieve the document contents
'''
if self.file:
# path = os.path.abspath(document)
path = document
return chmlib.chm_resolve_object(self.file, path)
else:
return (1, None)
return chmlib.chm_resolve_object(self.file, document)
return 1, None
def RetrieveObject(self, ui, start=-1, length=-1):
'''Retrieves the contents of a document.
@ -442,7 +442,7 @@ class CHMFile:
if ans:
try:
codecs.lookup(ans)
except:
except Exception:
ans = None
return ans

View File

@ -3827,18 +3827,18 @@ int dummy_enumerator (struct chmFile *h,
if (arglist) {
result = PyEval_CallObject(my_callback, arglist);
Py_DECREF(arglist);
Py_DECREF(result);
Py_DECREF(py_h);
Py_DECREF(py_ui);
if (result == NULL) {
return 0; /* Pass error back */
} else {
return 1;
PyErr_Print();
return 0; /* Pass error back */
}
} else
return 0;
Py_DECREF(result);
return 1;
}
return 0;
}

View File

@ -60,20 +60,20 @@ int dummy_enumerator (struct chmFile *h,
/* Time to call the callback */
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
if (arglist) {
result = PyEval_CallObject(my_callback, arglist);
Py_DECREF(arglist);
Py_DECREF(result);
Py_DECREF(py_h);
Py_DECREF(py_ui);
if (result == NULL) {
return 0; /* Pass error back */
} else {
result = PyEval_CallObject(my_callback, arglist);
Py_DECREF(arglist);
Py_DECREF(py_h);
Py_DECREF(py_ui);
if (result == NULL) {
PyErr_Print();
return 0; /* Pass error back */
}
Py_DECREF(result);
return 1;
}
} else
return 0;
}
return 0;
}
%}