Update the SWIG bindings for chmlib

Also get rid of the chm_extra module, since we don't care about searching CHM files and get_lcid can be implemented in pure Python.
2025-11-28 09:15:02 -05:00 · 2018-12-27 14:44:43 +05:30 · 2018-12-27 14:44:43 +05:30 · e47a2d888b
commit e47a2d888b
parent 07f81ed878
7 changed files with 5683 additions and 1767 deletions
--- a/setup/extensions.json
+++ b/setup/extensions.json
@ -86,15 +86,6 @@
        "lib_dirs": "!chmlib_lib_dirs",
        "defines": "SWIG_COBJECT_TYPES"
    },
-    {
-        "name": "chm_extra",
-        "sources": "calibre/utils/chm/extra.c",
-        "libraries": "chm",
-        "windows_libraries": "ChmLib",
-        "inc_dirs": "!chmlib_inc_dirs",
-        "lib_dirs": "!chmlib_lib_dirs",
-        "defines": "__PYTHON__"
-    },
    {
        "name": "lzx",
        "sources": "calibre/utils/lzx/lzxmodule.c calibre/utils/lzx/compressor.c calibre/utils/lzx/lzxd.c calibre/utils/lzx/lzc.c calibre/utils/lzx/lzxc.c",
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -165,7 +165,6 @@ class Plugins(collections.Mapping):
                'cPalmdoc',
                'progress_indicator',
                'chmlib',
-                'chm_extra',
                'icu',
                'speedup',
                'unicode_names',
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -10,15 +10,17 @@ from calibre import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
 from calibre.constants import iswindows, filesystem_encoding
 from calibre.utils.chm.chm import CHMFile
-from calibre.utils.chm.chmlib import (
-  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
-  chm_enumerate,
-)

+from calibre.constants import plugins
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.chardet import xml_to_unicode


+chmlib, chmlib_err = plugins['chmlib']
+if chmlib_err:
+    raise RuntimeError('Failed to load chmlib: ' + chmlib_err)
+
+
 def match_string(s1, s2_already_lowered):
    if s1 is not None and s2_already_lowered is not None:
        if s1.lower()==s2_already_lowered:
@ -94,7 +96,7 @@ class CHMReader(CHMFile):
        if path[0] != '/':
            path = '/' + path
        res, ui = self.ResolveObject(path)
-        if res != CHM_RESOLVE_SUCCESS:
+        if res != chmlib.CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
@ -276,7 +278,7 @@ class CHMReader(CHMFile):
            if ui.path[-1] != '/':
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
-        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
+        chmlib.chm_enumerate(self.file, chmlib.CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

--- a/src/calibre/utils/chm/chm.py
+++ b/src/calibre/utils/chm/chm.py
@ -26,15 +26,15 @@
 '''

 import array
-import sys
 import codecs
+import struct
+import sys

-import calibre.utils.chm.chmlib as chmlib
 from calibre.constants import plugins

-extra, extra_err = plugins['chm_extra']
-if extra_err:
-    raise RuntimeError('Failed to load chm.extra: '+extra_err)
+chmlib, chmlib_err = plugins['chmlib']
+if chmlib_err:
+    raise RuntimeError('Failed to load chmlib: ' + chmlib_err)

 charset_table = {
    0   : 'iso8859_1',  # ANSI_CHARSET
@ -188,6 +188,19 @@ locale_table = {
 }


+def get_lcid(chm_file_obj):
+    '''Return the locale ID (LCID) stored in a CHM archive.
+
+    Each candidate internal file is tried in order; the first one that
+    resolves and yields a full 4 byte read at its offset wins.
+
+    :param chm_file_obj: the chmlib file handle, passed unchanged to
+        ``chmlib.chm_resolve_object`` and ``chmlib.chm_retrieve_object``.
+    :return: the LCID as an unsigned integer, or ``None`` when no candidate
+        file could be resolved and read.
+    '''
+    # (internal path, byte offset of the 32-bit LCID field inside that file)
+    candidates = (
+        (b"/$FIftiMain",               0x7E),
+        (b"$WWKeywordLinks/BTree",     0x34),
+        (b"$WWAssociativeLinks/BTree", 0x34),
+    )
+    for path, offset in candidates:
+        result, ui = chmlib.chm_resolve_object(chm_file_obj, path)
+        if result != chmlib.CHM_RESOLVE_SUCCESS:
+            continue
+        size, text = chmlib.chm_retrieve_object(chm_file_obj, ui, offset, 4)
+        if size == 4:
+            # CHM stores its integers little-endian, so decode with an
+            # explicit byte order rather than the host's native one ("I"),
+            # which would give a byte-swapped LCID on big-endian machines.
+            return struct.unpack("<I", text)[0]
+    return None
+
+
 class CHMFile:
    "A class to manage access to CHM files."
    filename = ""
@ -242,9 +255,6 @@ class CHMFile:
        obtain the index, home page, topics, encoding and title. It is called
        from LoadCHM.
        '''
-
-        # extra.is_searchable crashed...
-        # self.searchable = extra.is_searchable (self.file)
        self.searchable = False
        self.lcid = None

@ -319,7 +329,9 @@ class CHMFile:
        self.GetWindowsInfo()

        if not self.lcid:
-            self.lcid = extra.get_lcid(self.file)
+            lcid = get_lcid(self.file)
+            if lcid is not None:
+                self.lcid = lcid

        return 1

@ -394,21 +406,6 @@ class CHMFile:
        else:
            return (0, '')

-    def Search(self, text, wholewords=0, titleonly=0):
-        '''Performs full-text search on the archive.
-        The first parameter is the word to look for, the second
-        indicates if the search should be for whole words only, and
-        the third parameter indicates if the search should be
-        restricted to page titles.
-        This method will return a tuple, the first item
-        indicating if the search results were partial, and the second
-        item being a dictionary containing the results.'''
-        if text and text != '' and self.file:
-            return extra.search(self.file, text, wholewords,
-                                titleonly)
-        else:
-            return None
-
    def IsSearchable(self):
        '''Indicates if the full-text search is available for this
        archive - this flag is updated when GetArchiveInfo is called'''
--- a/src/calibre/utils/chm/chmlib.py
+++ b/src/calibre/utils/chm/chmlib.py
@ -1,120 +0,0 @@
-# This file was created automatically by SWIG.
-# Don't modify this file, modify the SWIG interface instead.
-# This file is compatible with both classic and new-style classes.
-
-from calibre.constants import plugins
-
-_chmlib, chmlib_err = plugins['chmlib']
-
-if chmlib_err:
-    raise RuntimeError('Failed to load chmlib: '+chmlib_err)
-
-
-def _swig_setattr(self,class_type,name,value):
-    if (name == "this"):
-        if isinstance(value, class_type):
-            self.__dict__[name] = value.this
-            if hasattr(value,"thisown"):
-                self.__dict__["thisown"] = value.thisown
-            del value.thisown
-            return
-    method = class_type.__swig_setmethods__.get(name,None)
-    if method:
-        return method(self,value)
-    self.__dict__[name] = value
-
-
-def _swig_getattr(self,class_type,name):
-    method = class_type.__swig_getmethods__.get(name,None)
-    if method:
-        return method(self)
-    raise AttributeError(name)
-
-import types
-try:
-    _object = types.ObjectType
-    _newclass = 1
-except AttributeError:
-    class _object :
-        pass
-    _newclass = 0
-
-
-CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
-CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
-CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
-
-
-class chmUnitInfo(_object):
-    __swig_setmethods__ = {}
-    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
-    __swig_getmethods__ = {}
-    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
-    __swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
-    __swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
-    if _newclass:
-        start = property(_chmlib.chmUnitInfo_start_get,_chmlib.chmUnitInfo_start_set)
-    __swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
-    __swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
-    if _newclass:
-        length = property(_chmlib.chmUnitInfo_length_get,_chmlib.chmUnitInfo_length_set)
-    __swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
-    __swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
-    if _newclass:
-        space = property(_chmlib.chmUnitInfo_space_get,_chmlib.chmUnitInfo_space_set)
-    __swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
-    __swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
-    if _newclass:
-        path = property(_chmlib.chmUnitInfo_path_get,_chmlib.chmUnitInfo_path_set)
-
-    def __init__(self,*args):
-        _swig_setattr(self, chmUnitInfo, 'this', apply(_chmlib.new_chmUnitInfo,args))
-        _swig_setattr(self, chmUnitInfo, 'thisown', 1)
-
-    def __del__(self, destroy=_chmlib.delete_chmUnitInfo):
-        try:
-            if self.thisown:
-                destroy(self)
-        except:
-            pass
-
-    def __repr__(self):
-        return "<C chmUnitInfo instance at %s>" % (self.this,)
-
-
-class chmUnitInfoPtr(chmUnitInfo):
-
-    def __init__(self,this):
-        _swig_setattr(self, chmUnitInfo, 'this', this)
-        if not hasattr(self,"thisown"):
-            _swig_setattr(self, chmUnitInfo, 'thisown', 0)
-        _swig_setattr(self, chmUnitInfo,self.__class__,chmUnitInfo)
-_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)
-
-chm_open = _chmlib.chm_open
-
-chm_close = _chmlib.chm_close
-
-CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
-chm_set_param = _chmlib.chm_set_param
-
-CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
-CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
-chm_resolve_object = _chmlib.chm_resolve_object
-
-chm_retrieve_object = _chmlib.chm_retrieve_object
-
-CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
-CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
-CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
-CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
-CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
-CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
-CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
-CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
-CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
-chm_enumerate = _chmlib.chm_enumerate
-
-chm_enumerate_dir = _chmlib.chm_enumerate_dir
-
-
--- a/src/calibre/utils/chm/extra.c
+++ b/src/calibre/utils/chm/extra.c
@ -1,754 +0,0 @@
-/*
- * extra.c - full-text search support for pychm
- *
- * Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Author: Rubens Ramos <rubensr@users.sourceforge.net>
- *
- * Heavily based on work done by:
- * Pabs <pabs@zip.to> - chmdeco
- * Razvan Cojocaru <razvanco@gmx.net> - xCHM
- *
- */
-
-#include "chm_lib.h"
-#ifdef __PYTHON__
-#include "Python.h"
-#else
-#include <stdio.h>
-#define PyObject void
-#endif
-
-#include <stdlib.h>
-
-#ifdef _MSC_VER
-#include <stdint.h>
-#define strcasecmp stricmp
-#define strncasecmp strnicmp
-#else
-#include <inttypes.h>
-#include <strings.h>
-#endif
-
-#if defined( _MSC_VER ) && !defined( __cplusplus )
-# define inline __inline
-#endif
-
-#if defined(_WIN32) || defined(__WIN32__)
-#       if defined(_MSC_VER)
-#               if defined(STATIC_LINKED)
-#                       define MODEXPORT(a) a
-#                       define MODIMPORT(a) extern a
-#               else
-#                       define MODEXPORT(a) __declspec(dllexport) a
-#                       define MODIMPORT(a) extern a
-#               endif
-#       else
-#               if defined(__BORLANDC__)
-#                       define MODEXPORT(a) a _export
-#                       define MODIMPORT(a) a _export
-#               else
-#                       define MODEXPORT(a) a
-#                       define MODIMPORT(a) a
-#               endif
-#       endif
-#else
-#       define MODEXPORT(a) a
-#       define MODIMPORT(a) a
-#endif
-
-#define false 0
-#define true 1
-
-#define FTS_HEADER_LEN 0x32
-#define TOPICS_ENTRY_LEN 16
-#define COMMON_BUF_LEN 1025
-
-#define FREE(x) free (x); x = NULL
-
-inline uint16_t
-get_uint16 (uint8_t* b) {
-  return b[0] |
-    b[1]<<8;
-}
-
-inline uint32_t
-get_uint32 (uint8_t* b) {
-  return b[0] |
-    b[1]<<8   |
-    b[2]<<16  |
-    b[3]<<24;
-}
-
-inline uint64_t
-get_uint64 (uint8_t* b) {
-  return b[0]           |
-    b[1]<<8             |
-    b[2]<<16            |
-    b[3]<<24            |
-    (uint64_t) b[4]<<32 |
-    (uint64_t) b[5]<<40 |
-    (uint64_t) b[6]<<48 |
-    (uint64_t) b[7]<<56;
-}
-
-inline uint64_t
-be_encint (unsigned char *buffer, size_t *length)
-{
-  uint64_t result = 0;
-  int shift=0;
-  *length = 0;
-
-  do {
-    result |= ((*buffer) & 0x7f) << shift;
-    shift += 7;
-    *length = *length + 1;
-
-  } while (*(buffer++) & 0x80);
-
-  return result;
-}
-
-/*
-  Finds the first unset bit in memory. Returns the number of set bits found.
-  Returns -1 if the buffer runs out before we find an unset bit.
-*/
-inline int
-ffus (unsigned char* byte, int* bit, size_t *length) {
-  int bits = 0;
-  *length = 0;
-
-  while(*byte & (1 << *bit)){
-    if(*bit)
-      --(*bit);
-    else {
-      ++byte;
-      ++(*length);
-      *bit = 7;
-    }
-    ++bits;
-  }
-
-  if(*bit)
-    --(*bit);
-  else {
-    ++(*length);
-    *bit = 7;
-  }
-
-  return bits;
-}
-
-
-static inline uint64_t
-sr_int(unsigned char* byte, int* bit,
-       unsigned char s, unsigned char r, size_t *length)
-{
-  uint64_t ret;
-  unsigned char mask;
-  int n, n_bits, num_bits, base, count;
-  size_t fflen;
-
-  *length = 0;
-
-  if(!bit || *bit > 7 || s != 2)
-    return ~(uint64_t)0;
-  ret = 0;
-
-  count = ffus(byte, bit, &fflen);
-  *length += fflen;
-  byte += *length;
-
-  n_bits = n = r + (count ? count-1 : 0) ;
-
-  while (n > 0) {
-    num_bits = n > *bit ? *bit : n-1;
-    base = n > *bit ? 0 : *bit - (n-1);
-
-    switch (num_bits){
-    case 0:
-      mask = 1;
-      break;
-    case 1:
-      mask = 3;
-      break;
-    case 2:
-      mask = 7;
-      break;
-    case 3:
-      mask = 0xf;
-      break;
-    case 4:
-      mask = 0x1f;
-      break;
-    case 5:
-      mask = 0x3f;
-      break;
-    case 6:
-      mask = 0x7f;
-      break;
-    case 7:
-      mask = 0xff;
-      break;
-    default:
-      mask = 0xff;
-      break;
-    }
-
-    mask <<= base;
-    ret = (ret << (num_bits+1)) |
-      (uint64_t)((*byte & mask) >> base);
-
-    if( n > *bit ){
-      ++byte;
-      ++(*length);
-      n -= *bit+1;
-      *bit = 7;
-    } else {
-      *bit -= n;
-      n = 0;
-    }
-  }
-
-  if(count)
-    ret |= (uint64_t)1 << n_bits;
-
-  return ret;
-}
-
-
-static inline uint32_t
-get_leaf_node_offset(struct chmFile *chmfile,
-                     const char *text,
-                     uint32_t initial_offset,
-                     uint32_t buff_size,
-                     uint16_t tree_depth,
-                     struct chmUnitInfo *ui)
-{
-  unsigned char word_len;
-  unsigned char pos;
-  uint16_t free_space;
-  char *wrd_buf;
-  char *word = NULL;
-  uint32_t test_offset = 0;
-  uint32_t i = sizeof(uint16_t);
-  unsigned char *buffer = (unsigned char *)malloc (buff_size);
-
-  if (NULL == buffer)
-    return 0;
-
-  while (--tree_depth) {
-    if (initial_offset == test_offset) {
-      FREE(buffer);
-      return 0;
-    }
-
-    test_offset = initial_offset;
-    if (chm_retrieve_object (chmfile, ui, buffer,
-                             initial_offset, buff_size) == 0) {
-      FREE(buffer);
-      return 0;
-    }
-
-    free_space = get_uint16 (buffer);
-
-    while (i < buff_size - free_space) {
-
-      word_len = *(buffer + i);
-      pos = *(buffer + i + 1);
-
-      wrd_buf = (char*)malloc (word_len);
-      memcpy (wrd_buf, buffer + i + 2, word_len - 1);
-      wrd_buf[word_len - 1] = 0;
-
-      if (pos == 0) {
-        FREE (word);
-        word = (char *) strdup (wrd_buf);
-      } else {
-        word = (char*)realloc (word, word_len + pos + 1);
-        strcpy (word + pos, wrd_buf);
-      }
-
-      FREE(wrd_buf);
-
-      if (strcasecmp (text, word) <= 0) {
-        initial_offset = get_uint32 (buffer + i + word_len + 1);
-        break;
-      }
-
-      i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
-        sizeof(uint16_t);
-    }
-  }
-
-  if(initial_offset == test_offset)
-    initial_offset = 0;
-
-  FREE(word);
-  FREE(buffer);
-
-  return initial_offset;
-}
-
-static inline int
-pychm_process_wlc (struct chmFile *chmfile,
-                   uint64_t wlc_count, uint64_t wlc_size,
-                   uint32_t wlc_offset, unsigned char ds,
-                   unsigned char dr, unsigned char cs,
-                   unsigned char cr, unsigned char ls,
-                   unsigned char lr, struct chmUnitInfo *uimain,
-                   struct chmUnitInfo* uitbl,
-                   struct chmUnitInfo *uistrings,
-                   struct chmUnitInfo* topics,
-                   struct chmUnitInfo *urlstr,
-                   PyObject *dict)
-{
-  uint32_t stroff, urloff;
-  uint64_t i, j, count;
-  size_t length;
-  int wlc_bit = 7;
-  size_t off = 0;
-  uint64_t index = 0;
-  unsigned char entry[TOPICS_ENTRY_LEN];
-  unsigned char combuf[COMMON_BUF_LEN];
-  unsigned char *buffer = (unsigned char *)malloc (wlc_size);
-  char *url = NULL;
-  char *topic = NULL;
-
-  if (chm_retrieve_object(chmfile, uimain, buffer,
-                          wlc_offset, wlc_size) == 0) {
-    FREE(buffer);
-    return false;
-  }
-
-  for (i = 0; i < wlc_count; ++i) {
-
-    if(wlc_bit != 7) {
-      ++off;
-      wlc_bit = 7;
-    }
-
-    index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
-    off += length;
-
-    if(chm_retrieve_object(chmfile, topics, entry,
-                           index * 16, TOPICS_ENTRY_LEN) == 0) {
-      FREE(topic);
-      FREE(url);
-      FREE(buffer);
-      return false;
-    }
-
-    combuf[COMMON_BUF_LEN - 1] = 0;
-    stroff = get_uint32 (entry + 4);
-
-    FREE (topic);
-    if (chm_retrieve_object (chmfile, uistrings, combuf,
-                             stroff, COMMON_BUF_LEN - 1) == 0) {
-      topic = strdup ("Untitled in index");
-
-    } else {
-      combuf[COMMON_BUF_LEN - 1] = 0;
-
-      topic = strdup ((char*)combuf);
-    }
-
-    urloff = get_uint32 (entry + 8);
-
-    if(chm_retrieve_object (chmfile, uitbl, combuf,
-                            urloff, 12) == 0) {
-      FREE(buffer);
-      return false;
-    }
-
-    urloff = get_uint32 (combuf + 8);
-
-    if (chm_retrieve_object (chmfile, urlstr, combuf,
-                             urloff + 8, COMMON_BUF_LEN - 1) == 0) {
-      FREE(topic);
-      FREE(url);
-      FREE(buffer);
-      return false;
-    }
-
-    combuf[COMMON_BUF_LEN - 1] = 0;
-
-    FREE (url);
-    url = strdup ((char*)combuf);
-
-    if (url && topic) {
-#ifdef __PYTHON__
-      PyDict_SetItemString (dict, topic,
-                            PyString_FromString (url));
-#else
-      printf ("%s ==> %s\n", url, topic);
-#endif
-    }
-
-    count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
-    off += length;
-
-    for (j = 0; j < count; ++j) {
-      sr_int (buffer + off, &wlc_bit, ls, lr, &length);
-      off += length;
-    }
-  }
-
-  FREE(topic);
-  FREE(url);
-  FREE(buffer);
-
-  return true;
-}
-
-int
-chm_search (struct chmFile *chmfile,
-            const char *text, int whole_words,
-            int titles_only, PyObject *dict)
-{
-  unsigned char header[FTS_HEADER_LEN];
-  unsigned char doc_index_s;
-  unsigned char doc_index_r;
-  unsigned char code_count_s;
-  unsigned char code_count_r;
-  unsigned char loc_codes_s;
-  unsigned char loc_codes_r;
-  unsigned char word_len, pos;
-  unsigned char *buffer;
-  char *word = NULL;
-  uint32_t node_offset;
-  uint32_t node_len;
-  uint16_t tree_depth;
-  uint32_t i;
-  uint16_t free_space;
-  uint64_t wlc_count, wlc_size;
-  uint32_t wlc_offset;
-  char *wrd_buf;
-  unsigned char title;
-  size_t encsz;
-  struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
-  int partial = false;
-
-  if (NULL == text)
-    return -1;
-
-  if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
-      CHM_RESOLVE_SUCCESS ||
-      chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
-      CHM_RESOLVE_SUCCESS ||
-      chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
-      CHM_RESOLVE_SUCCESS ||
-      chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
-      CHM_RESOLVE_SUCCESS ||
-      chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
-      CHM_RESOLVE_SUCCESS)
-    return false;
-
-  if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) == 0)
-    return false;
-
-  doc_index_s = header[0x1E];
-  doc_index_r = header[0x1F];
-  code_count_s = header[0x20];
-  code_count_r = header[0x21];
-  loc_codes_s = header[0x22];
-  loc_codes_r = header[0x23];
-
-  if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
-    return false;
-  }
-
-  node_offset = get_uint32 (header + 0x14);
-  node_len = get_uint32 (header + 0x2e);
-  tree_depth = get_uint16 (header + 0x18);
-
-  i = sizeof(uint16_t);
-
-  buffer = (unsigned char*)malloc (node_len);
-
-  node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
-                                      tree_depth, &ui);
-
-  if (!node_offset) {
-    FREE(buffer);
-    return false;
-  }
-
-  do {
-
-    if (chm_retrieve_object (chmfile, &ui, buffer,
-                             node_offset, node_len) == 0) {
-      FREE(word);
-      FREE(buffer);
-      return false;
-    }
-
-    free_space = get_uint16 (buffer + 6);
-
-    i = sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
-
-    encsz = 0;
-
-    while (i < node_len - free_space) {
-      word_len = *(buffer + i);
-      pos = *(buffer + i + 1);
-
-      wrd_buf = (char*)malloc (word_len);
-      memcpy (wrd_buf, buffer + i + 2, word_len - 1);
-      wrd_buf[word_len - 1] = 0;
-
-      if (pos == 0) {
-        FREE(word);
-        word = (char *) strdup (wrd_buf);
-      } else {
-        word = (char*)realloc (word, word_len + pos + 1);
-        strcpy (word + pos, wrd_buf);
-      }
-
-      FREE(wrd_buf);
-
-      i += 2 + word_len;
-      title = *(buffer + i - 1);
-
-      wlc_count = be_encint (buffer + i, &encsz);
-      i += encsz;
-
-      wlc_offset = get_uint32 (buffer + i);
-
-      i += sizeof(uint32_t) + sizeof(uint16_t);
-      wlc_size =  be_encint (buffer + i, &encsz);
-      i += encsz;
-
-      node_offset = get_uint32 (buffer);
-
-      if (!title && titles_only)
-        continue;
-
-      if (whole_words && !strcasecmp(text, word)) {
-        partial = pychm_process_wlc (chmfile, wlc_count, wlc_size,
-                                     wlc_offset, doc_index_s,
-                                     doc_index_r,code_count_s,
-                                     code_count_r, loc_codes_s,
-                                     loc_codes_r, &ui, &uiurltbl,
-                                     &uistrings, &uitopics,
-                                     &uiurlstr, dict);
-        FREE(word);
-        FREE(buffer);
-        return partial;
-      }
-
-      if (!whole_words) {
-        if (!strncasecmp (word, text, strlen(text))) {
-          partial = true;
-          pychm_process_wlc (chmfile, wlc_count, wlc_size,
-                             wlc_offset, doc_index_s,
-                             doc_index_r,code_count_s,
-                             code_count_r, loc_codes_s,
-                             loc_codes_r, &ui, &uiurltbl,
-                             &uistrings, &uitopics,
-                             &uiurlstr, dict);
-
-        } else if (strncasecmp (text, word, strlen(text)) < -1)
-          break;
-      }
-
-    }
-  } while (!whole_words &&
-           !strncmp (word, text, strlen(text)) &&
-           node_offset);
-
-  FREE(word);
-  FREE(buffer);
-
-  return partial;
-}
-
-typedef struct {
-  const char *file;
-  int offset;
-} Langrec;
-
-Langrec lang_files[] = {
-  {"/$FIftiMain",               0x7E},
-  {"$WWKeywordLinks/BTree",     0x34},
-  {"$WWAssociativeLinks/BTree", 0x34}
-};
-
-#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
-
-int
-chm_get_lcid (struct chmFile *chmfile) {
-  struct chmUnitInfo ui;
-  uint32_t lang;
-  int i;
-
-  for (i=0; i<LANG_FILES_SIZE; i++) {
-
-    if (chm_resolve_object (chmfile, lang_files[i].file, &ui) ==
-        CHM_RESOLVE_SUCCESS) {
-
-      if (chm_retrieve_object (chmfile, &ui, (unsigned char *) &lang,
-                               lang_files[i].offset, sizeof(uint32_t)) != 0)
-        return lang;
-    }
-  }
-
-  return -1;
-}
-
-#ifdef __PYTHON__
-
-static PyObject *
-is_searchable (PyObject *self, PyObject *args) {
-  struct chmFile *file;
-  PyObject *obj0;
-  struct chmUnitInfo ui;
-
-  if (PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
-
-    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
-
-    if (chm_resolve_object (file, "/$FIftiMain", &ui) !=
-        CHM_RESOLVE_SUCCESS ||
-        chm_resolve_object (file, "/#TOPICS", &ui) !=
-        CHM_RESOLVE_SUCCESS ||
-        chm_resolve_object (file, "/#STRINGS", &ui) !=
-        CHM_RESOLVE_SUCCESS ||
-        chm_resolve_object (file, "/#URLTBL", &ui) !=
-        CHM_RESOLVE_SUCCESS ||
-        chm_resolve_object (file, "/#URLSTR", &ui) !=
-        CHM_RESOLVE_SUCCESS)
-      return Py_BuildValue ("i", 0);
-    else
-      return Py_BuildValue ("i", 1);
-  } else {
-    PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
-    return NULL;
-  }
-}
-
-static PyObject *
-search (PyObject *self, PyObject *args) {
-  char *text;
-  int whole_words;
-  int titles_only;
-  int partial;
-  struct chmFile *file;
-  PyObject *obj0;
-  PyObject *dict;
-
-  if (PyArg_ParseTuple (args, "Osii:search", &obj0, &text,
-                        &whole_words, &titles_only)) {
-
-    dict = PyDict_New();
-
-    if (dict) {
-      file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
-
-      partial = chm_search (file,
-                            text, whole_words, titles_only, dict);
-
-      return Py_BuildValue ("(iO)", partial, dict);
-
-    } else {
-      PyErr_NoMemory();
-      return NULL;
-    }
-  } else {
-    PyErr_SetString(PyExc_TypeError,
-                    "Expected chmfile (not CHMFile!), string, int, int");
-    return NULL;
-  }
-}
-
-static PyObject *
-get_lcid (PyObject *self, PyObject *args) {
-  int code;
-  struct chmFile *file;
-  PyObject *obj0;
-
-  if (PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
-
-      file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
-
-      code = chm_get_lcid (file);
-
-      if (code != -1)
-        return Py_BuildValue ("i", code);
-      else
-        Py_INCREF(Py_None);
-      return Py_None;
-  } else {
-    PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
-    return NULL;
-  }
-}
-
-static PyMethodDef
-IndexMethods[] = {
-  {"get_lcid", get_lcid, METH_VARARGS,
-   "Returns LCID (Locale ID) for archive."},
-  {"search", search, METH_VARARGS,
-   "Perform Full-Text search."},
-  {"is_searchable", is_searchable, METH_VARARGS,
-   "Return 1 if it is possible to search the archive, 0 otherwise."},
-  {NULL, NULL, 0, NULL}
-};
-
-#ifdef __cplusplus
-extern "C"
-#endif
-CALIBRE_MODINIT_FUNC
-initchm_extra (void) {
-  Py_InitModule ("chm_extra", IndexMethods);
-}
-
-#else
-
-int
-main (int argc, char **argv) {
-  struct chmFile *file;
-  char text[255];
-  int whole_words, titles_only;
-  int partial;
-
-  if (argc == 2) {
-    file = chm_open (argv[1]);
-
-    if (file) {
-      printf ("\nLCID= %d (%08X)\n", chm_get_lcid(file), chm_get_lcid(file));
-      while (1) {
-        printf ("\n<whole_words> <titles_only> <string>\n");
-        printf ("> ");
-        if (scanf ("%d %d %s", &whole_words, &titles_only, text))
-          partial = chm_search (file,
-                                text, whole_words, titles_only, NULL);
-        else
-          break;
-
-        printf ("Partial = %d\n", partial);
-      }
-
-      chm_close (file);
-      return 0;
-    }
-
-    return -1;
-
-  } else {
-    printf ("\n%s <filename>\n", argv[0]);
-    return 0;
-  }
-}
-
-#endif
--- a/src/calibre/utils/chm/swig_chm.c
+++ b/src/calibre/utils/chm/swig_chm.c