mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update the SWIG bindings for chmlib
Also get rid of the chm_extra module, since we don't care about searching CHM files and get_lcid can be implemented in pure Python.
This commit is contained in:
parent
07f81ed878
commit
e47a2d888b
@ -86,15 +86,6 @@
|
||||
"lib_dirs": "!chmlib_lib_dirs",
|
||||
"defines": "SWIG_COBJECT_TYPES"
|
||||
},
|
||||
{
|
||||
"name": "chm_extra",
|
||||
"sources": "calibre/utils/chm/extra.c",
|
||||
"libraries": "chm",
|
||||
"windows_libraries": "ChmLib",
|
||||
"inc_dirs": "!chmlib_inc_dirs",
|
||||
"lib_dirs": "!chmlib_lib_dirs",
|
||||
"defines": "__PYTHON__"
|
||||
},
|
||||
{
|
||||
"name": "lzx",
|
||||
"sources": "calibre/utils/lzx/lzxmodule.c calibre/utils/lzx/compressor.c calibre/utils/lzx/lzxd.c calibre/utils/lzx/lzc.c calibre/utils/lzx/lzxc.c",
|
||||
|
@ -165,7 +165,6 @@ class Plugins(collections.Mapping):
|
||||
'cPalmdoc',
|
||||
'progress_indicator',
|
||||
'chmlib',
|
||||
'chm_extra',
|
||||
'icu',
|
||||
'speedup',
|
||||
'unicode_names',
|
||||
|
@ -10,15 +10,17 @@ from calibre import guess_type as guess_mimetype
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
|
||||
from calibre.constants import iswindows, filesystem_encoding
|
||||
from calibre.utils.chm.chm import CHMFile
|
||||
from calibre.utils.chm.chmlib import (
|
||||
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
|
||||
chm_enumerate,
|
||||
)
|
||||
|
||||
from calibre.constants import plugins
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
|
||||
chmlib, chmlib_err = plugins['chmlib']
|
||||
if chmlib_err:
|
||||
raise RuntimeError('Failed to load chmlib: ' + chmlib_err)
|
||||
|
||||
|
||||
def match_string(s1, s2_already_lowered):
|
||||
if s1 is not None and s2_already_lowered is not None:
|
||||
if s1.lower()==s2_already_lowered:
|
||||
@ -94,7 +96,7 @@ class CHMReader(CHMFile):
|
||||
if path[0] != '/':
|
||||
path = '/' + path
|
||||
res, ui = self.ResolveObject(path)
|
||||
if res != CHM_RESOLVE_SUCCESS:
|
||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
|
||||
size, data = self.RetrieveObject(ui)
|
||||
if size == 0:
|
||||
@ -276,7 +278,7 @@ class CHMReader(CHMFile):
|
||||
if ui.path[-1] != '/':
|
||||
# and make paths relative
|
||||
paths.append(ui.path.lstrip('/'))
|
||||
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
|
||||
chmlib.chm_enumerate(self.file, chmlib.CHM_ENUMERATE_NORMAL, get_paths, None)
|
||||
self._contents = paths
|
||||
return self._contents
|
||||
|
||||
|
@ -26,15 +26,15 @@
|
||||
'''
|
||||
|
||||
import array
|
||||
import sys
|
||||
import codecs
|
||||
import struct
|
||||
import sys
|
||||
|
||||
import calibre.utils.chm.chmlib as chmlib
|
||||
from calibre.constants import plugins
|
||||
|
||||
extra, extra_err = plugins['chm_extra']
|
||||
if extra_err:
|
||||
raise RuntimeError('Failed to load chm.extra: '+extra_err)
|
||||
chmlib, chmlib_err = plugins['chmlib']
|
||||
if chmlib_err:
|
||||
raise RuntimeError('Failed to load chmlib: ' + chmlib_err)
|
||||
|
||||
charset_table = {
|
||||
0 : 'iso8859_1', # ANSI_CHARSET
|
||||
@ -188,6 +188,19 @@ locale_table = {
|
||||
}
|
||||
|
||||
|
||||
def get_lcid(chm_file_obj):
    """Return the locale ID (LCID) recorded in an open CHM archive.

    Three internal objects are probed in order; the first one that can be
    resolved and yields a full four bytes at its fixed offset wins.

    :param chm_file_obj: the low-level chmlib file handle (not a CHMFile).
    :return: the LCID as an int, or None when no probed object supplies one.
    """
    for path, offset in (
        (b"/$FIftiMain", 0x7E),
        (b"$WWKeywordLinks/BTree", 0x34),
        (b"$WWAssociativeLinks/BTree", 0x34),
    ):
        result, ui = chmlib.chm_resolve_object(chm_file_obj, path)
        if result != chmlib.CHM_RESOLVE_SUCCESS:
            continue
        size, text = chmlib.chm_retrieve_object(chm_file_obj, ui, offset, 4)
        if size == 4:
            # Explicit little-endian: a bare "I" would use the host's byte
            # order and give a different LCID on big-endian machines.
            return struct.unpack("<I", text)[0]
    return None
|
||||
|
||||
|
||||
class CHMFile:
|
||||
"A class to manage access to CHM files."
|
||||
filename = ""
|
||||
@ -242,9 +255,6 @@ class CHMFile:
|
||||
obtain the index, home page, topics, encoding and title. It is called
|
||||
from LoadCHM.
|
||||
'''
|
||||
|
||||
# extra.is_searchable crashed...
|
||||
# self.searchable = extra.is_searchable (self.file)
|
||||
self.searchable = False
|
||||
self.lcid = None
|
||||
|
||||
@ -319,7 +329,9 @@ class CHMFile:
|
||||
self.GetWindowsInfo()
|
||||
|
||||
if not self.lcid:
|
||||
self.lcid = extra.get_lcid(self.file)
|
||||
lcid = get_lcid(self.file)
|
||||
if lcid is not None:
|
||||
self.lcid = lcid
|
||||
|
||||
return 1
|
||||
|
||||
@ -394,21 +406,6 @@ class CHMFile:
|
||||
else:
|
||||
return (0, '')
|
||||
|
||||
def Search(self, text, wholewords=0, titleonly=0):
|
||||
'''Performs full-text search on the archive.
|
||||
The first parameter is the word to look for, the second
|
||||
indicates if the search should be for whole words only, and
|
||||
the third parameter indicates if the search should be
|
||||
restricted to page titles.
|
||||
This method will return a tuple, the first item
|
||||
indicating if the search results were partial, and the second
|
||||
item being a dictionary containing the results.'''
|
||||
if text and text != '' and self.file:
|
||||
return extra.search(self.file, text, wholewords,
|
||||
titleonly)
|
||||
else:
|
||||
return None
|
||||
|
||||
def IsSearchable(self):
|
||||
'''Indicates if the full-text search is available for this
|
||||
archive - this flag is updated when GetArchiveInfo is called'''
|
||||
|
@ -1,120 +0,0 @@
|
||||
# This file was created automatically by SWIG.
|
||||
# Don't modify this file, modify the SWIG interface instead.
|
||||
# This file is compatible with both classic and new-style classes.
|
||||
|
||||
from calibre.constants import plugins
|
||||
|
||||
_chmlib, chmlib_err = plugins['chmlib']
|
||||
|
||||
if chmlib_err:
|
||||
raise RuntimeError('Failed to load chmlib: '+chmlib_err)
|
||||
|
||||
|
||||
def _swig_setattr(self,class_type,name,value):
|
||||
if (name == "this"):
|
||||
if isinstance(value, class_type):
|
||||
self.__dict__[name] = value.this
|
||||
if hasattr(value,"thisown"):
|
||||
self.__dict__["thisown"] = value.thisown
|
||||
del value.thisown
|
||||
return
|
||||
method = class_type.__swig_setmethods__.get(name,None)
|
||||
if method:
|
||||
return method(self,value)
|
||||
self.__dict__[name] = value
|
||||
|
||||
|
||||
def _swig_getattr(self,class_type,name):
|
||||
method = class_type.__swig_getmethods__.get(name,None)
|
||||
if method:
|
||||
return method(self)
|
||||
raise AttributeError(name)
|
||||
|
||||
import types
|
||||
try:
|
||||
_object = types.ObjectType
|
||||
_newclass = 1
|
||||
except AttributeError:
|
||||
class _object :
|
||||
pass
|
||||
_newclass = 0
|
||||
|
||||
|
||||
CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
|
||||
CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
|
||||
CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
|
||||
|
||||
|
||||
class chmUnitInfo(_object):
|
||||
__swig_setmethods__ = {}
|
||||
__setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
|
||||
__swig_getmethods__ = {}
|
||||
__getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
|
||||
__swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
|
||||
__swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
|
||||
if _newclass:
|
||||
start = property(_chmlib.chmUnitInfo_start_get,_chmlib.chmUnitInfo_start_set)
|
||||
__swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
|
||||
__swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
|
||||
if _newclass:
|
||||
length = property(_chmlib.chmUnitInfo_length_get,_chmlib.chmUnitInfo_length_set)
|
||||
__swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
|
||||
__swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
|
||||
if _newclass:
|
||||
space = property(_chmlib.chmUnitInfo_space_get,_chmlib.chmUnitInfo_space_set)
|
||||
__swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
|
||||
__swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
|
||||
if _newclass:
|
||||
path = property(_chmlib.chmUnitInfo_path_get,_chmlib.chmUnitInfo_path_set)
|
||||
|
||||
def __init__(self,*args):
|
||||
_swig_setattr(self, chmUnitInfo, 'this', apply(_chmlib.new_chmUnitInfo,args))
|
||||
_swig_setattr(self, chmUnitInfo, 'thisown', 1)
|
||||
|
||||
def __del__(self, destroy=_chmlib.delete_chmUnitInfo):
|
||||
try:
|
||||
if self.thisown:
|
||||
destroy(self)
|
||||
except:
|
||||
pass
|
||||
|
||||
def __repr__(self):
|
||||
return "<C chmUnitInfo instance at %s>" % (self.this,)
|
||||
|
||||
|
||||
class chmUnitInfoPtr(chmUnitInfo):
|
||||
|
||||
def __init__(self,this):
|
||||
_swig_setattr(self, chmUnitInfo, 'this', this)
|
||||
if not hasattr(self,"thisown"):
|
||||
_swig_setattr(self, chmUnitInfo, 'thisown', 0)
|
||||
_swig_setattr(self, chmUnitInfo,self.__class__,chmUnitInfo)
|
||||
_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)
|
||||
|
||||
chm_open = _chmlib.chm_open
|
||||
|
||||
chm_close = _chmlib.chm_close
|
||||
|
||||
CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
|
||||
chm_set_param = _chmlib.chm_set_param
|
||||
|
||||
CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
|
||||
CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
|
||||
chm_resolve_object = _chmlib.chm_resolve_object
|
||||
|
||||
chm_retrieve_object = _chmlib.chm_retrieve_object
|
||||
|
||||
CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
|
||||
CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
|
||||
CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
|
||||
CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
|
||||
CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
|
||||
CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
|
||||
CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
|
||||
CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
|
||||
CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
|
||||
chm_enumerate = _chmlib.chm_enumerate
|
||||
|
||||
chm_enumerate_dir = _chmlib.chm_enumerate_dir
|
||||
|
||||
|
@ -1,754 +0,0 @@
|
||||
/*
|
||||
* extra.c - full-text search support for pychm
|
||||
*
|
||||
* Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* Author: Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
*
|
||||
* Heavily based on work done by:
|
||||
* Pabs <pabs@zip.to> - chmdeco
|
||||
* Razvan Cojocaru <razvanco@gmx.net> - xCHM
|
||||
*
|
||||
*/
|
||||
|
||||
#include "chm_lib.h"
|
||||
#ifdef __PYTHON__
|
||||
#include "Python.h"
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#define PyObject void
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stdint.h>
|
||||
#define strcasecmp stricmp
|
||||
#define strncasecmp strnicmp
|
||||
#else
|
||||
#include <inttypes.h>
|
||||
#include <strings.h>
|
||||
#endif
|
||||
|
||||
#if defined( _MSC_VER ) && !defined( __cplusplus )
|
||||
# define inline __inline
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(__WIN32__)
|
||||
# if defined(_MSC_VER)
|
||||
# if defined(STATIC_LINKED)
|
||||
# define MODEXPORT(a) a
|
||||
# define MODIMPORT(a) extern a
|
||||
# else
|
||||
# define MODEXPORT(a) __declspec(dllexport) a
|
||||
# define MODIMPORT(a) extern a
|
||||
# endif
|
||||
# else
|
||||
# if defined(__BORLANDC__)
|
||||
# define MODEXPORT(a) a _export
|
||||
# define MODIMPORT(a) a _export
|
||||
# else
|
||||
# define MODEXPORT(a) a
|
||||
# define MODIMPORT(a) a
|
||||
# endif
|
||||
# endif
|
||||
#else
|
||||
# define MODEXPORT(a) a
|
||||
# define MODIMPORT(a) a
|
||||
#endif
|
||||
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
#define FTS_HEADER_LEN 0x32
|
||||
#define TOPICS_ENTRY_LEN 16
|
||||
#define COMMON_BUF_LEN 1025
|
||||
|
||||
#define FREE(x) free (x); x = NULL
|
||||
|
||||
inline uint16_t
|
||||
get_uint16 (uint8_t* b) {
|
||||
return b[0] |
|
||||
b[1]<<8;
|
||||
}
|
||||
|
||||
inline uint32_t
|
||||
get_uint32 (uint8_t* b) {
|
||||
return b[0] |
|
||||
b[1]<<8 |
|
||||
b[2]<<16 |
|
||||
b[3]<<24;
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
get_uint64 (uint8_t* b) {
|
||||
return b[0] |
|
||||
b[1]<<8 |
|
||||
b[2]<<16 |
|
||||
b[3]<<24 |
|
||||
(uint64_t) b[4]<<32 |
|
||||
(uint64_t) b[5]<<40 |
|
||||
(uint64_t) b[6]<<48 |
|
||||
(uint64_t) b[7]<<56;
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
be_encint (unsigned char *buffer, size_t *length)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int shift=0;
|
||||
*length = 0;
|
||||
|
||||
do {
|
||||
result |= ((*buffer) & 0x7f) << shift;
|
||||
shift += 7;
|
||||
*length = *length + 1;
|
||||
|
||||
} while (*(buffer++) & 0x80);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
Finds the first unset bit in memory. Returns the number of set bits found.
|
||||
Returns -1 if the buffer runs out before we find an unset bit.
|
||||
*/
|
||||
inline int
|
||||
ffus (unsigned char* byte, int* bit, size_t *length) {
|
||||
int bits = 0;
|
||||
*length = 0;
|
||||
|
||||
while(*byte & (1 << *bit)){
|
||||
if(*bit)
|
||||
--(*bit);
|
||||
else {
|
||||
++byte;
|
||||
++(*length);
|
||||
*bit = 7;
|
||||
}
|
||||
++bits;
|
||||
}
|
||||
|
||||
if(*bit)
|
||||
--(*bit);
|
||||
else {
|
||||
++(*length);
|
||||
*bit = 7;
|
||||
}
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Decode one scale/root encoded integer from a bit stream.
 *
 * byte:   byte holding the current bit position.
 * bit:    in/out bit index inside that byte (7 = most significant).
 * s, r:   scale and root parameters; only s == 2 is handled.
 * length: out-parameter, number of whole bytes consumed.
 *
 * The run of set bits counted by ffus() is the prefix; the value is then
 * r + max(prefix - 1, 0) further bits, with an extra leading one bit when
 * the prefix is non-empty. Returns all-ones when bit is NULL, *bit > 7 or
 * s != 2 (with *length set to 0).
 */
static inline uint64_t
sr_int(unsigned char* byte, int* bit,
       unsigned char s, unsigned char r, size_t *length)
{
    uint64_t value = 0;
    unsigned char mask;
    int remaining, total_bits, top, base, prefix;
    size_t prefix_len;

    *length = 0;

    if (!bit || *bit > 7 || s != 2)
        return ~(uint64_t)0;

    prefix = ffus(byte, bit, &prefix_len);
    *length += prefix_len;
    byte += *length;

    total_bits = remaining = r + (prefix ? prefix - 1 : 0);

    while (remaining > 0) {
        /* take what is left of this byte, or just the bits still needed */
        if (remaining > *bit) {
            top = *bit;
            base = 0;
        } else {
            top = remaining - 1;
            base = *bit - (remaining - 1);
        }

        /* top + 1 low bits set; anything outside 0..6 selects the full byte */
        if (top >= 0 && top < 7)
            mask = (unsigned char) ((1 << (top + 1)) - 1);
        else
            mask = 0xff;

        mask <<= base;
        value = (value << (top + 1)) |
            (uint64_t)((*byte & mask) >> base);

        if (remaining > *bit) {
            ++byte;
            ++(*length);
            remaining -= *bit + 1;
            *bit = 7;
        } else {
            *bit -= remaining;
            remaining = 0;
        }
    }

    if (prefix)
        value |= (uint64_t)1 << total_bits;

    return value;
}
|
||||
|
||||
|
||||
/*
 * Walk the index levels of the full-text-search tree stored in `ui` and
 * return the offset of the leaf node that may contain `text`.
 *
 * chmfile:        open chmlib handle.
 * text:           word being looked up (compared case-insensitively).
 * initial_offset: offset of the root node inside the object.
 * buff_size:      size of one tree node in bytes.
 * tree_depth:     depth of the tree; tree_depth - 1 index levels are walked.
 * ui:             unit info of the object holding the tree.
 *
 * Returns the leaf offset, or 0 when the node cannot be read, the walk makes
 * no progress, memory runs out, or an entry is malformed (zero word length,
 * entry running past the node, prefix longer than the previous word).
 */
static inline uint32_t
get_leaf_node_offset(struct chmFile *chmfile,
                     const char *text,
                     uint32_t initial_offset,
                     uint32_t buff_size,
                     uint16_t tree_depth,
                     struct chmUnitInfo *ui)
{
    unsigned char word_len;
    unsigned char pos;
    uint16_t free_space;
    char *grown;
    char *word = NULL;
    uint32_t test_offset = 0;
    /* NOTE(review): i is initialised once and not reset for each tree
       level; kept as in the original - confirm this is intended. */
    uint32_t i = sizeof(uint16_t);
    unsigned char *buffer;

    if (buff_size < sizeof(uint16_t))
        return 0;

    buffer = (unsigned char *)malloc (buff_size);
    if (NULL == buffer)
        return 0;

    while (--tree_depth) {
        if (initial_offset == test_offset)
            goto fail;

        test_offset = initial_offset;
        if (chm_retrieve_object (chmfile, ui, buffer,
                                 initial_offset, buff_size) == 0)
            goto fail;

        free_space = get_uint16 (buffer);
        if (free_space > buff_size)
            goto fail;          /* would wrap the unsigned bound below */

        while (i < buff_size - free_space) {

            if ((uint64_t) i + 2 > buff_size)
                goto fail;

            word_len = *(buffer + i);
            pos = *(buffer + i + 1);

            /* word_len - 1 bytes of text follow, then a 32-bit child offset;
               a zero word_len would wrap the copy size */
            if (word_len == 0 ||
                (uint64_t) i + word_len + 1 + sizeof(uint32_t) > buff_size)
                goto fail;

            /* the entry keeps the first `pos` characters of the previous
               word, so that word must exist and be long enough */
            if (pos != 0 && (word == NULL || strlen (word) < pos))
                goto fail;

            grown = (char *) realloc (word, (size_t) pos + word_len);
            if (NULL == grown)
                goto fail;
            word = grown;
            memcpy (word + pos, buffer + i + 2, word_len - 1);
            word[pos + word_len - 1] = 0;

            if (strcasecmp (text, word) <= 0) {
                initial_offset = get_uint32 (buffer + i + word_len + 1);
                break;
            }

            i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
                sizeof(uint16_t);
        }
    }

    if (initial_offset == test_offset)
        initial_offset = 0;

    FREE(word);
    FREE(buffer);

    return initial_offset;

fail:
    /* single exit for every error so that word is never leaked */
    FREE(word);
    FREE(buffer);
    return 0;
}
|
||||
|
||||
static inline int
|
||||
pychm_process_wlc (struct chmFile *chmfile,
|
||||
uint64_t wlc_count, uint64_t wlc_size,
|
||||
uint32_t wlc_offset, unsigned char ds,
|
||||
unsigned char dr, unsigned char cs,
|
||||
unsigned char cr, unsigned char ls,
|
||||
unsigned char lr, struct chmUnitInfo *uimain,
|
||||
struct chmUnitInfo* uitbl,
|
||||
struct chmUnitInfo *uistrings,
|
||||
struct chmUnitInfo* topics,
|
||||
struct chmUnitInfo *urlstr,
|
||||
PyObject *dict)
|
||||
{
|
||||
uint32_t stroff, urloff;
|
||||
uint64_t i, j, count;
|
||||
size_t length;
|
||||
int wlc_bit = 7;
|
||||
size_t off = 0;
|
||||
uint64_t index = 0;
|
||||
unsigned char entry[TOPICS_ENTRY_LEN];
|
||||
unsigned char combuf[COMMON_BUF_LEN];
|
||||
unsigned char *buffer = (unsigned char *)malloc (wlc_size);
|
||||
char *url = NULL;
|
||||
char *topic = NULL;
|
||||
|
||||
if (chm_retrieve_object(chmfile, uimain, buffer,
|
||||
wlc_offset, wlc_size) == 0) {
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 0; i < wlc_count; ++i) {
|
||||
|
||||
if(wlc_bit != 7) {
|
||||
++off;
|
||||
wlc_bit = 7;
|
||||
}
|
||||
|
||||
index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
|
||||
off += length;
|
||||
|
||||
if(chm_retrieve_object(chmfile, topics, entry,
|
||||
index * 16, TOPICS_ENTRY_LEN) == 0) {
|
||||
FREE(topic);
|
||||
FREE(url);
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||
stroff = get_uint32 (entry + 4);
|
||||
|
||||
FREE (topic);
|
||||
if (chm_retrieve_object (chmfile, uistrings, combuf,
|
||||
stroff, COMMON_BUF_LEN - 1) == 0) {
|
||||
topic = strdup ("Untitled in index");
|
||||
|
||||
} else {
|
||||
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||
|
||||
topic = strdup ((char*)combuf);
|
||||
}
|
||||
|
||||
urloff = get_uint32 (entry + 8);
|
||||
|
||||
if(chm_retrieve_object (chmfile, uitbl, combuf,
|
||||
urloff, 12) == 0) {
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
urloff = get_uint32 (combuf + 8);
|
||||
|
||||
if (chm_retrieve_object (chmfile, urlstr, combuf,
|
||||
urloff + 8, COMMON_BUF_LEN - 1) == 0) {
|
||||
FREE(topic);
|
||||
FREE(url);
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||
|
||||
FREE (url);
|
||||
url = strdup ((char*)combuf);
|
||||
|
||||
if (url && topic) {
|
||||
#ifdef __PYTHON__
|
||||
PyDict_SetItemString (dict, topic,
|
||||
PyString_FromString (url));
|
||||
#else
|
||||
printf ("%s ==> %s\n", url, topic);
|
||||
#endif
|
||||
}
|
||||
|
||||
count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
|
||||
off += length;
|
||||
|
||||
for (j = 0; j < count; ++j) {
|
||||
sr_int (buffer + off, &wlc_bit, ls, lr, &length);
|
||||
off += length;
|
||||
}
|
||||
}
|
||||
|
||||
FREE(topic);
|
||||
FREE(url);
|
||||
FREE(buffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
chm_search (struct chmFile *chmfile,
|
||||
const char *text, int whole_words,
|
||||
int titles_only, PyObject *dict)
|
||||
{
|
||||
unsigned char header[FTS_HEADER_LEN];
|
||||
unsigned char doc_index_s;
|
||||
unsigned char doc_index_r;
|
||||
unsigned char code_count_s;
|
||||
unsigned char code_count_r;
|
||||
unsigned char loc_codes_s;
|
||||
unsigned char loc_codes_r;
|
||||
unsigned char word_len, pos;
|
||||
unsigned char *buffer;
|
||||
char *word = NULL;
|
||||
uint32_t node_offset;
|
||||
uint32_t node_len;
|
||||
uint16_t tree_depth;
|
||||
uint32_t i;
|
||||
uint16_t free_space;
|
||||
uint64_t wlc_count, wlc_size;
|
||||
uint32_t wlc_offset;
|
||||
char *wrd_buf;
|
||||
unsigned char title;
|
||||
size_t encsz;
|
||||
struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
|
||||
int partial = false;
|
||||
|
||||
if (NULL == text)
|
||||
return -1;
|
||||
|
||||
if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
|
||||
CHM_RESOLVE_SUCCESS)
|
||||
return false;
|
||||
|
||||
if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) == 0)
|
||||
return false;
|
||||
|
||||
doc_index_s = header[0x1E];
|
||||
doc_index_r = header[0x1F];
|
||||
code_count_s = header[0x20];
|
||||
code_count_r = header[0x21];
|
||||
loc_codes_s = header[0x22];
|
||||
loc_codes_r = header[0x23];
|
||||
|
||||
if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
node_offset = get_uint32 (header + 0x14);
|
||||
node_len = get_uint32 (header + 0x2e);
|
||||
tree_depth = get_uint16 (header + 0x18);
|
||||
|
||||
i = sizeof(uint16_t);
|
||||
|
||||
buffer = (unsigned char*)malloc (node_len);
|
||||
|
||||
node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
|
||||
tree_depth, &ui);
|
||||
|
||||
if (!node_offset) {
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
do {
|
||||
|
||||
if (chm_retrieve_object (chmfile, &ui, buffer,
|
||||
node_offset, node_len) == 0) {
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
free_space = get_uint16 (buffer + 6);
|
||||
|
||||
i = sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
|
||||
|
||||
encsz = 0;
|
||||
|
||||
while (i < node_len - free_space) {
|
||||
word_len = *(buffer + i);
|
||||
pos = *(buffer + i + 1);
|
||||
|
||||
wrd_buf = (char*)malloc (word_len);
|
||||
memcpy (wrd_buf, buffer + i + 2, word_len - 1);
|
||||
wrd_buf[word_len - 1] = 0;
|
||||
|
||||
if (pos == 0) {
|
||||
FREE(word);
|
||||
word = (char *) strdup (wrd_buf);
|
||||
} else {
|
||||
word = (char*)realloc (word, word_len + pos + 1);
|
||||
strcpy (word + pos, wrd_buf);
|
||||
}
|
||||
|
||||
FREE(wrd_buf);
|
||||
|
||||
i += 2 + word_len;
|
||||
title = *(buffer + i - 1);
|
||||
|
||||
wlc_count = be_encint (buffer + i, &encsz);
|
||||
i += encsz;
|
||||
|
||||
wlc_offset = get_uint32 (buffer + i);
|
||||
|
||||
i += sizeof(uint32_t) + sizeof(uint16_t);
|
||||
wlc_size = be_encint (buffer + i, &encsz);
|
||||
i += encsz;
|
||||
|
||||
node_offset = get_uint32 (buffer);
|
||||
|
||||
if (!title && titles_only)
|
||||
continue;
|
||||
|
||||
if (whole_words && !strcasecmp(text, word)) {
|
||||
partial = pychm_process_wlc (chmfile, wlc_count, wlc_size,
|
||||
wlc_offset, doc_index_s,
|
||||
doc_index_r,code_count_s,
|
||||
code_count_r, loc_codes_s,
|
||||
loc_codes_r, &ui, &uiurltbl,
|
||||
&uistrings, &uitopics,
|
||||
&uiurlstr, dict);
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
return partial;
|
||||
}
|
||||
|
||||
if (!whole_words) {
|
||||
if (!strncasecmp (word, text, strlen(text))) {
|
||||
partial = true;
|
||||
pychm_process_wlc (chmfile, wlc_count, wlc_size,
|
||||
wlc_offset, doc_index_s,
|
||||
doc_index_r,code_count_s,
|
||||
code_count_r, loc_codes_s,
|
||||
loc_codes_r, &ui, &uiurltbl,
|
||||
&uistrings, &uitopics,
|
||||
&uiurlstr, dict);
|
||||
|
||||
} else if (strncasecmp (text, word, strlen(text)) < -1)
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
} while (!whole_words &&
|
||||
!strncmp (word, text, strlen(text)) &&
|
||||
node_offset);
|
||||
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
|
||||
return partial;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const char *file;
|
||||
int offset;
|
||||
} Langrec;
|
||||
|
||||
Langrec lang_files[] = {
|
||||
{"/$FIftiMain", 0x7E},
|
||||
{"$WWKeywordLinks/BTree", 0x34},
|
||||
{"$WWAssociativeLinks/BTree", 0x34}
|
||||
};
|
||||
|
||||
#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
|
||||
|
||||
int
|
||||
chm_get_lcid (struct chmFile *chmfile) {
|
||||
struct chmUnitInfo ui;
|
||||
uint32_t lang;
|
||||
int i;
|
||||
|
||||
for (i=0; i<LANG_FILES_SIZE; i++) {
|
||||
|
||||
if (chm_resolve_object (chmfile, lang_files[i].file, &ui) ==
|
||||
CHM_RESOLVE_SUCCESS) {
|
||||
|
||||
if (chm_retrieve_object (chmfile, &ui, (unsigned char *) &lang,
|
||||
lang_files[i].offset, sizeof(uint32_t)) != 0)
|
||||
return lang;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef __PYTHON__
|
||||
|
||||
/*
 * Python: is_searchable(chmfile) -> int
 *
 * Returns 1 when all five objects needed for full-text search can be
 * resolved in the archive, 0 otherwise. Raises TypeError when the argument
 * cannot be parsed or does not carry a chmFile pointer.
 */
static PyObject *
is_searchable (PyObject *self, PyObject *args) {
    static const char *const required[] = {
        "/$FIftiMain", "/#TOPICS", "/#STRINGS", "/#URLTBL", "/#URLSTR"
    };
    struct chmFile *file;
    PyObject *obj0;
    struct chmUnitInfo ui;
    size_t k;

    if (!PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
        PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);

    /* never hand a NULL handle to chmlib */
    if (NULL == file) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected chmfile (not CHMFile!)");
        return NULL;
    }

    for (k = 0; k < sizeof(required) / sizeof(required[0]); k++) {
        if (chm_resolve_object (file, required[k], &ui) !=
            CHM_RESOLVE_SUCCESS)
            return Py_BuildValue ("i", 0);
    }

    return Py_BuildValue ("i", 1);
}
|
||||
|
||||
/*
 * Python: search(chmfile, text, whole_words, titles_only) -> (partial, dict)
 *
 * Runs chm_search() and returns a tuple of its integer result and a dict
 * holding the matches. Raises TypeError for bad arguments or a handle that
 * carries no chmFile pointer, MemoryError when the dict cannot be created.
 */
static PyObject *
search (PyObject *self, PyObject *args) {
    char *text;
    int whole_words;
    int titles_only;
    int partial;
    struct chmFile *file;
    PyObject *obj0;
    PyObject *dict;

    if (!PyArg_ParseTuple (args, "Osii:search", &obj0, &text,
                           &whole_words, &titles_only)) {
        PyErr_SetString(PyExc_TypeError,
                        "Expected chmfile (not CHMFile!), string, int, int");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);

    /* never hand a NULL handle to chmlib */
    if (NULL == file) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected chmfile (not CHMFile!), string, int, int");
        return NULL;
    }

    dict = PyDict_New();
    if (NULL == dict) {
        PyErr_NoMemory();
        return NULL;
    }

    partial = chm_search (file,
                          text, whole_words, titles_only, dict);

    /* "N" passes our reference on to the tuple; "O" would add a second
       one and the dict would never be freed */
    return Py_BuildValue ("(iN)", partial, dict);
}
|
||||
|
||||
/*
 * Python: get_lcid(chmfile) -> int or None
 *
 * Returns the archive's locale ID as reported by chm_get_lcid(), or None
 * when that function returns -1. Raises TypeError when the argument cannot
 * be parsed or carries no chmFile pointer.
 */
static PyObject *
get_lcid (PyObject *self, PyObject *args) {
    int code;
    struct chmFile *file;
    PyObject *obj0;

    if (!PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
        PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);

    /* never hand a NULL handle to chmlib */
    if (NULL == file) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected a chmfile (not a CHMFile!)");
        return NULL;
    }

    code = chm_get_lcid (file);

    /* explicit branches: the old brace-less else covered only the INCREF */
    if (code == -1) {
        Py_RETURN_NONE;
    }

    return Py_BuildValue ("i", code);
}
|
||||
|
||||
static PyMethodDef
|
||||
IndexMethods[] = {
|
||||
{"get_lcid", get_lcid, METH_VARARGS,
|
||||
"Returns LCID (Locale ID) for archive."},
|
||||
{"search", search, METH_VARARGS,
|
||||
"Perform Full-Text search."},
|
||||
{"is_searchable", is_searchable, METH_VARARGS,
|
||||
"Return 1 if it is possible to search the archive, 0 otherwise."},
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
CALIBRE_MODINIT_FUNC
|
||||
initchm_extra (void) {
|
||||
Py_InitModule ("chm_extra", IndexMethods);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Stand-alone test driver (built when __PYTHON__ is not defined).
 *
 * Usage: prog <filename>. Prints the archive's LCID, then repeatedly reads
 * "<whole_words> <titles_only> <string>" from stdin and prints the result of
 * chm_search() until the input ends or cannot be parsed.
 *
 * Returns 0 normally (including when only the usage line is printed) and -1
 * when the archive cannot be opened.
 */
int
main (int argc, char **argv) {
    struct chmFile *file;
    char text[255];
    int whole_words, titles_only;
    int partial;
    int lcid;

    if (argc != 2) {
        printf ("\n%s <filename>\n", argv[0]);
        return 0;
    }

    file = chm_open (argv[1]);
    if (!file)
        return -1;

    lcid = chm_get_lcid (file);
    printf ("\nLCID= %d (%08X)\n", lcid, (unsigned int) lcid);

    for (;;) {
        printf ("\n<whole_words> <titles_only> <string>\n");
        printf ("> ");

        /* all three fields are required: scanf returns EOF (-1, which is
           "true") at end of input, and a partial match would leave the
           variables unset. %254s keeps the word inside text[255]. */
        if (scanf ("%d %d %254s", &whole_words, &titles_only, text) != 3)
            break;

        partial = chm_search (file,
                              text, whole_words, titles_only, NULL);

        printf ("Partial = %d\n", partial);
    }

    chm_close (file);
    return 0;
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user