mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Refactor hunspell to allow virtualization of file I/O
This commit is contained in:
parent
5f4110524b
commit
da7ef93e27
@ -72,7 +72,7 @@ extensions = [
|
||||
|
||||
Extension('hunspell',
|
||||
['hunspell/'+x for x in
|
||||
'affentry.cxx affixmgr.cxx csutil.cxx dictmgr.cxx filemgr.cxx hashmgr.cxx hunspell.cxx hunzip.cxx phonet.cxx replist.cxx suggestmgr.cxx'.split()
|
||||
'affentry.cxx affixmgr.cxx csutil.cxx dictmgr.cxx filemgr.cxx hashmgr.cxx hunspell.cxx phonet.cxx replist.cxx suggestmgr.cxx'.split()
|
||||
] + ['calibre/utils/spell/hunspell_wrapper.cpp',],
|
||||
inc_dirs=['hunspell'],
|
||||
cflags='/DHUNSPELL_STATIC /D_CRT_SECURE_NO_WARNINGS /DUNICODE /D_UNICODE'.split() if iswindows else ['-DHUNSPELL_STATIC'],
|
||||
|
@ -6,6 +6,7 @@
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#define PY_SSIZE_T_CLEAN 1
|
||||
#include <Python.h>
|
||||
#include <new>
|
||||
#include <string>
|
||||
@ -21,15 +22,16 @@ static PyObject *HunspellError = NULL;
|
||||
|
||||
static int
|
||||
init_type(Dictionary *self, PyObject *args, PyObject *kwds) {
|
||||
char *dpath = NULL, *apath = NULL;
|
||||
char *dic = NULL, *aff = NULL;
|
||||
Py_ssize_t diclen, afflen;
|
||||
|
||||
self->handle = NULL;
|
||||
self->encoding = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "ss", &dpath, &apath)) return 1;
|
||||
if (!PyArg_ParseTuple(args, "s#s#", &dic, &diclen, &aff, &afflen)) return 1;
|
||||
|
||||
try {
|
||||
self->handle = new (std::nothrow) Hunspell(apath, dpath);
|
||||
self->handle = new (std::nothrow) Hunspell(aff, afflen, dic, diclen);
|
||||
} catch (const std::exception &ex) {
|
||||
PyErr_SetString(HunspellError, ex.what());
|
||||
return 1;
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#include "csutil.hxx"
|
||||
|
||||
AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
|
||||
AffixMgr::AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, int * md)
|
||||
{
|
||||
// register hash manager and load affix data from aff file
|
||||
pHMgr = ptr[0];
|
||||
@ -110,8 +110,8 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k
|
||||
contclasses[j] = 0;
|
||||
}
|
||||
|
||||
if (parse_file(affpath, key)) {
|
||||
HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
|
||||
if (parse_file(aff_data, aff_len)) {
|
||||
HUNSPELL_WARNING(stderr, "Failure loading aff file\n");
|
||||
}
|
||||
|
||||
if (cpdmin == -1) cpdmin = MINCPDLEN;
|
||||
@ -255,7 +255,7 @@ AffixMgr::~AffixMgr()
|
||||
|
||||
|
||||
// read in aff file and build up prefix and suffix entry objects
|
||||
int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||
int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
{
|
||||
char * line; // io buffers
|
||||
char ft; // affix type
|
||||
@ -268,9 +268,9 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||
int firstline = 1;
|
||||
|
||||
// open the affix file
|
||||
FileMgr * afflst = new FileMgr(affpath, key);
|
||||
FileMgr * afflst = new FileMgr(aff_data, aff_len);
|
||||
if (!afflst) {
|
||||
HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
|
||||
HUNSPELL_WARNING(stderr, "error: could not open affix description file \n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -109,8 +109,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
||||
|
||||
public:
|
||||
|
||||
AffixMgr(const char * affpath, HashMgr** ptr, int * md,
|
||||
const char * key = NULL);
|
||||
AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, int * md);
|
||||
~AffixMgr();
|
||||
struct hentry * affix_check(const char * word, int len,
|
||||
const unsigned short needflag = (unsigned short) 0,
|
||||
@ -217,7 +216,7 @@ public:
|
||||
int get_fullstrip() const;
|
||||
|
||||
private:
|
||||
int parse_file(const char * affpath, const char * key);
|
||||
int parse_file(const char *aff_data, const size_t aff_len);
|
||||
int parse_flag(char * line, unsigned short * out, FileMgr * af);
|
||||
int parse_num(char * line, int * out, FileMgr * af);
|
||||
int parse_cpdsyllable(char * line, FileMgr * af);
|
||||
|
@ -3,45 +3,42 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "filemgr.hxx"
|
||||
|
||||
// Print a diagnostic on stderr and return the error code -1.
//   err - printf-style format string, used with `par` as its only argument
//   par - value substituted into `err`
int FileMgr::fail(const char * err, const char * par)
{
    const int error_code = -1;
    fprintf(stderr, err, par);
    return error_code;
}
|
||||
|
||||
// Create a line reader over a private, NUL-terminated copy of `data`.
//   data - start of the file contents (need not be NUL-terminated)
//   dlen - number of bytes available at `data`
// The copy is made because getline() temporarily overwrites one byte of the
// buffer to terminate the line it hands out. `new[]` throws std::bad_alloc
// if the copy cannot be allocated.
FileMgr::FileMgr(const char *data, const size_t dlen) {
    // Treat a NULL source as empty input rather than passing it to memcpy.
    const size_t len = (data != NULL) ? dlen : 0;

    linenum = 0;
    last = 0;
    buf = new char[len + 1];
    if (len > 0) memcpy(buf, data, len);
    buf[len] = 0;
    pos = buf;
    buflen = len;
}
|
||||
|
||||
// Release the in-memory copy of the file contents. getline() may already have
// freed it after the last line; delete[] on a null pointer is a no-op.
FileMgr::~FileMgr()
{
    delete[] buf;
    buf = NULL;
    pos = NULL;
}
|
||||
|
||||
char * FileMgr::getline() {
|
||||
const char * l;
|
||||
if (buf == NULL) return NULL;
|
||||
if (((size_t)(pos - buf)) >= buflen) {
|
||||
// free up the memory as it will not be needed anymore
|
||||
delete[] buf; buf = NULL; pos = NULL; return NULL;
|
||||
}
|
||||
if (pos != buf) *pos = last; // Restore the character that was previously replaced by null
|
||||
char *ans = pos;
|
||||
// Move pos to the start of the next line
|
||||
pos = (char *)memchr(pos, 10, buflen - (pos - buf));
|
||||
if (pos == NULL) pos = buf + buflen + 1;
|
||||
else pos++;
|
||||
// Ensure the current line is null terminated
|
||||
last = *pos;
|
||||
*pos = 0;
|
||||
linenum++;
|
||||
if (fin) return fgets(in, BUFSIZE - 1, fin);
|
||||
if (hin && (l = hin->getline())) return strcpy(in, l);
|
||||
linenum--;
|
||||
return NULL;
|
||||
return ans;
|
||||
}
|
||||
|
||||
int FileMgr::getlinenum() {
|
||||
|
@ -4,20 +4,17 @@
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "hunzip.hxx"
|
||||
#include <stdio.h>
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
||||
{
|
||||
protected:
|
||||
FILE * fin;
|
||||
Hunzip * hin;
|
||||
char in[BUFSIZE + 50]; // input buffer
|
||||
int fail(const char * err, const char * par);
|
||||
char *buf;
|
||||
char *pos;
|
||||
size_t buflen;
|
||||
char last;
|
||||
int linenum;
|
||||
|
||||
public:
|
||||
FileMgr(const char * filename, const char * key = NULL);
|
||||
FileMgr(const char *data, const size_t dlen);
|
||||
~FileMgr();
|
||||
char * getline();
|
||||
int getlinenum();
|
||||
|
@ -10,9 +10,11 @@
|
||||
#include "csutil.hxx"
|
||||
#include "atypes.hxx"
|
||||
|
||||
#define BUFSIZE 65536
|
||||
|
||||
// build a hash table from a munched word list
|
||||
|
||||
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
||||
HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len)
|
||||
{
|
||||
tablesize = 0;
|
||||
tableptr = NULL;
|
||||
@ -31,8 +33,8 @@ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
||||
numaliasm = 0;
|
||||
aliasm = NULL;
|
||||
forbiddenword = FORBIDDENWORD; // forbidden word signing flag
|
||||
load_config(apath, key);
|
||||
int ec = load_tables(tpath, key);
|
||||
load_config(aff_data, aff_len);
|
||||
int ec = load_tables(dic_data, dic_len);
|
||||
if (ec) {
|
||||
/* error condition - what should we do here */
|
||||
HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
|
||||
@ -349,7 +351,7 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
|
||||
}
|
||||
|
||||
// load a munched word list and build a hash table on the fly
|
||||
int HashMgr::load_tables(const char * tpath, const char * key)
|
||||
int HashMgr::load_tables(const char *dic_data, const size_t dic_len)
|
||||
{
|
||||
int al;
|
||||
char * ap;
|
||||
@ -359,7 +361,7 @@ int HashMgr::load_tables(const char * tpath, const char * key)
|
||||
char * ts;
|
||||
|
||||
// open dictionary file
|
||||
FileMgr * dict = new FileMgr(tpath, key);
|
||||
FileMgr * dict = new FileMgr(dic_data, dic_len);
|
||||
if (dict == NULL) return 1;
|
||||
|
||||
// first read the first line of file to get hash table size */
|
||||
@ -601,15 +603,15 @@ char * HashMgr::encode_flag(unsigned short f) {
|
||||
}
|
||||
|
||||
// read in aff file and set flag mode
|
||||
int HashMgr::load_config(const char * affpath, const char * key)
|
||||
int HashMgr::load_config(const char *aff_data, const size_t aff_len)
|
||||
{
|
||||
char * line; // io buffers
|
||||
int firstline = 1;
|
||||
|
||||
// open the affix file
|
||||
FileMgr * afflst = new FileMgr(affpath, key);
|
||||
FileMgr * afflst = new FileMgr(aff_data, aff_len);
|
||||
if (!afflst) {
|
||||
HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
|
||||
HUNSPELL_WARNING(stderr, "Error - could not open affix description file");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ class LIBHUNSPELL_DLL_EXPORTED HashMgr
|
||||
|
||||
|
||||
public:
|
||||
HashMgr(const char * tpath, const char * apath, const char * key = NULL);
|
||||
HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len);
|
||||
~HashMgr();
|
||||
|
||||
struct hentry * lookup(const char *) const;
|
||||
@ -54,10 +54,10 @@ public:
|
||||
|
||||
private:
|
||||
int get_clen_and_captype(const char * word, int wbl, int * captype);
|
||||
int load_tables(const char * tpath, const char * key);
|
||||
int load_tables(const char *dic_data, const size_t dic_len);
|
||||
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
|
||||
int al, const char * desc, bool onlyupcase);
|
||||
int load_config(const char * affpath, const char * key);
|
||||
int load_config(const char *aff_data, const size_t aff_len);
|
||||
int parse_aliasf(char * line, FileMgr * af);
|
||||
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
||||
unsigned short * flags, int al, char * dp, int captype);
|
||||
|
@ -6,28 +6,26 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "hunspell.hxx"
|
||||
#include "hunspell.h"
|
||||
#ifndef MOZILLA_CLIENT
|
||||
# include "config.h"
|
||||
#endif
|
||||
#include "csutil.hxx"
|
||||
|
||||
Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
|
||||
Hunspell::Hunspell(const char *affix_data, const size_t aff_len, const char *dic_data, const size_t dic_len)
|
||||
{
|
||||
encoding = NULL;
|
||||
csconv = NULL;
|
||||
utf8 = 0;
|
||||
complexprefixes = 0;
|
||||
affixpath = mystrdup(affpath);
|
||||
maxdic = 0;
|
||||
|
||||
/* first set up the hash manager */
|
||||
pHMgr[0] = new HashMgr(dpath, affpath, key);
|
||||
pHMgr[0] = new HashMgr(affix_data, aff_len, dic_data, dic_len);
|
||||
if (pHMgr[0]) maxdic = 1;
|
||||
|
||||
/* next set up the affix manager */
|
||||
/* it needs access to the hash manager lookup methods */
|
||||
pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
|
||||
pAMgr = new AffixMgr(affix_data, aff_len, pHMgr, &maxdic);
|
||||
|
||||
/* get the preferred try string and the dictionary */
|
||||
/* encoding from the Affix Manager for that dictionary */
|
||||
@ -59,16 +57,6 @@ Hunspell::~Hunspell()
|
||||
csconv= NULL;
|
||||
if (encoding) free(encoding);
|
||||
encoding = NULL;
|
||||
if (affixpath) free(affixpath);
|
||||
affixpath = NULL;
|
||||
}
|
||||
|
||||
// load extra dictionaries
|
||||
int Hunspell::add_dic(const char * dpath, const char * key) {
|
||||
if (maxdic == MAXDIC || !affixpath) return 1;
|
||||
pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
|
||||
if (pHMgr[maxdic]) maxdic++; else return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// make a copy of src at destination while removing all leading
|
||||
@ -1919,88 +1907,4 @@ char * Hunspell::morph_with_correction(const char * word)
|
||||
|
||||
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
|
||||
|
||||
Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
|
||||
{
|
||||
return (Hunhandle*)(new Hunspell(affpath, dpath));
|
||||
}
|
||||
|
||||
Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
||||
const char * key)
|
||||
{
|
||||
return (Hunhandle*)(new Hunspell(affpath, dpath, key));
|
||||
}
|
||||
|
||||
void Hunspell_destroy(Hunhandle *pHunspell)
|
||||
{
|
||||
delete (Hunspell*)(pHunspell);
|
||||
}
|
||||
|
||||
int Hunspell_spell(Hunhandle *pHunspell, const char *word)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->spell(word);
|
||||
}
|
||||
|
||||
char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->get_dic_encoding();
|
||||
}
|
||||
|
||||
int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->suggest(slst, word);
|
||||
}
|
||||
|
||||
int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->analyze(slst, word);
|
||||
}
|
||||
|
||||
int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->stem(slst, word);
|
||||
}
|
||||
|
||||
int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->stem(slst, desc, n);
|
||||
}
|
||||
|
||||
int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
const char * word2)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->generate(slst, word, word2);
|
||||
}
|
||||
|
||||
int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
char** desc, int n)
|
||||
{
|
||||
return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
|
||||
}
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
int Hunspell_add(Hunhandle *pHunspell, const char * word) {
|
||||
return ((Hunspell*)pHunspell)->add(word);
|
||||
}
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
|
||||
const char * example) {
|
||||
return ((Hunspell*)pHunspell)->add_with_affix(word, example);
|
||||
}
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
|
||||
return ((Hunspell*)pHunspell)->remove(word);
|
||||
}
|
||||
|
||||
void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
|
||||
freelist(slst, n);
|
||||
}
|
||||
|
@ -1,95 +0,0 @@
|
||||
#ifndef _MYSPELLMGR_H_
|
||||
#define _MYSPELLMGR_H_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct Hunhandle Hunhandle;
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
||||
const char * key);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
* array of strings pointer (here *slst) may not be initialized
|
||||
* output: number of suggestions in string array, and suggestions in
|
||||
* a newly allocated array of strings (*slst will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = Hunspell_analyze(result, "words");
|
||||
* int n2 = Hunspell_stem2(result2, result, n1);
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
const char * word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends on the dictionaries, too
|
||||
* int n = Hunspell_generate2(result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
char** desc, int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
|
||||
|
||||
/* free suggestion lists */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -23,7 +23,6 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
||||
HashMgr* pHMgr[MAXDIC];
|
||||
int maxdic;
|
||||
SuggestMgr* pSMgr;
|
||||
char * affixpath;
|
||||
char * encoding;
|
||||
struct cs_info * csconv;
|
||||
int langnum;
|
||||
@ -34,15 +33,12 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
||||
public:
|
||||
|
||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||
* input: path of affix file and dictionary file
|
||||
* input: The affix and dictionary data as bytes
|
||||
*/
|
||||
|
||||
Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
|
||||
Hunspell(const char *affix_data, const size_t affix_len, const char *dic_data, const size_t dic_len);
|
||||
~Hunspell();
|
||||
|
||||
/* load extra dictionaries (only dic files) */
|
||||
int add_dic(const char * dpath, const char * key = NULL);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*
|
||||
|
@ -1,193 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "hunzip.hxx"
|
||||
|
||||
#define CODELEN 65536
|
||||
#define BASEBITREC 5000
|
||||
|
||||
#define UNCOMPRESSED '\002'
|
||||
#define MAGIC "hz0"
|
||||
#define MAGIC_ENCRYPT "hz1"
|
||||
#define MAGICLEN (sizeof(MAGIC) - 1)
|
||||
|
||||
// Print a diagnostic on stderr and return the error code -1.
//   err - printf-style format string (the MSG_* macros take one %s)
//   par - value substituted into `err`
int Hunzip::fail(const char * err, const char * par)
{
    const int error_code = -1;
    fprintf(stderr, err, par);
    return error_code;
}
|
||||
|
||||
// Open the hzip-compressed file `file`, read its code table and decode the
// first buffer.
//   file - path of the compressed file
//   key  - password for encrypted files, or NULL
// On any failure bufsiz is set to -1, which makes getline() return NULL.
Hunzip::Hunzip(const char * file, const char * key) {
    bufsiz = 0;
    lastbit = 0;
    inc = 0;
    inbits = 0;
    outc = 0;
    dec = NULL;
    fin = NULL;
    // Start with empty buffers: getline() calls strlen(line) while building
    // the first line, so `line` must never hold indeterminate bytes.
    in[0] = '\0';
    out[0] = '\0';
    line[0] = '\0';

    filename = (char *) malloc(strlen(file) + 1);
    if (filename) strcpy(filename, file);

    if (getcode(key) == -1) bufsiz = -1;
    else bufsiz = getbuf();
}
|
||||
|
||||
int Hunzip::getcode(const char * key) {
|
||||
unsigned char c[2];
|
||||
int i, j, n, p;
|
||||
int allocatedbit = BASEBITREC;
|
||||
const char * enc = key;
|
||||
|
||||
if (!filename) return -1;
|
||||
|
||||
fin = fopen(filename, "rb");
|
||||
if (!fin) return -1;
|
||||
|
||||
// read magic number
|
||||
if ((fread(in, 1, 3, fin) < MAGICLEN)
|
||||
|| !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
|
||||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
// check encryption
|
||||
if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
|
||||
unsigned char cs;
|
||||
if (!key) return fail(MSG_KEY, filename);
|
||||
if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
|
||||
for (cs = 0; *enc; enc++) cs ^= *enc;
|
||||
if (cs != c[0]) return fail(MSG_KEY, filename);
|
||||
enc = key;
|
||||
} else key = NULL;
|
||||
|
||||
// read record count
|
||||
if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
|
||||
|
||||
if (key) {
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
|
||||
n = ((int) c[0] << 8) + c[1];
|
||||
dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit));
|
||||
if (!dec) return fail(MSG_MEMORY, filename);
|
||||
dec[0].v[0] = 0;
|
||||
dec[0].v[1] = 0;
|
||||
|
||||
// read codes
|
||||
for (i = 0; i < n; i++) {
|
||||
unsigned char l;
|
||||
if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
l ^= *enc;
|
||||
}
|
||||
if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename);
|
||||
if (key) for (j = 0; j <= l/8; j++) {
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
in[j] ^= *enc;
|
||||
}
|
||||
p = 0;
|
||||
for (j = 0; j < l; j++) {
|
||||
int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
lastbit++;
|
||||
if (lastbit == allocatedbit) {
|
||||
allocatedbit += BASEBITREC;
|
||||
dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit));
|
||||
}
|
||||
dec[lastbit].v[0] = 0;
|
||||
dec[lastbit].v[1] = 0;
|
||||
dec[oldp].v[b] = lastbit;
|
||||
p = lastbit;
|
||||
}
|
||||
}
|
||||
dec[p].c[0] = c[0];
|
||||
dec[p].c[1] = c[1];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Release the code table, the input stream (if still open) and the stored
// file name. free() accepts NULL, so only fclose needs a guard.
Hunzip::~Hunzip()
{
    free(dec);
    if (fin != NULL) fclose(fin);
    free(filename);
}
|
||||
|
||||
int Hunzip::getbuf() {
|
||||
int p = 0;
|
||||
int o = 0;
|
||||
do {
|
||||
if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8;
|
||||
for (; inc < inbits; inc++) {
|
||||
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
if (oldp == lastbit) {
|
||||
fclose(fin);
|
||||
fin = NULL;
|
||||
// add last odd byte
|
||||
if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1];
|
||||
return o;
|
||||
}
|
||||
out[o++] = dec[oldp].c[0];
|
||||
out[o++] = dec[oldp].c[1];
|
||||
if (o == BUFSIZE) return o;
|
||||
p = dec[p].v[b];
|
||||
}
|
||||
}
|
||||
inc = 0;
|
||||
} while (inbits == BUFSIZE * 8);
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
const char * Hunzip::getline() {
|
||||
char linebuf[BUFSIZE];
|
||||
int l = 0, eol = 0, left = 0, right = 0;
|
||||
if (bufsiz == -1) return NULL;
|
||||
while (l < bufsiz && !eol) {
|
||||
linebuf[l++] = out[outc];
|
||||
switch (out[outc]) {
|
||||
case '\t': break;
|
||||
case 31: { // escape
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
linebuf[l - 1] = out[outc];
|
||||
break;
|
||||
}
|
||||
case ' ': break;
|
||||
default: if (((unsigned char) out[outc]) < 47) {
|
||||
if (out[outc] > 32) {
|
||||
right = out[outc] - 31;
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
}
|
||||
if (out[outc] == 30) left = 9; else left = out[outc];
|
||||
linebuf[l-1] = '\n';
|
||||
eol = 1;
|
||||
}
|
||||
}
|
||||
if (++outc == bufsiz) {
|
||||
outc = 0;
|
||||
bufsiz = fin ? getbuf(): -1;
|
||||
}
|
||||
}
|
||||
if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
|
||||
else linebuf[l] = '\0';
|
||||
strcpy(line + left, linebuf);
|
||||
return line;
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
/* hunzip: file decompression for sorted dictionaries with optional encryption,
|
||||
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
||||
|
||||
#ifndef _HUNZIP_HXX_
|
||||
#define _HUNZIP_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define BUFSIZE 65536
|
||||
#define HZIP_EXTENSION ".hz"
|
||||
|
||||
#define MSG_OPEN "error: %s: cannot open\n"
|
||||
#define MSG_FORMAT "error: %s: not in hzip format\n"
|
||||
#define MSG_MEMORY "error: %s: missing memory\n"
|
||||
#define MSG_KEY "error: %s: missing or bad password\n"
|
||||
|
||||
// One node of the decoding tree used by Hunzip.
struct bit {
    unsigned char c[2];  // the two bytes emitted for the code ending at this node
    int v[2];            // index of the next node for bit 0 / bit 1 (0 = none)
};
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunzip
|
||||
{
|
||||
|
||||
protected:
|
||||
char * filename;
|
||||
FILE * fin;
|
||||
int bufsiz, lastbit, inc, inbits, outc;
|
||||
struct bit * dec; // code table
|
||||
char in[BUFSIZE]; // input buffer
|
||||
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
||||
char line[BUFSIZE + 50]; // decoded line
|
||||
int getcode(const char * key);
|
||||
int getbuf();
|
||||
int fail(const char * err, const char * par);
|
||||
|
||||
public:
|
||||
Hunzip(const char * filename, const char * key = NULL);
|
||||
~Hunzip();
|
||||
const char * getline();
|
||||
};
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user