diff --git a/setup/extensions.py b/setup/extensions.py index fcb59fbe30..28585c1630 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -72,7 +72,7 @@ extensions = [ Extension('hunspell', ['hunspell/'+x for x in - 'affentry.cxx affixmgr.cxx csutil.cxx dictmgr.cxx filemgr.cxx hashmgr.cxx hunspell.cxx hunzip.cxx phonet.cxx replist.cxx suggestmgr.cxx'.split() + 'affentry.cxx affixmgr.cxx csutil.cxx dictmgr.cxx filemgr.cxx hashmgr.cxx hunspell.cxx phonet.cxx replist.cxx suggestmgr.cxx'.split() ] + ['calibre/utils/spell/hunspell_wrapper.cpp',], inc_dirs=['hunspell'], cflags='/DHUNSPELL_STATIC /D_CRT_SECURE_NO_WARNINGS /DUNICODE /D_UNICODE'.split() if iswindows else ['-DHUNSPELL_STATIC'], diff --git a/src/calibre/utils/spell/hunspell_wrapper.cpp b/src/calibre/utils/spell/hunspell_wrapper.cpp index a4993abfe5..5f61595777 100644 --- a/src/calibre/utils/spell/hunspell_wrapper.cpp +++ b/src/calibre/utils/spell/hunspell_wrapper.cpp @@ -6,6 +6,7 @@ * Distributed under terms of the GPL3 license. 
*/ +#define PY_SSIZE_T_CLEAN 1 #include #include #include @@ -21,15 +22,16 @@ static PyObject *HunspellError = NULL; static int init_type(Dictionary *self, PyObject *args, PyObject *kwds) { - char *dpath = NULL, *apath = NULL; + char *dic = NULL, *aff = NULL; + Py_ssize_t diclen, afflen; self->handle = NULL; self->encoding = NULL; - if (!PyArg_ParseTuple(args, "ss", &dpath, &apath)) return 1; + if (!PyArg_ParseTuple(args, "s#s#", &dic, &diclen, &aff, &afflen)) return 1; try { - self->handle = new (std::nothrow) Hunspell(apath, dpath); + self->handle = new (std::nothrow) Hunspell(aff, afflen, dic, diclen); } catch (const std::exception &ex) { PyErr_SetString(HunspellError, ex.what()); return 1; diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx index b9108d45e3..7f3c98160a 100644 --- a/src/hunspell/affixmgr.cxx +++ b/src/hunspell/affixmgr.cxx @@ -14,7 +14,7 @@ #include "csutil.hxx" -AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key) +AffixMgr::AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, int * md) { // register hash manager and load affix data from aff file pHMgr = ptr[0]; @@ -110,8 +110,8 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k contclasses[j] = 0; } - if (parse_file(affpath, key)) { - HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); + if (parse_file(aff_data, aff_len)) { + HUNSPELL_WARNING(stderr, "Failure loading aff file\n"); } if (cpdmin == -1) cpdmin = MINCPDLEN; @@ -255,7 +255,7 @@ AffixMgr::~AffixMgr() // read in aff file and build up prefix and suffix entry objects -int AffixMgr::parse_file(const char * affpath, const char * key) +int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) { char * line; // io buffers char ft; // affix type @@ -268,9 +268,9 @@ int AffixMgr::parse_file(const char * affpath, const char * key) int firstline = 1; // open the affix file - FileMgr * afflst = new FileMgr(affpath, key); + 
FileMgr * afflst = new FileMgr(aff_data, aff_len); if (!afflst) { - HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath); + HUNSPELL_WARNING(stderr, "error: could not open affix description file \n"); return 1; } diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx index d9c625aed0..0f20a024fd 100644 --- a/src/hunspell/affixmgr.hxx +++ b/src/hunspell/affixmgr.hxx @@ -109,8 +109,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr public: - AffixMgr(const char * affpath, HashMgr** ptr, int * md, - const char * key = NULL); + AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, int * md); ~AffixMgr(); struct hentry * affix_check(const char * word, int len, const unsigned short needflag = (unsigned short) 0, @@ -217,7 +216,7 @@ public: int get_fullstrip() const; private: - int parse_file(const char * affpath, const char * key); + int parse_file(const char *aff_data, const size_t aff_len); int parse_flag(char * line, unsigned short * out, FileMgr * af); int parse_num(char * line, int * out, FileMgr * af); int parse_cpdsyllable(char * line, FileMgr * af); diff --git a/src/hunspell/filemgr.cxx b/src/hunspell/filemgr.cxx index 5fb82bcf80..c3854588d8 100644 --- a/src/hunspell/filemgr.cxx +++ b/src/hunspell/filemgr.cxx @@ -3,45 +3,42 @@ #include #include -#include #include "filemgr.hxx" -int FileMgr::fail(const char * err, const char * par) { - fprintf(stderr, err, par); - return -1; -} - -FileMgr::FileMgr(const char * file, const char * key) { +FileMgr::FileMgr(const char *data, const size_t dlen) { linenum = 0; - hin = NULL; - fin = fopen(file, "r"); - if (!fin) { - // check hzipped file - char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); - if (st) { - strcpy(st, file); - strcat(st, HZIP_EXTENSION); - hin = new Hunzip(st, key); - free(st); - } - } - if (!fin && !hin) fail(MSG_OPEN, file); + last = 0; + buf = new char[dlen+1]; + memcpy(buf, data, dlen); + buf[dlen] = 0; + pos = buf; + buflen = 
dlen; } FileMgr::~FileMgr() { - if (fin) fclose(fin); - if (hin) delete hin; + if (buf != NULL) { delete[] buf; buf = NULL; } + pos = NULL; } char * FileMgr::getline() { - const char * l; + if (buf == NULL) return NULL; + if (((size_t)(pos - buf)) >= buflen) { + // free up the memory as it will not be needed anymore + delete[] buf; buf = NULL; pos = NULL; return NULL; + } + if (pos != buf) *pos = last; // Restore the character that was previously replaced by null + char *ans = pos; + // Move pos to the start of the next line + pos = (char *)memchr(pos, 10, buflen - (pos - buf)); + if (pos == NULL) pos = buf + buflen; // No newline left: stop at the terminating NUL (buf[buflen]), the last valid byte of buf + else pos++; + // Ensure the current line is null terminated + last = *pos; + *pos = 0; linenum++; - if (fin) return fgets(in, BUFSIZE - 1, fin); - if (hin && (l = hin->getline())) return strcpy(in, l); - linenum--; - return NULL; + return ans; } int FileMgr::getlinenum() { diff --git a/src/hunspell/filemgr.hxx b/src/hunspell/filemgr.hxx index 94cb7233d8..99edd8f758 100644 --- a/src/hunspell/filemgr.hxx +++ b/src/hunspell/filemgr.hxx @@ -4,20 +4,17 @@ #include "hunvisapi.h" -#include "hunzip.hxx" -#include - class LIBHUNSPELL_DLL_EXPORTED FileMgr { protected: - FILE * fin; - Hunzip * hin; - char in[BUFSIZE + 50]; // input buffer - int fail(const char * err, const char * par); + char *buf; + char *pos; + size_t buflen; + char last; int linenum; public: - FileMgr(const char * filename, const char * key = NULL); + FileMgr(const char *data, const size_t dlen); ~FileMgr(); char * getline(); int getlinenum(); diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx index ea93b8787c..956bb4b5c0 100644 --- a/src/hunspell/hashmgr.cxx +++ b/src/hunspell/hashmgr.cxx @@ -10,9 +10,11 @@ #include "csutil.hxx" #include "atypes.hxx" +#define BUFSIZE 65536 + // build a hash table from a munched word list -HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) +HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char 
*dic_data, const size_t dic_len) { tablesize = 0; tableptr = NULL; @@ -31,8 +33,8 @@ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) numaliasm = 0; aliasm = NULL; forbiddenword = FORBIDDENWORD; // forbidden word signing flag - load_config(apath, key); - int ec = load_tables(tpath, key); + load_config(aff_data, aff_len); + int ec = load_tables(dic_data, dic_len); if (ec) { /* error condition - what should we do here */ HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); @@ -349,7 +351,7 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const } // load a munched word list and build a hash table on the fly -int HashMgr::load_tables(const char * tpath, const char * key) +int HashMgr::load_tables(const char *dic_data, const size_t dic_len) { int al; char * ap; @@ -359,7 +361,7 @@ int HashMgr::load_tables(const char * tpath, const char * key) char * ts; // open dictionary file - FileMgr * dict = new FileMgr(tpath, key); + FileMgr * dict = new FileMgr(dic_data, dic_len); if (dict == NULL) return 1; // first read the first line of file to get hash table size */ @@ -601,15 +603,15 @@ char * HashMgr::encode_flag(unsigned short f) { } // read in aff file and set flag mode -int HashMgr::load_config(const char * affpath, const char * key) +int HashMgr::load_config(const char *aff_data, const size_t aff_len) { char * line; // io buffers int firstline = 1; // open the affix file - FileMgr * afflst = new FileMgr(affpath, key); + FileMgr * afflst = new FileMgr(aff_data, aff_len); if (!afflst) { - HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath); + HUNSPELL_WARNING(stderr, "Error - could not open affix description file"); return 1; } diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx index 341b08131c..0f7b2ab090 100644 --- a/src/hunspell/hashmgr.hxx +++ b/src/hunspell/hashmgr.hxx @@ -34,7 +34,7 @@ class LIBHUNSPELL_DLL_EXPORTED HashMgr public: - HashMgr(const char * tpath, 
const char * apath, const char * key = NULL); + HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len); ~HashMgr(); struct hentry * lookup(const char *) const; @@ -54,10 +54,10 @@ public: private: int get_clen_and_captype(const char * word, int wbl, int * captype); - int load_tables(const char * tpath, const char * key); + int load_tables(const char *dic_data, const size_t dic_len); int add_word(const char * word, int wbl, int wcl, unsigned short * ap, int al, const char * desc, bool onlyupcase); - int load_config(const char * affpath, const char * key); + int load_config(const char *aff_data, const size_t aff_len); int parse_aliasf(char * line, FileMgr * af); int add_hidden_capitalized_word(char * word, int wbl, int wcl, unsigned short * flags, int al, char * dp, int captype); diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx index a9b261a378..83878ff60b 100644 --- a/src/hunspell/hunspell.cxx +++ b/src/hunspell/hunspell.cxx @@ -6,28 +6,26 @@ #include #include "hunspell.hxx" -#include "hunspell.h" #ifndef MOZILLA_CLIENT # include "config.h" #endif #include "csutil.hxx" -Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key) +Hunspell::Hunspell(const char *affix_data, const size_t aff_len, const char *dic_data, const size_t dic_len) { encoding = NULL; csconv = NULL; utf8 = 0; complexprefixes = 0; - affixpath = mystrdup(affpath); maxdic = 0; /* first set up the hash manager */ - pHMgr[0] = new HashMgr(dpath, affpath, key); + pHMgr[0] = new HashMgr(affix_data, aff_len, dic_data, dic_len); if (pHMgr[0]) maxdic = 1; /* next set up the affix manager */ /* it needs access to the hash manager lookup methods */ - pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); + pAMgr = new AffixMgr(affix_data, aff_len, pHMgr, &maxdic); /* get the preferred try string and the dictionary */ /* encoding from the Affix Manager for that dictionary */ @@ -59,16 +57,6 @@ Hunspell::~Hunspell() csconv= NULL; if 
(encoding) free(encoding); encoding = NULL; - if (affixpath) free(affixpath); - affixpath = NULL; -} - -// load extra dictionaries -int Hunspell::add_dic(const char * dpath, const char * key) { - if (maxdic == MAXDIC || !affixpath) return 1; - pHMgr[maxdic] = new HashMgr(dpath, affixpath, key); - if (pHMgr[maxdic]) maxdic++; else return 1; - return 0; } // make a copy of src at destination while removing all leading @@ -1919,88 +1907,4 @@ char * Hunspell::morph_with_correction(const char * word) #endif // END OF HUNSPELL_EXPERIMENTAL CODE -Hunhandle *Hunspell_create(const char * affpath, const char * dpath) -{ - return (Hunhandle*)(new Hunspell(affpath, dpath)); -} -Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, - const char * key) -{ - return (Hunhandle*)(new Hunspell(affpath, dpath, key)); -} - -void Hunspell_destroy(Hunhandle *pHunspell) -{ - delete (Hunspell*)(pHunspell); -} - -int Hunspell_spell(Hunhandle *pHunspell, const char *word) -{ - return ((Hunspell*)pHunspell)->spell(word); -} - -char *Hunspell_get_dic_encoding(Hunhandle *pHunspell) -{ - return ((Hunspell*)pHunspell)->get_dic_encoding(); -} - -int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word) -{ - return ((Hunspell*)pHunspell)->suggest(slst, word); -} - -int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word) -{ - return ((Hunspell*)pHunspell)->analyze(slst, word); -} - -int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word) -{ - return ((Hunspell*)pHunspell)->stem(slst, word); -} - -int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n) -{ - return ((Hunspell*)pHunspell)->stem(slst, desc, n); -} - -int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word, - const char * word2) -{ - return ((Hunspell*)pHunspell)->generate(slst, word, word2); -} - -int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word, - char** desc, int n) -{ - return 
((Hunspell*)pHunspell)->generate(slst, word, desc, n); -} - - /* functions for run-time modification of the dictionary */ - - /* add word to the run-time dictionary */ - -int Hunspell_add(Hunhandle *pHunspell, const char * word) { - return ((Hunspell*)pHunspell)->add(word); -} - - /* add word to the run-time dictionary with affix flags of - * the example (a dictionary word): Hunspell will recognize - * affixed forms of the new word, too. - */ - -int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, - const char * example) { - return ((Hunspell*)pHunspell)->add_with_affix(word, example); -} - - /* remove word from the run-time dictionary */ - -int Hunspell_remove(Hunhandle *pHunspell, const char * word) { - return ((Hunspell*)pHunspell)->remove(word); -} - -void Hunspell_free_list(Hunhandle *, char *** slst, int n) { - freelist(slst, n); -} diff --git a/src/hunspell/hunspell.h b/src/hunspell/hunspell.h deleted file mode 100644 index 627968a3da..0000000000 --- a/src/hunspell/hunspell.h +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef _MYSPELLMGR_H_ -#define _MYSPELLMGR_H_ - -#include "hunvisapi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct Hunhandle Hunhandle; - -LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath); - -LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, - const char * key); - -LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell); - -/* spell(word) - spellcheck word - * output: 0 = bad word, not 0 = good word - */ -LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *); - -LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell); - -/* suggest(suggestions, word) - search suggestions - * input: pointer to an array of strings pointer and the (bad) word - * array of strings pointer (here *slst) may not be initialized - * output: number of suggestions in string array, and suggestions 
in - * a newly allocated array of strings (*slts will be NULL when number - * of suggestion equals 0.) - */ -LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word); - - /* morphological functions */ - - /* analyze(result, word) - morphological analysis of the word */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word); - - /* stem(result, word) - stemmer function */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word); - - /* stem(result, analysis, n) - get stems from a morph. analysis - * example: - * char ** result, result2; - * int n1 = Hunspell_analyze(result, "words"); - * int n2 = Hunspell_stem2(result2, result, n1); - */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n); - - /* generate(result, word, word2) - morphological generation by example(s) */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word, - const char * word2); - - /* generate(result, word, desc, n) - generation by morph. description(s) - * example: - * char ** result; - * char * affix = "is:plural"; // description depends from dictionaries, too - * int n = Hunspell_generate2(result, "word", &affix, 1); - * for (int i = 0; i < n; i++) printf("%s\n", result[i]); - */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word, - char** desc, int n); - - /* functions for run-time modification of the dictionary */ - - /* add word to the run-time dictionary */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word); - - /* add word to the run-time dictionary with affix flags of - * the example (a dictionary word): Hunspell will recognize - * affixed forms of the new word, too. 
- */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example); - - /* remove word from the run-time dictionary */ - -LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word); - - /* free suggestion lists */ - -LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx index 9b6c388108..5fc88eaa8a 100644 --- a/src/hunspell/hunspell.hxx +++ b/src/hunspell/hunspell.hxx @@ -23,7 +23,6 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell HashMgr* pHMgr[MAXDIC]; int maxdic; SuggestMgr* pSMgr; - char * affixpath; char * encoding; struct cs_info * csconv; int langnum; @@ -34,15 +33,12 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell public: /* Hunspell(aff, dic) - constructor of Hunspell class - * input: path of affix file and dictionary file + * input: The affix and dictionary data as bytes */ - Hunspell(const char * affpath, const char * dpath, const char * key = NULL); + Hunspell(const char *affix_data, const size_t affix_len, const char *dic_data, const size_t dic_len); ~Hunspell(); - /* load extra dictionaries (only dic files) */ - int add_dic(const char * dpath, const char * key = NULL); - /* spell(word) - spellcheck word * output: 0 = bad word, not 0 = good word * diff --git a/src/hunspell/hunzip.cxx b/src/hunspell/hunzip.cxx deleted file mode 100644 index b50599fa84..0000000000 --- a/src/hunspell/hunzip.cxx +++ /dev/null @@ -1,193 +0,0 @@ -#include -#include -#include - -#include "hunzip.hxx" - -#define CODELEN 65536 -#define BASEBITREC 5000 - -#define UNCOMPRESSED '\002' -#define MAGIC "hz0" -#define MAGIC_ENCRYPT "hz1" -#define MAGICLEN (sizeof(MAGIC) - 1) - -int Hunzip::fail(const char * err, const char * par) { - fprintf(stderr, err, par); - return -1; -} - -Hunzip::Hunzip(const char * file, const char * key) { - bufsiz = 0; - lastbit = 0; - inc 
= 0; - outc = 0; - dec = NULL; - fin = NULL; - filename = (char *) malloc(strlen(file) + 1); - if (filename) strcpy(filename, file); - if (getcode(key) == -1) bufsiz = -1; - else bufsiz = getbuf(); -} - -int Hunzip::getcode(const char * key) { - unsigned char c[2]; - int i, j, n, p; - int allocatedbit = BASEBITREC; - const char * enc = key; - - if (!filename) return -1; - - fin = fopen(filename, "rb"); - if (!fin) return -1; - - // read magic number - if ((fread(in, 1, 3, fin) < MAGICLEN) - || !(strncmp(MAGIC, in, MAGICLEN) == 0 || - strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) { - return fail(MSG_FORMAT, filename); - } - - // check encryption - if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) { - unsigned char cs; - if (!key) return fail(MSG_KEY, filename); - if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); - for (cs = 0; *enc; enc++) cs ^= *enc; - if (cs != c[0]) return fail(MSG_KEY, filename); - enc = key; - } else key = NULL; - - // read record count - if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); - - if (key) { - c[0] ^= *enc; - if (*(++enc) == '\0') enc = key; - c[1] ^= *enc; - } - - n = ((int) c[0] << 8) + c[1]; - dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit)); - if (!dec) return fail(MSG_MEMORY, filename); - dec[0].v[0] = 0; - dec[0].v[1] = 0; - - // read codes - for (i = 0; i < n; i++) { - unsigned char l; - if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); - if (key) { - if (*(++enc) == '\0') enc = key; - c[0] ^= *enc; - if (*(++enc) == '\0') enc = key; - c[1] ^= *enc; - } - if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); - if (key) { - if (*(++enc) == '\0') enc = key; - l ^= *enc; - } - if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename); - if (key) for (j = 0; j <= l/8; j++) { - if (*(++enc) == '\0') enc = key; - in[j] ^= *enc; - } - p = 0; - for (j = 0; j < l; j++) { - int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 
1 : 0; - int oldp = p; - p = dec[p].v[b]; - if (p == 0) { - lastbit++; - if (lastbit == allocatedbit) { - allocatedbit += BASEBITREC; - dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit)); - } - dec[lastbit].v[0] = 0; - dec[lastbit].v[1] = 0; - dec[oldp].v[b] = lastbit; - p = lastbit; - } - } - dec[p].c[0] = c[0]; - dec[p].c[1] = c[1]; - } - return 0; -} - -Hunzip::~Hunzip() -{ - if (dec) free(dec); - if (fin) fclose(fin); - if (filename) free(filename); -} - -int Hunzip::getbuf() { - int p = 0; - int o = 0; - do { - if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8; - for (; inc < inbits; inc++) { - int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; - int oldp = p; - p = dec[p].v[b]; - if (p == 0) { - if (oldp == lastbit) { - fclose(fin); - fin = NULL; - // add last odd byte - if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1]; - return o; - } - out[o++] = dec[oldp].c[0]; - out[o++] = dec[oldp].c[1]; - if (o == BUFSIZE) return o; - p = dec[p].v[b]; - } - } - inc = 0; - } while (inbits == BUFSIZE * 8); - return fail(MSG_FORMAT, filename); -} - -const char * Hunzip::getline() { - char linebuf[BUFSIZE]; - int l = 0, eol = 0, left = 0, right = 0; - if (bufsiz == -1) return NULL; - while (l < bufsiz && !eol) { - linebuf[l++] = out[outc]; - switch (out[outc]) { - case '\t': break; - case 31: { // escape - if (++outc == bufsiz) { - bufsiz = getbuf(); - outc = 0; - } - linebuf[l - 1] = out[outc]; - break; - } - case ' ': break; - default: if (((unsigned char) out[outc]) < 47) { - if (out[outc] > 32) { - right = out[outc] - 31; - if (++outc == bufsiz) { - bufsiz = getbuf(); - outc = 0; - } - } - if (out[outc] == 30) left = 9; else left = out[outc]; - linebuf[l-1] = '\n'; - eol = 1; - } - } - if (++outc == bufsiz) { - outc = 0; - bufsiz = fin ? 
getbuf(): -1; - } - } - if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1); - else linebuf[l] = '\0'; - strcpy(line + left, linebuf); - return line; -} diff --git a/src/hunspell/hunzip.hxx b/src/hunspell/hunzip.hxx deleted file mode 100644 index b58e3ab1dc..0000000000 --- a/src/hunspell/hunzip.hxx +++ /dev/null @@ -1,45 +0,0 @@ -/* hunzip: file decompression for sorted dictionaries with optional encryption, - * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */ - -#ifndef _HUNZIP_HXX_ -#define _HUNZIP_HXX_ - -#include "hunvisapi.h" - -#include - -#define BUFSIZE 65536 -#define HZIP_EXTENSION ".hz" - -#define MSG_OPEN "error: %s: cannot open\n" -#define MSG_FORMAT "error: %s: not in hzip format\n" -#define MSG_MEMORY "error: %s: missing memory\n" -#define MSG_KEY "error: %s: missing or bad password\n" - -struct bit { - unsigned char c[2]; - int v[2]; -}; - -class LIBHUNSPELL_DLL_EXPORTED Hunzip -{ - -protected: - char * filename; - FILE * fin; - int bufsiz, lastbit, inc, inbits, outc; - struct bit * dec; // code table - char in[BUFSIZE]; // input buffer - char out[BUFSIZE + 1]; // Huffman-decoded buffer - char line[BUFSIZE + 50]; // decoded line - int getcode(const char * key); - int getbuf(); - int fail(const char * err, const char * par); - -public: - Hunzip(const char * filename, const char * key = NULL); - ~Hunzip(); - const char * getline(); -}; - -#endif