mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
De-vendor hunspell
This commit is contained in:
parent
9601d1c4ba
commit
22a1481bcd
@ -254,10 +254,6 @@ License: BSD
|
|||||||
The full text of the BSD license is distributed as in
|
The full text of the BSD license is distributed as in
|
||||||
/usr/share/common-licenses/BSD on Debian systems.
|
/usr/share/common-licenses/BSD on Debian systems.
|
||||||
|
|
||||||
Files: src/hunspell/*
|
|
||||||
Copyright: Various
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: src/calibre/gui2/tweak_book/diff/_patiencediff_c.c
|
Files: src/calibre/gui2/tweak_book/diff/_patiencediff_c.c
|
||||||
Copyright: Canonical
|
Copyright: Canonical
|
||||||
License: GPL-2+
|
License: GPL-2+
|
||||||
|
@ -384,6 +384,15 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "hunspell",
|
||||||
|
"unix": {
|
||||||
|
"filename": "hunspell-1.7.0.tar.gz",
|
||||||
|
"hash": "sha256:57be4e03ae9dd62c3471f667a0d81a14513e314d4d92081292b90435944ff951",
|
||||||
|
"urls": ["https://github.com/hunspell/hunspell/files/2573619/{filename}"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"name": "qt-base",
|
"name": "qt-base",
|
||||||
"version": "5.13.0",
|
"version": "5.13.0",
|
||||||
|
@ -126,6 +126,8 @@ icu_inc_dirs = []
|
|||||||
icu_lib_dirs = []
|
icu_lib_dirs = []
|
||||||
zlib_inc_dirs = []
|
zlib_inc_dirs = []
|
||||||
zlib_lib_dirs = []
|
zlib_lib_dirs = []
|
||||||
|
hunspell_inc_dirs = []
|
||||||
|
hunspell_lib_dirs = []
|
||||||
openssl_inc_dirs, openssl_lib_dirs = [], []
|
openssl_inc_dirs, openssl_lib_dirs = [], []
|
||||||
ICU = sw = ''
|
ICU = sw = ''
|
||||||
|
|
||||||
@ -152,6 +154,7 @@ elif isosx:
|
|||||||
sw_inc_dir = os.path.join(sw, 'include')
|
sw_inc_dir = os.path.join(sw, 'include')
|
||||||
sw_lib_dir = os.path.join(sw, 'lib')
|
sw_lib_dir = os.path.join(sw, 'lib')
|
||||||
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
|
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
|
||||||
|
hunspell_inc_dirs = [os.path.join(sw_inc_dir, 'hunspell')]
|
||||||
podofo_lib = sw_lib_dir
|
podofo_lib = sw_lib_dir
|
||||||
ft_libs = ['freetype']
|
ft_libs = ['freetype']
|
||||||
ft_inc_dirs = [sw + '/include/freetype2']
|
ft_inc_dirs = [sw + '/include/freetype2']
|
||||||
@ -163,6 +166,8 @@ else:
|
|||||||
'/usr/include/freetype2')
|
'/usr/include/freetype2')
|
||||||
ft_lib_dirs = pkgconfig_lib_dirs('freetype2', 'FT_LIB_DIR', '/usr/lib')
|
ft_lib_dirs = pkgconfig_lib_dirs('freetype2', 'FT_LIB_DIR', '/usr/lib')
|
||||||
ft_libs = pkgconfig_libs('freetype2', '', '')
|
ft_libs = pkgconfig_libs('freetype2', '', '')
|
||||||
|
hunspell_inc_dirs = pkgconfig_include_dirs('hunspell', 'HUNSPELL_INC_DIR', '/usr/include/hunspell')
|
||||||
|
hunspell_lib_dirs = pkgconfig_lib_dirs('hunspell', 'HUNSPELL_LIB_DIR', '/usr/lib')
|
||||||
sw = os.environ.get('SW', os.path.expanduser('~/sw'))
|
sw = os.environ.get('SW', os.path.expanduser('~/sw'))
|
||||||
podofo_inc = '/usr/include/podofo'
|
podofo_inc = '/usr/include/podofo'
|
||||||
podofo_lib = '/usr/lib'
|
podofo_lib = '/usr/lib'
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
[
|
[
|
||||||
{
|
{
|
||||||
"name": "hunspell",
|
"name": "hunspell",
|
||||||
"sources": "hunspell/affentry.cxx hunspell/affixmgr.cxx hunspell/csutil.cxx hunspell/dictmgr.cxx hunspell/filemgr.cxx hunspell/hashmgr.cxx hunspell/hunspell.cxx hunspell/phonet.cxx hunspell/replist.cxx hunspell/suggestmgr.cxx calibre/utils/spell/hunspell_wrapper.cpp",
|
"sources": "calibre/utils/spell/hunspell_wrapper.cpp",
|
||||||
"inc_dirs": "hunspell",
|
"inc_dirs": "!hunspell_inc_dirs",
|
||||||
"defines": "HUNSPELL_STATIC",
|
"lib_dirs": "!hunspell_lib_dirs",
|
||||||
"windows_defines": "HUNSPELL_STATIC _CRT_SECURE_NO_WARNINGS UNICODE _UNICODE",
|
"libraries": "hunspell",
|
||||||
"optimize_level": 2
|
"needs_c++11": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "monotonic",
|
"name": "monotonic",
|
||||||
|
@ -1,22 +1,27 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2014, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
import glob
|
||||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
import os
|
||||||
|
import re
|
||||||
import os, glob, shutil, re, sys
|
import shutil
|
||||||
from collections import namedtuple, defaultdict
|
import sys
|
||||||
from itertools import chain
|
from collections import defaultdict, namedtuple
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from itertools import chain
|
||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.constants import plugins, config_dir
|
from calibre.constants import (
|
||||||
|
config_dir, filesystem_encoding, ispy3, iswindows, plugins
|
||||||
|
)
|
||||||
from calibre.spell import parse_lang_code
|
from calibre.spell import parse_lang_code
|
||||||
from calibre.utils.config import JSONConfig
|
from calibre.utils.config import JSONConfig
|
||||||
from calibre.utils.icu import capitalize
|
from calibre.utils.icu import capitalize
|
||||||
from calibre.utils.localization import get_lang, get_system_locale
|
from calibre.utils.localization import get_lang, get_system_locale
|
||||||
from polyglot.builtins import iteritems, itervalues, unicode_type, filter
|
from polyglot.builtins import filter, iteritems, itervalues, unicode_type
|
||||||
|
|
||||||
|
|
||||||
Dictionary = namedtuple('Dictionary', 'primary_locale locales dicpath affpath builtin name id')
|
Dictionary = namedtuple('Dictionary', 'primary_locale locales dicpath affpath builtin name id')
|
||||||
LoadedDictionary = namedtuple('Dictionary', 'primary_locale locales obj builtin name id')
|
LoadedDictionary = namedtuple('Dictionary', 'primary_locale locales obj builtin name id')
|
||||||
@ -163,11 +168,18 @@ def get_dictionary(locale, exact_match=False):
|
|||||||
|
|
||||||
|
|
||||||
def load_dictionary(dictionary):
|
def load_dictionary(dictionary):
|
||||||
from calibre.spell.import_from import convert_to_utf8
|
|
||||||
with open(dictionary.dicpath, 'rb') as dic, open(dictionary.affpath, 'rb') as aff:
|
def fix_path(path):
|
||||||
dic_data, aff_data = dic.read(), aff.read()
|
if isinstance(path, bytes):
|
||||||
dic_data, aff_data = convert_to_utf8(dic_data, aff_data)
|
path = path.decode(filesystem_encoding)
|
||||||
obj = hunspell.Dictionary(dic_data, aff_data)
|
path = os.path.abspath(path)
|
||||||
|
if iswindows:
|
||||||
|
path = r'\\?\{}'.format(path)
|
||||||
|
if not ispy3:
|
||||||
|
path = path.encode('utf-8')
|
||||||
|
return path
|
||||||
|
|
||||||
|
obj = hunspell.Dictionary(fix_path(dictionary.dicpath), fix_path(dictionary.affpath))
|
||||||
return LoadedDictionary(dictionary.primary_locale, dictionary.locales, obj, dictionary.builtin, dictionary.name, dictionary.id)
|
return LoadedDictionary(dictionary.primary_locale, dictionary.locales, obj, dictionary.builtin, dictionary.name, dictionary.id)
|
||||||
|
|
||||||
|
|
||||||
@ -414,6 +426,14 @@ class Dictionaries(object):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
def build_test():
|
||||||
|
dictionaries = Dictionaries()
|
||||||
|
dictionaries.initialize()
|
||||||
|
eng = parse_lang_code('en')
|
||||||
|
if not dictionaries.recognized('recognized', locale=eng):
|
||||||
|
raise AssertionError('The word recognized was not recognized')
|
||||||
|
|
||||||
|
|
||||||
def find_tests():
|
def find_tests():
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
@ -438,5 +458,6 @@ def find_tests():
|
|||||||
self.assertIn('one\u2010half', self.suggestions('oone\u2010half'))
|
self.assertIn('one\u2010half', self.suggestions('oone\u2010half'))
|
||||||
self.assertIn('adequately', self.suggestions('ade-quately'))
|
self.assertIn('adequately', self.suggestions('ade-quately'))
|
||||||
self.assertIn('magic. Wand', self.suggestions('magic.wand'))
|
self.assertIn('magic. Wand', self.suggestions('magic.wand'))
|
||||||
|
self.assertIn('List', self.suggestions('Lis𝑘t'))
|
||||||
|
|
||||||
return unittest.TestLoader().loadTestsFromTestCase(TestDictionaries)
|
return unittest.TestLoader().loadTestsFromTestCase(TestDictionaries)
|
||||||
|
@ -46,6 +46,10 @@ class BuildTest(unittest.TestCase):
|
|||||||
self.assertEqual(regex.findall(r'(?i)(a)(b)', 'ab cd AB 1a1b'), [('a', 'b'), ('A', 'B')])
|
self.assertEqual(regex.findall(r'(?i)(a)(b)', 'ab cd AB 1a1b'), [('a', 'b'), ('A', 'B')])
|
||||||
self.assertEqual(regex.escape('a b', literal_spaces=True), 'a b')
|
self.assertEqual(regex.escape('a b', literal_spaces=True), 'a b')
|
||||||
|
|
||||||
|
def test_hunspell(self):
|
||||||
|
from calibre.spell.dictionary import build_test
|
||||||
|
build_test()
|
||||||
|
|
||||||
def test_chardet(self):
|
def test_chardet(self):
|
||||||
from chardet import detect
|
from chardet import detect
|
||||||
raw = 'mūsi Füße'.encode('utf-8')
|
raw = 'mūsi Füße'.encode('utf-8')
|
||||||
|
@ -23,15 +23,14 @@ static PyObject *HunspellError = NULL;
|
|||||||
static int
|
static int
|
||||||
init_type(Dictionary *self, PyObject *args, PyObject *kwds) {
|
init_type(Dictionary *self, PyObject *args, PyObject *kwds) {
|
||||||
char *dic = NULL, *aff = NULL;
|
char *dic = NULL, *aff = NULL;
|
||||||
Py_ssize_t diclen, afflen;
|
|
||||||
|
|
||||||
self->handle = NULL;
|
self->handle = NULL;
|
||||||
self->encoding = NULL;
|
self->encoding = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "s#s#", &dic, &diclen, &aff, &afflen)) return 1;
|
if (!PyArg_ParseTuple(args, "ss", &dic, &aff)) return 1;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
self->handle = new (std::nothrow) Hunspell(aff, afflen, dic, diclen);
|
self->handle = new (std::nothrow) Hunspell(aff, dic);
|
||||||
} catch (const std::exception &ex) {
|
} catch (const std::exception &ex) {
|
||||||
PyErr_SetString(HunspellError, ex.what());
|
PyErr_SetString(HunspellError, ex.what());
|
||||||
return 1;
|
return 1;
|
||||||
@ -58,35 +57,33 @@ dealloc(Dictionary *self) {
|
|||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
recognized(Dictionary *self, PyObject *args) {
|
recognized(Dictionary *self, PyObject *args) {
|
||||||
char *word = NULL;
|
char *w = NULL;
|
||||||
if (!PyArg_ParseTuple(args, "es", self->encoding, &word)) return NULL;
|
if (!PyArg_ParseTuple(args, "es", self->encoding, &w)) return NULL;
|
||||||
|
std::string word(w);
|
||||||
|
PyMem_Free(w);
|
||||||
|
|
||||||
if (self->handle->spell(word) == 0) { PyMem_Free(word); Py_RETURN_FALSE;}
|
if (!self->handle->spell(word)) { Py_RETURN_FALSE;}
|
||||||
PyMem_Free(word);
|
|
||||||
Py_RETURN_TRUE;
|
Py_RETURN_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
suggest(Dictionary *self, PyObject *args) {
|
suggest(Dictionary *self, PyObject *args) {
|
||||||
char *word = NULL, **slist = NULL;
|
char *w = NULL;
|
||||||
int i, num_slist;
|
|
||||||
PyObject *ans, *temp;
|
PyObject *ans, *temp;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "es", self->encoding, &word)) return NULL;
|
if (!PyArg_ParseTuple(args, "es", self->encoding, &w)) return NULL;
|
||||||
|
const std::string word(w);
|
||||||
|
PyMem_Free(w);
|
||||||
|
|
||||||
num_slist = self->handle->suggest(&slist, word);
|
const std::vector<std::string>& word_list = self->handle->suggest(word);
|
||||||
ans = PyTuple_New(num_slist);
|
ans = PyTuple_New(word_list.size());
|
||||||
if (ans == NULL) PyErr_NoMemory();
|
if (ans == NULL) PyErr_NoMemory();
|
||||||
else {
|
Py_ssize_t i = 0;
|
||||||
for (i = 0; i < num_slist; i++) {
|
for(auto const& s: word_list) {
|
||||||
temp = PyUnicode_Decode(slist[i], strlen(slist[i]), self->encoding, "strict");
|
temp = PyUnicode_Decode(s.c_str(), s.size(), self->encoding, "strict");
|
||||||
if (temp == NULL) { Py_DECREF(ans); ans = NULL; break; }
|
if (temp == NULL) { Py_DECREF(ans); ans = NULL; break; }
|
||||||
PyTuple_SET_ITEM(ans, i, temp);
|
PyTuple_SET_ITEM(ans, i++, temp);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (slist != NULL) self->handle->free_list(&slist, num_slist);
|
|
||||||
PyMem_Free(word);
|
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,19 +0,0 @@
|
|||||||
lib_LTLIBRARIES = libhunspell-1.3.la
|
|
||||||
libhunspell_1_3_includedir = $(includedir)/hunspell
|
|
||||||
libhunspell_1_3_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \
|
|
||||||
dictmgr.cxx hashmgr.cxx hunspell.cxx \
|
|
||||||
suggestmgr.cxx license.myspell license.hunspell \
|
|
||||||
phonet.cxx filemgr.cxx hunzip.cxx replist.cxx
|
|
||||||
|
|
||||||
libhunspell_1_3_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \
|
|
||||||
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \
|
|
||||||
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \
|
|
||||||
phonet.hxx filemgr.hxx hunzip.hxx w_char.hxx replist.hxx \
|
|
||||||
hunvisapi.h
|
|
||||||
|
|
||||||
libhunspell_1_3_la_DEPENDENCIES=utf_info.cxx
|
|
||||||
libhunspell_1_3_la_LDFLAGS=-no-undefined
|
|
||||||
|
|
||||||
AM_CXXFLAGS=$(CFLAG_VISIBILITY) -DBUILDING_LIBHUNSPELL
|
|
||||||
|
|
||||||
EXTRA_DIST=hunspell.dsp makefile.mk README utf_info.cxx
|
|
@ -1,661 +0,0 @@
|
|||||||
# Makefile.in generated by automake 1.11.3 from Makefile.am.
|
|
||||||
# @configure_input@
|
|
||||||
|
|
||||||
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
|
|
||||||
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
|
|
||||||
# Foundation, Inc.
|
|
||||||
# This Makefile.in is free software; the Free Software Foundation
|
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
|
||||||
# with or without modifications, as long as this notice is preserved.
|
|
||||||
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
|
||||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
|
||||||
# PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
@SET_MAKE@
|
|
||||||
|
|
||||||
|
|
||||||
VPATH = @srcdir@
|
|
||||||
pkgdatadir = $(datadir)/@PACKAGE@
|
|
||||||
pkgincludedir = $(includedir)/@PACKAGE@
|
|
||||||
pkglibdir = $(libdir)/@PACKAGE@
|
|
||||||
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
|
||||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
|
||||||
install_sh_DATA = $(install_sh) -c -m 644
|
|
||||||
install_sh_PROGRAM = $(install_sh) -c
|
|
||||||
install_sh_SCRIPT = $(install_sh) -c
|
|
||||||
INSTALL_HEADER = $(INSTALL_DATA)
|
|
||||||
transform = $(program_transform_name)
|
|
||||||
NORMAL_INSTALL = :
|
|
||||||
PRE_INSTALL = :
|
|
||||||
POST_INSTALL = :
|
|
||||||
NORMAL_UNINSTALL = :
|
|
||||||
PRE_UNINSTALL = :
|
|
||||||
POST_UNINSTALL = :
|
|
||||||
build_triplet = @build@
|
|
||||||
host_triplet = @host@
|
|
||||||
target_triplet = @target@
|
|
||||||
subdir = src/hunspell
|
|
||||||
DIST_COMMON = README $(libhunspell_1_3_include_HEADERS) \
|
|
||||||
$(srcdir)/Makefile.am $(srcdir)/Makefile.in \
|
|
||||||
$(srcdir)/hunvisapi.h.in
|
|
||||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
|
||||||
am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
|
|
||||||
$(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \
|
|
||||||
$(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \
|
|
||||||
$(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \
|
|
||||||
$(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \
|
|
||||||
$(top_srcdir)/m4/inttypes-pri.m4 \
|
|
||||||
$(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \
|
|
||||||
$(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
|
|
||||||
$(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \
|
|
||||||
$(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
|
|
||||||
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
|
|
||||||
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
|
|
||||||
$(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
|
|
||||||
$(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \
|
|
||||||
$(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \
|
|
||||||
$(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \
|
|
||||||
$(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \
|
|
||||||
$(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac
|
|
||||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
|
||||||
$(ACLOCAL_M4)
|
|
||||||
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
|
|
||||||
CONFIG_HEADER = $(top_builddir)/config.h
|
|
||||||
CONFIG_CLEAN_FILES = hunvisapi.h
|
|
||||||
CONFIG_CLEAN_VPATH_FILES =
|
|
||||||
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
|
||||||
am__vpath_adj = case $$p in \
|
|
||||||
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
|
|
||||||
*) f=$$p;; \
|
|
||||||
esac;
|
|
||||||
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
|
|
||||||
am__install_max = 40
|
|
||||||
am__nobase_strip_setup = \
|
|
||||||
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
|
|
||||||
am__nobase_strip = \
|
|
||||||
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
|
|
||||||
am__nobase_list = $(am__nobase_strip_setup); \
|
|
||||||
for p in $$list; do echo "$$p $$p"; done | \
|
|
||||||
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
|
|
||||||
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
|
|
||||||
if (++n[$$2] == $(am__install_max)) \
|
|
||||||
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
|
|
||||||
END { for (dir in files) print dir, files[dir] }'
|
|
||||||
am__base_list = \
|
|
||||||
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
|
|
||||||
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
|
|
||||||
am__uninstall_files_from_dir = { \
|
|
||||||
test -z "$$files" \
|
|
||||||
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|
|
||||||
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
|
|
||||||
$(am__cd) "$$dir" && rm -f $$files; }; \
|
|
||||||
}
|
|
||||||
am__installdirs = "$(DESTDIR)$(libdir)" \
|
|
||||||
"$(DESTDIR)$(libhunspell_1_3_includedir)"
|
|
||||||
LTLIBRARIES = $(lib_LTLIBRARIES)
|
|
||||||
libhunspell_1_3_la_LIBADD =
|
|
||||||
am_libhunspell_1_3_la_OBJECTS = affentry.lo affixmgr.lo csutil.lo \
|
|
||||||
dictmgr.lo hashmgr.lo hunspell.lo suggestmgr.lo phonet.lo \
|
|
||||||
filemgr.lo hunzip.lo replist.lo
|
|
||||||
libhunspell_1_3_la_OBJECTS = $(am_libhunspell_1_3_la_OBJECTS)
|
|
||||||
libhunspell_1_3_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
|
|
||||||
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
|
|
||||||
$(CXXFLAGS) $(libhunspell_1_3_la_LDFLAGS) $(LDFLAGS) -o $@
|
|
||||||
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
|
||||||
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
|
||||||
am__depfiles_maybe = depfiles
|
|
||||||
am__mv = mv -f
|
|
||||||
CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
|
||||||
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
|
|
||||||
LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
|
||||||
--mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
|
||||||
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
|
|
||||||
CXXLD = $(CXX)
|
|
||||||
CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
|
||||||
--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
|
|
||||||
$(LDFLAGS) -o $@
|
|
||||||
SOURCES = $(libhunspell_1_3_la_SOURCES)
|
|
||||||
DIST_SOURCES = $(libhunspell_1_3_la_SOURCES)
|
|
||||||
HEADERS = $(libhunspell_1_3_include_HEADERS)
|
|
||||||
ETAGS = etags
|
|
||||||
CTAGS = ctags
|
|
||||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
|
||||||
ACLOCAL = @ACLOCAL@
|
|
||||||
ALLOCA = @ALLOCA@
|
|
||||||
AMTAR = @AMTAR@
|
|
||||||
AR = @AR@
|
|
||||||
AS = @AS@
|
|
||||||
AUTOCONF = @AUTOCONF@
|
|
||||||
AUTOHEADER = @AUTOHEADER@
|
|
||||||
AUTOMAKE = @AUTOMAKE@
|
|
||||||
AWK = @AWK@
|
|
||||||
BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
|
|
||||||
CATOBJEXT = @CATOBJEXT@
|
|
||||||
CC = @CC@
|
|
||||||
CCDEPMODE = @CCDEPMODE@
|
|
||||||
CFLAGS = @CFLAGS@
|
|
||||||
CFLAG_VISIBILITY = @CFLAG_VISIBILITY@
|
|
||||||
CPP = @CPP@
|
|
||||||
CPPFLAGS = @CPPFLAGS@
|
|
||||||
CURSESLIB = @CURSESLIB@
|
|
||||||
CXX = @CXX@
|
|
||||||
CXXCPP = @CXXCPP@
|
|
||||||
CXXDEPMODE = @CXXDEPMODE@
|
|
||||||
CXXFLAGS = @CXXFLAGS@
|
|
||||||
CYGPATH_W = @CYGPATH_W@
|
|
||||||
DATADIRNAME = @DATADIRNAME@
|
|
||||||
DEFS = @DEFS@
|
|
||||||
DEPDIR = @DEPDIR@
|
|
||||||
DLLTOOL = @DLLTOOL@
|
|
||||||
DSYMUTIL = @DSYMUTIL@
|
|
||||||
DUMPBIN = @DUMPBIN@
|
|
||||||
ECHO_C = @ECHO_C@
|
|
||||||
ECHO_N = @ECHO_N@
|
|
||||||
ECHO_T = @ECHO_T@
|
|
||||||
EGREP = @EGREP@
|
|
||||||
EXEEXT = @EXEEXT@
|
|
||||||
FGREP = @FGREP@
|
|
||||||
GENCAT = @GENCAT@
|
|
||||||
GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
|
|
||||||
GLIBC2 = @GLIBC2@
|
|
||||||
GLIBC21 = @GLIBC21@
|
|
||||||
GMSGFMT = @GMSGFMT@
|
|
||||||
GMSGFMT_015 = @GMSGFMT_015@
|
|
||||||
GREP = @GREP@
|
|
||||||
HAVE_ASPRINTF = @HAVE_ASPRINTF@
|
|
||||||
HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
|
|
||||||
HAVE_SNPRINTF = @HAVE_SNPRINTF@
|
|
||||||
HAVE_VISIBILITY = @HAVE_VISIBILITY@
|
|
||||||
HAVE_WPRINTF = @HAVE_WPRINTF@
|
|
||||||
HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@
|
|
||||||
HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@
|
|
||||||
INSTALL = @INSTALL@
|
|
||||||
INSTALL_DATA = @INSTALL_DATA@
|
|
||||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
|
||||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
|
||||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
|
||||||
INSTOBJEXT = @INSTOBJEXT@
|
|
||||||
INTLBISON = @INTLBISON@
|
|
||||||
INTLLIBS = @INTLLIBS@
|
|
||||||
INTLOBJS = @INTLOBJS@
|
|
||||||
INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
|
|
||||||
INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
|
|
||||||
LD = @LD@
|
|
||||||
LDFLAGS = @LDFLAGS@
|
|
||||||
LIBICONV = @LIBICONV@
|
|
||||||
LIBINTL = @LIBINTL@
|
|
||||||
LIBMULTITHREAD = @LIBMULTITHREAD@
|
|
||||||
LIBOBJS = @LIBOBJS@
|
|
||||||
LIBPTH = @LIBPTH@
|
|
||||||
LIBPTH_PREFIX = @LIBPTH_PREFIX@
|
|
||||||
LIBS = @LIBS@
|
|
||||||
LIBTHREAD = @LIBTHREAD@
|
|
||||||
LIBTOOL = @LIBTOOL@
|
|
||||||
LIPO = @LIPO@
|
|
||||||
LN_S = @LN_S@
|
|
||||||
LTLIBC = @LTLIBC@
|
|
||||||
LTLIBICONV = @LTLIBICONV@
|
|
||||||
LTLIBINTL = @LTLIBINTL@
|
|
||||||
LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
|
|
||||||
LTLIBOBJS = @LTLIBOBJS@
|
|
||||||
LTLIBPTH = @LTLIBPTH@
|
|
||||||
LTLIBTHREAD = @LTLIBTHREAD@
|
|
||||||
MAKEINFO = @MAKEINFO@
|
|
||||||
MKDIR_P = @MKDIR_P@
|
|
||||||
MSGFMT = @MSGFMT@
|
|
||||||
MSGFMT_015 = @MSGFMT_015@
|
|
||||||
MSGMERGE = @MSGMERGE@
|
|
||||||
NM = @NM@
|
|
||||||
NMEDIT = @NMEDIT@
|
|
||||||
OBJDUMP = @OBJDUMP@
|
|
||||||
OBJEXT = @OBJEXT@
|
|
||||||
OTOOL = @OTOOL@
|
|
||||||
OTOOL64 = @OTOOL64@
|
|
||||||
PACKAGE = @PACKAGE@
|
|
||||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
|
||||||
PACKAGE_NAME = @PACKAGE_NAME@
|
|
||||||
PACKAGE_STRING = @PACKAGE_STRING@
|
|
||||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
|
||||||
PACKAGE_URL = @PACKAGE_URL@
|
|
||||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
|
||||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
|
||||||
POSUB = @POSUB@
|
|
||||||
PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
|
|
||||||
RANLIB = @RANLIB@
|
|
||||||
READLINELIB = @READLINELIB@
|
|
||||||
SED = @SED@
|
|
||||||
SET_MAKE = @SET_MAKE@
|
|
||||||
SHELL = @SHELL@
|
|
||||||
STRIP = @STRIP@
|
|
||||||
USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
|
|
||||||
USE_NLS = @USE_NLS@
|
|
||||||
VERSION = @VERSION@
|
|
||||||
WINDRES = @WINDRES@
|
|
||||||
WOE32 = @WOE32@
|
|
||||||
WOE32DLL = @WOE32DLL@
|
|
||||||
XFAILED = @XFAILED@
|
|
||||||
XGETTEXT = @XGETTEXT@
|
|
||||||
XGETTEXT_015 = @XGETTEXT_015@
|
|
||||||
XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
|
|
||||||
abs_builddir = @abs_builddir@
|
|
||||||
abs_srcdir = @abs_srcdir@
|
|
||||||
abs_top_builddir = @abs_top_builddir@
|
|
||||||
abs_top_srcdir = @abs_top_srcdir@
|
|
||||||
ac_ct_CC = @ac_ct_CC@
|
|
||||||
ac_ct_CXX = @ac_ct_CXX@
|
|
||||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
|
||||||
am__include = @am__include@
|
|
||||||
am__leading_dot = @am__leading_dot@
|
|
||||||
am__quote = @am__quote@
|
|
||||||
am__tar = @am__tar@
|
|
||||||
am__untar = @am__untar@
|
|
||||||
bindir = @bindir@
|
|
||||||
build = @build@
|
|
||||||
build_alias = @build_alias@
|
|
||||||
build_cpu = @build_cpu@
|
|
||||||
build_os = @build_os@
|
|
||||||
build_vendor = @build_vendor@
|
|
||||||
builddir = @builddir@
|
|
||||||
datadir = @datadir@
|
|
||||||
datarootdir = @datarootdir@
|
|
||||||
docdir = @docdir@
|
|
||||||
dvidir = @dvidir@
|
|
||||||
exec_prefix = @exec_prefix@
|
|
||||||
host = @host@
|
|
||||||
host_alias = @host_alias@
|
|
||||||
host_cpu = @host_cpu@
|
|
||||||
host_os = @host_os@
|
|
||||||
host_vendor = @host_vendor@
|
|
||||||
htmldir = @htmldir@
|
|
||||||
includedir = @includedir@
|
|
||||||
infodir = @infodir@
|
|
||||||
install_sh = @install_sh@
|
|
||||||
libdir = @libdir@
|
|
||||||
libexecdir = @libexecdir@
|
|
||||||
localedir = @localedir@
|
|
||||||
localstatedir = @localstatedir@
|
|
||||||
lt_ECHO = @lt_ECHO@
|
|
||||||
mandir = @mandir@
|
|
||||||
mkdir_p = @mkdir_p@
|
|
||||||
oldincludedir = @oldincludedir@
|
|
||||||
pdfdir = @pdfdir@
|
|
||||||
prefix = @prefix@
|
|
||||||
program_transform_name = @program_transform_name@
|
|
||||||
psdir = @psdir@
|
|
||||||
sbindir = @sbindir@
|
|
||||||
sharedstatedir = @sharedstatedir@
|
|
||||||
srcdir = @srcdir@
|
|
||||||
sysconfdir = @sysconfdir@
|
|
||||||
target = @target@
|
|
||||||
target_alias = @target_alias@
|
|
||||||
target_cpu = @target_cpu@
|
|
||||||
target_os = @target_os@
|
|
||||||
target_vendor = @target_vendor@
|
|
||||||
top_build_prefix = @top_build_prefix@
|
|
||||||
top_builddir = @top_builddir@
|
|
||||||
top_srcdir = @top_srcdir@
|
|
||||||
lib_LTLIBRARIES = libhunspell-1.3.la
|
|
||||||
libhunspell_1_3_includedir = $(includedir)/hunspell
|
|
||||||
libhunspell_1_3_la_SOURCES = affentry.cxx affixmgr.cxx csutil.cxx \
|
|
||||||
dictmgr.cxx hashmgr.cxx hunspell.cxx \
|
|
||||||
suggestmgr.cxx license.myspell license.hunspell \
|
|
||||||
phonet.cxx filemgr.cxx hunzip.cxx replist.cxx
|
|
||||||
|
|
||||||
libhunspell_1_3_include_HEADERS = affentry.hxx htypes.hxx affixmgr.hxx \
|
|
||||||
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \
|
|
||||||
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \
|
|
||||||
phonet.hxx filemgr.hxx hunzip.hxx w_char.hxx replist.hxx \
|
|
||||||
hunvisapi.h
|
|
||||||
|
|
||||||
libhunspell_1_3_la_DEPENDENCIES = utf_info.cxx
|
|
||||||
libhunspell_1_3_la_LDFLAGS = -no-undefined
|
|
||||||
AM_CXXFLAGS = $(CFLAG_VISIBILITY) -DBUILDING_LIBHUNSPELL
|
|
||||||
EXTRA_DIST = hunspell.dsp makefile.mk README utf_info.cxx
|
|
||||||
all: all-am
|
|
||||||
|
|
||||||
.SUFFIXES:
|
|
||||||
.SUFFIXES: .cxx .lo .o .obj
|
|
||||||
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
|
||||||
@for dep in $?; do \
|
|
||||||
case '$(am__configure_deps)' in \
|
|
||||||
*$$dep*) \
|
|
||||||
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
|
||||||
&& { if test -f $@; then exit 0; else break; fi; }; \
|
|
||||||
exit 1;; \
|
|
||||||
esac; \
|
|
||||||
done; \
|
|
||||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/hunspell/Makefile'; \
|
|
||||||
$(am__cd) $(top_srcdir) && \
|
|
||||||
$(AUTOMAKE) --gnu src/hunspell/Makefile
|
|
||||||
.PRECIOUS: Makefile
|
|
||||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
|
||||||
@case '$?' in \
|
|
||||||
*config.status*) \
|
|
||||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
|
||||||
*) \
|
|
||||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
|
|
||||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
|
|
||||||
esac;
|
|
||||||
|
|
||||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
|
||||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
|
||||||
|
|
||||||
$(top_srcdir)/configure: $(am__configure_deps)
|
|
||||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
|
||||||
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
|
||||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
|
||||||
$(am__aclocal_m4_deps):
|
|
||||||
hunvisapi.h: $(top_builddir)/config.status $(srcdir)/hunvisapi.h.in
|
|
||||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
|
|
||||||
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
|
|
||||||
@$(NORMAL_INSTALL)
|
|
||||||
test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
|
|
||||||
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
|
|
||||||
list2=; for p in $$list; do \
|
|
||||||
if test -f $$p; then \
|
|
||||||
list2="$$list2 $$p"; \
|
|
||||||
else :; fi; \
|
|
||||||
done; \
|
|
||||||
test -z "$$list2" || { \
|
|
||||||
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
|
|
||||||
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
|
|
||||||
}
|
|
||||||
|
|
||||||
uninstall-libLTLIBRARIES:
|
|
||||||
@$(NORMAL_UNINSTALL)
|
|
||||||
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
|
|
||||||
for p in $$list; do \
|
|
||||||
$(am__strip_dir) \
|
|
||||||
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
|
|
||||||
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
|
|
||||||
done
|
|
||||||
|
|
||||||
clean-libLTLIBRARIES:
|
|
||||||
-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
|
|
||||||
@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
|
|
||||||
dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
|
|
||||||
test "$$dir" != "$$p" || dir=.; \
|
|
||||||
echo "rm -f \"$${dir}/so_locations\""; \
|
|
||||||
rm -f "$${dir}/so_locations"; \
|
|
||||||
done
|
|
||||||
libhunspell-1.3.la: $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_DEPENDENCIES) $(EXTRA_libhunspell_1_3_la_DEPENDENCIES)
|
|
||||||
$(libhunspell_1_3_la_LINK) -rpath $(libdir) $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_LIBADD) $(LIBS)
|
|
||||||
|
|
||||||
mostlyclean-compile:
|
|
||||||
-rm -f *.$(OBJEXT)
|
|
||||||
|
|
||||||
distclean-compile:
|
|
||||||
-rm -f *.tab.c
|
|
||||||
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affentry.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affixmgr.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/csutil.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dictmgr.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filemgr.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashmgr.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunspell.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunzip.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phonet.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/replist.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/suggestmgr.Plo@am__quote@
|
|
||||||
|
|
||||||
.cxx.o:
|
|
||||||
@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
|
||||||
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
|
||||||
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
|
||||||
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
|
||||||
@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
|
|
||||||
|
|
||||||
.cxx.obj:
|
|
||||||
@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
|
||||||
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
|
||||||
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
|
||||||
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
|
||||||
@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
|
|
||||||
|
|
||||||
.cxx.lo:
|
|
||||||
@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
|
||||||
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
|
||||||
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
|
||||||
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
|
||||||
@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
|
|
||||||
|
|
||||||
mostlyclean-libtool:
|
|
||||||
-rm -f *.lo
|
|
||||||
|
|
||||||
clean-libtool:
|
|
||||||
-rm -rf .libs _libs
|
|
||||||
install-libhunspell_1_3_includeHEADERS: $(libhunspell_1_3_include_HEADERS)
|
|
||||||
@$(NORMAL_INSTALL)
|
|
||||||
test -z "$(libhunspell_1_3_includedir)" || $(MKDIR_P) "$(DESTDIR)$(libhunspell_1_3_includedir)"
|
|
||||||
@list='$(libhunspell_1_3_include_HEADERS)'; test -n "$(libhunspell_1_3_includedir)" || list=; \
|
|
||||||
for p in $$list; do \
|
|
||||||
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
|
||||||
echo "$$d$$p"; \
|
|
||||||
done | $(am__base_list) | \
|
|
||||||
while read files; do \
|
|
||||||
echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libhunspell_1_3_includedir)'"; \
|
|
||||||
$(INSTALL_HEADER) $$files "$(DESTDIR)$(libhunspell_1_3_includedir)" || exit $$?; \
|
|
||||||
done
|
|
||||||
|
|
||||||
uninstall-libhunspell_1_3_includeHEADERS:
|
|
||||||
@$(NORMAL_UNINSTALL)
|
|
||||||
@list='$(libhunspell_1_3_include_HEADERS)'; test -n "$(libhunspell_1_3_includedir)" || list=; \
|
|
||||||
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
|
||||||
dir='$(DESTDIR)$(libhunspell_1_3_includedir)'; $(am__uninstall_files_from_dir)
|
|
||||||
|
|
||||||
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
|
||||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
|
||||||
unique=`for i in $$list; do \
|
|
||||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
|
||||||
done | \
|
|
||||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
|
||||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
|
||||||
mkid -fID $$unique
|
|
||||||
tags: TAGS
|
|
||||||
|
|
||||||
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
|
||||||
$(TAGS_FILES) $(LISP)
|
|
||||||
set x; \
|
|
||||||
here=`pwd`; \
|
|
||||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
|
||||||
unique=`for i in $$list; do \
|
|
||||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
|
||||||
done | \
|
|
||||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
|
||||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
|
||||||
shift; \
|
|
||||||
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
|
||||||
test -n "$$unique" || unique=$$empty_fix; \
|
|
||||||
if test $$# -gt 0; then \
|
|
||||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
|
||||||
"$$@" $$unique; \
|
|
||||||
else \
|
|
||||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
|
||||||
$$unique; \
|
|
||||||
fi; \
|
|
||||||
fi
|
|
||||||
ctags: CTAGS
|
|
||||||
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
|
||||||
$(TAGS_FILES) $(LISP)
|
|
||||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
|
||||||
unique=`for i in $$list; do \
|
|
||||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
|
||||||
done | \
|
|
||||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
|
||||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
|
||||||
test -z "$(CTAGS_ARGS)$$unique" \
|
|
||||||
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
|
||||||
$$unique
|
|
||||||
|
|
||||||
GTAGS:
|
|
||||||
here=`$(am__cd) $(top_builddir) && pwd` \
|
|
||||||
&& $(am__cd) $(top_srcdir) \
|
|
||||||
&& gtags -i $(GTAGS_ARGS) "$$here"
|
|
||||||
|
|
||||||
distclean-tags:
|
|
||||||
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
|
||||||
|
|
||||||
distdir: $(DISTFILES)
|
|
||||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
|
||||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
|
||||||
list='$(DISTFILES)'; \
|
|
||||||
dist_files=`for file in $$list; do echo $$file; done | \
|
|
||||||
sed -e "s|^$$srcdirstrip/||;t" \
|
|
||||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
|
||||||
case $$dist_files in \
|
|
||||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
|
||||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
|
||||||
sort -u` ;; \
|
|
||||||
esac; \
|
|
||||||
for file in $$dist_files; do \
|
|
||||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
|
||||||
if test -d $$d/$$file; then \
|
|
||||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
|
||||||
if test -d "$(distdir)/$$file"; then \
|
|
||||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
|
||||||
fi; \
|
|
||||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
|
||||||
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
|
||||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
|
||||||
fi; \
|
|
||||||
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
|
||||||
else \
|
|
||||||
test -f "$(distdir)/$$file" \
|
|
||||||
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
|
||||||
|| exit 1; \
|
|
||||||
fi; \
|
|
||||||
done
|
|
||||||
check-am: all-am
|
|
||||||
check: check-am
|
|
||||||
all-am: Makefile $(LTLIBRARIES) $(HEADERS)
|
|
||||||
installdirs:
|
|
||||||
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libhunspell_1_3_includedir)"; do \
|
|
||||||
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
|
||||||
done
|
|
||||||
install: install-am
|
|
||||||
install-exec: install-exec-am
|
|
||||||
install-data: install-data-am
|
|
||||||
uninstall: uninstall-am
|
|
||||||
|
|
||||||
install-am: all-am
|
|
||||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
|
||||||
|
|
||||||
installcheck: installcheck-am
|
|
||||||
install-strip:
|
|
||||||
if test -z '$(STRIP)'; then \
|
|
||||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
|
||||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
|
||||||
install; \
|
|
||||||
else \
|
|
||||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
|
||||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
|
||||||
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
|
||||||
fi
|
|
||||||
mostlyclean-generic:
|
|
||||||
|
|
||||||
clean-generic:
|
|
||||||
|
|
||||||
distclean-generic:
|
|
||||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
|
||||||
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
|
||||||
|
|
||||||
maintainer-clean-generic:
|
|
||||||
@echo "This command is intended for maintainers to use"
|
|
||||||
@echo "it deletes files that may require special tools to rebuild."
|
|
||||||
clean: clean-am
|
|
||||||
|
|
||||||
clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
|
|
||||||
mostlyclean-am
|
|
||||||
|
|
||||||
distclean: distclean-am
|
|
||||||
-rm -rf ./$(DEPDIR)
|
|
||||||
-rm -f Makefile
|
|
||||||
distclean-am: clean-am distclean-compile distclean-generic \
|
|
||||||
distclean-tags
|
|
||||||
|
|
||||||
dvi: dvi-am
|
|
||||||
|
|
||||||
dvi-am:
|
|
||||||
|
|
||||||
html: html-am
|
|
||||||
|
|
||||||
html-am:
|
|
||||||
|
|
||||||
info: info-am
|
|
||||||
|
|
||||||
info-am:
|
|
||||||
|
|
||||||
install-data-am: install-libhunspell_1_3_includeHEADERS
|
|
||||||
|
|
||||||
install-dvi: install-dvi-am
|
|
||||||
|
|
||||||
install-dvi-am:
|
|
||||||
|
|
||||||
install-exec-am: install-libLTLIBRARIES
|
|
||||||
|
|
||||||
install-html: install-html-am
|
|
||||||
|
|
||||||
install-html-am:
|
|
||||||
|
|
||||||
install-info: install-info-am
|
|
||||||
|
|
||||||
install-info-am:
|
|
||||||
|
|
||||||
install-man:
|
|
||||||
|
|
||||||
install-pdf: install-pdf-am
|
|
||||||
|
|
||||||
install-pdf-am:
|
|
||||||
|
|
||||||
install-ps: install-ps-am
|
|
||||||
|
|
||||||
install-ps-am:
|
|
||||||
|
|
||||||
installcheck-am:
|
|
||||||
|
|
||||||
maintainer-clean: maintainer-clean-am
|
|
||||||
-rm -rf ./$(DEPDIR)
|
|
||||||
-rm -f Makefile
|
|
||||||
maintainer-clean-am: distclean-am maintainer-clean-generic
|
|
||||||
|
|
||||||
mostlyclean: mostlyclean-am
|
|
||||||
|
|
||||||
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
|
||||||
mostlyclean-libtool
|
|
||||||
|
|
||||||
pdf: pdf-am
|
|
||||||
|
|
||||||
pdf-am:
|
|
||||||
|
|
||||||
ps: ps-am
|
|
||||||
|
|
||||||
ps-am:
|
|
||||||
|
|
||||||
uninstall-am: uninstall-libLTLIBRARIES \
|
|
||||||
uninstall-libhunspell_1_3_includeHEADERS
|
|
||||||
|
|
||||||
.MAKE: install-am install-strip
|
|
||||||
|
|
||||||
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
|
|
||||||
clean-libLTLIBRARIES clean-libtool ctags distclean \
|
|
||||||
distclean-compile distclean-generic distclean-libtool \
|
|
||||||
distclean-tags distdir dvi dvi-am html html-am info info-am \
|
|
||||||
install install-am install-data install-data-am install-dvi \
|
|
||||||
install-dvi-am install-exec install-exec-am install-html \
|
|
||||||
install-html-am install-info install-info-am \
|
|
||||||
install-libLTLIBRARIES install-libhunspell_1_3_includeHEADERS \
|
|
||||||
install-man install-pdf install-pdf-am install-ps \
|
|
||||||
install-ps-am install-strip installcheck installcheck-am \
|
|
||||||
installdirs maintainer-clean maintainer-clean-generic \
|
|
||||||
mostlyclean mostlyclean-compile mostlyclean-generic \
|
|
||||||
mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
|
|
||||||
uninstall-am uninstall-libLTLIBRARIES \
|
|
||||||
uninstall-libhunspell_1_3_includeHEADERS
|
|
||||||
|
|
||||||
|
|
||||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
|
||||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
|
||||||
.NOEXPORT:
|
|
@ -1,21 +0,0 @@
|
|||||||
Hunspell spell checker and morphological analyser library
|
|
||||||
|
|
||||||
Documentation, tests, examples: http://hunspell.sourceforge.net
|
|
||||||
|
|
||||||
Author of Hunspell:
|
|
||||||
László Németh (nemethl (at) gyorsposta.hu)
|
|
||||||
|
|
||||||
Hunspell based on OpenOffice.org's Myspell. MySpell's author:
|
|
||||||
Kevin Hendricks (kevin.hendricks (at) sympatico.ca)
|
|
||||||
|
|
||||||
License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license
|
|
||||||
|
|
||||||
The contents of this library may be used under the terms of
|
|
||||||
the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL",
|
|
||||||
see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License
|
|
||||||
Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html).
|
|
||||||
|
|
||||||
Software distributed under these licenses is distributed on an "AS IS" basis,
|
|
||||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences
|
|
||||||
for the specific language governing rights and limitations under the licenses.
|
|
File diff suppressed because it is too large
Load Diff
@ -1,144 +0,0 @@
|
|||||||
#ifndef _AFFIX_HXX_
|
|
||||||
#define _AFFIX_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include "atypes.hxx"
|
|
||||||
#include "baseaffix.hxx"
|
|
||||||
#include "affixmgr.hxx"
|
|
||||||
|
|
||||||
/* A Prefix Entry */
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
PfxEntry(const PfxEntry&);
|
|
||||||
PfxEntry& operator = (const PfxEntry&);
|
|
||||||
private:
|
|
||||||
AffixMgr* pmyMgr;
|
|
||||||
|
|
||||||
PfxEntry * next;
|
|
||||||
PfxEntry * nexteq;
|
|
||||||
PfxEntry * nextne;
|
|
||||||
PfxEntry * flgnxt;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
PfxEntry(AffixMgr* pmgr, affentry* dp );
|
|
||||||
~PfxEntry();
|
|
||||||
|
|
||||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
|
||||||
struct hentry * checkword(const char * word, int len, char in_compound,
|
|
||||||
const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
char * check_morph(const char * word, int len, char in_compound,
|
|
||||||
const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
char * check_twosfx_morph(const char * word, int len,
|
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
inline FLAG getFlag() { return aflag; }
|
|
||||||
inline const char * getKey() { return appnd; }
|
|
||||||
char * add(const char * word, int len);
|
|
||||||
|
|
||||||
inline short getKeyLen() { return appndl; }
|
|
||||||
|
|
||||||
inline const char * getMorph() { return morphcode; }
|
|
||||||
|
|
||||||
inline const unsigned short * getCont() { return contclass; }
|
|
||||||
inline short getContLen() { return contclasslen; }
|
|
||||||
|
|
||||||
inline PfxEntry * getNext() { return next; }
|
|
||||||
inline PfxEntry * getNextNE() { return nextne; }
|
|
||||||
inline PfxEntry * getNextEQ() { return nexteq; }
|
|
||||||
inline PfxEntry * getFlgNxt() { return flgnxt; }
|
|
||||||
|
|
||||||
inline void setNext(PfxEntry * ptr) { next = ptr; }
|
|
||||||
inline void setNextNE(PfxEntry * ptr) { nextne = ptr; }
|
|
||||||
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
|
|
||||||
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
|
|
||||||
|
|
||||||
inline char * nextchar(char * p);
|
|
||||||
inline int test_condition(const char * st);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* A Suffix Entry */
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
SfxEntry(const SfxEntry&);
|
|
||||||
SfxEntry& operator = (const SfxEntry&);
|
|
||||||
private:
|
|
||||||
AffixMgr* pmyMgr;
|
|
||||||
char * rappnd;
|
|
||||||
|
|
||||||
SfxEntry * next;
|
|
||||||
SfxEntry * nexteq;
|
|
||||||
SfxEntry * nextne;
|
|
||||||
SfxEntry * flgnxt;
|
|
||||||
|
|
||||||
SfxEntry * l_morph;
|
|
||||||
SfxEntry * r_morph;
|
|
||||||
SfxEntry * eq_morph;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
SfxEntry(AffixMgr* pmgr, affentry* dp );
|
|
||||||
~SfxEntry();
|
|
||||||
|
|
||||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
|
||||||
struct hentry * checkword(const char * word, int len, int optflags,
|
|
||||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
|
||||||
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
|
|
||||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
|
|
||||||
|
|
||||||
struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
char * check_twosfx_morph(const char * word, int len, int optflags,
|
|
||||||
PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
|
||||||
struct hentry * get_next_homonym(struct hentry * he);
|
|
||||||
struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
|
|
||||||
const FLAG cclass, const FLAG needflag);
|
|
||||||
|
|
||||||
|
|
||||||
inline FLAG getFlag() { return aflag; }
|
|
||||||
inline const char * getKey() { return rappnd; }
|
|
||||||
char * add(const char * word, int len);
|
|
||||||
|
|
||||||
|
|
||||||
inline const char * getMorph() { return morphcode; }
|
|
||||||
|
|
||||||
inline const unsigned short * getCont() { return contclass; }
|
|
||||||
inline short getContLen() { return contclasslen; }
|
|
||||||
inline const char * getAffix() { return appnd; }
|
|
||||||
|
|
||||||
inline short getKeyLen() { return appndl; }
|
|
||||||
|
|
||||||
inline SfxEntry * getNext() { return next; }
|
|
||||||
inline SfxEntry * getNextNE() { return nextne; }
|
|
||||||
inline SfxEntry * getNextEQ() { return nexteq; }
|
|
||||||
|
|
||||||
inline SfxEntry * getLM() { return l_morph; }
|
|
||||||
inline SfxEntry * getRM() { return r_morph; }
|
|
||||||
inline SfxEntry * getEQM() { return eq_morph; }
|
|
||||||
inline SfxEntry * getFlgNxt() { return flgnxt; }
|
|
||||||
|
|
||||||
inline void setNext(SfxEntry * ptr) { next = ptr; }
|
|
||||||
inline void setNextNE(SfxEntry * ptr) { nextne = ptr; }
|
|
||||||
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
|
|
||||||
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
|
|
||||||
|
|
||||||
inline char * nextchar(char * p);
|
|
||||||
inline int test_condition(const char * st, const char * begin);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,251 +0,0 @@
|
|||||||
#ifndef _AFFIXMGR_HXX_
|
|
||||||
#define _AFFIXMGR_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "atypes.hxx"
|
|
||||||
#include "baseaffix.hxx"
|
|
||||||
#include "hashmgr.hxx"
|
|
||||||
#include "phonet.hxx"
|
|
||||||
#include "replist.hxx"
|
|
||||||
|
|
||||||
// check flag duplication
|
|
||||||
#define dupSFX (1 << 0)
|
|
||||||
#define dupPFX (1 << 1)
|
|
||||||
|
|
||||||
class PfxEntry;
|
|
||||||
class SfxEntry;
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
|
||||||
{
|
|
||||||
|
|
||||||
PfxEntry * pStart[SETSIZE];
|
|
||||||
SfxEntry * sStart[SETSIZE];
|
|
||||||
PfxEntry * pFlag[SETSIZE];
|
|
||||||
SfxEntry * sFlag[SETSIZE];
|
|
||||||
HashMgr * pHMgr;
|
|
||||||
HashMgr ** alldic;
|
|
||||||
int * maxdic;
|
|
||||||
char * keystring;
|
|
||||||
char * trystring;
|
|
||||||
char * encoding;
|
|
||||||
struct cs_info * csconv;
|
|
||||||
int utf8;
|
|
||||||
int complexprefixes;
|
|
||||||
FLAG compoundflag;
|
|
||||||
FLAG compoundbegin;
|
|
||||||
FLAG compoundmiddle;
|
|
||||||
FLAG compoundend;
|
|
||||||
FLAG compoundroot;
|
|
||||||
FLAG compoundforbidflag;
|
|
||||||
FLAG compoundpermitflag;
|
|
||||||
int compoundmoresuffixes;
|
|
||||||
int checkcompounddup;
|
|
||||||
int checkcompoundrep;
|
|
||||||
int checkcompoundcase;
|
|
||||||
int checkcompoundtriple;
|
|
||||||
int simplifiedtriple;
|
|
||||||
FLAG forbiddenword;
|
|
||||||
FLAG nosuggest;
|
|
||||||
FLAG nongramsuggest;
|
|
||||||
FLAG needaffix;
|
|
||||||
int cpdmin;
|
|
||||||
int numrep;
|
|
||||||
replentry * reptable;
|
|
||||||
RepList * iconvtable;
|
|
||||||
RepList * oconvtable;
|
|
||||||
int nummap;
|
|
||||||
mapentry * maptable;
|
|
||||||
int numbreak;
|
|
||||||
char ** breaktable;
|
|
||||||
int numcheckcpd;
|
|
||||||
patentry * checkcpdtable;
|
|
||||||
int simplifiedcpd;
|
|
||||||
int numdefcpd;
|
|
||||||
flagentry * defcpdtable;
|
|
||||||
phonetable * phone;
|
|
||||||
int maxngramsugs;
|
|
||||||
int maxcpdsugs;
|
|
||||||
int maxdiff;
|
|
||||||
int onlymaxdiff;
|
|
||||||
int nosplitsugs;
|
|
||||||
int sugswithdots;
|
|
||||||
int cpdwordmax;
|
|
||||||
int cpdmaxsyllable;
|
|
||||||
char * cpdvowels;
|
|
||||||
w_char * cpdvowels_utf16;
|
|
||||||
int cpdvowels_utf16_len;
|
|
||||||
char * cpdsyllablenum;
|
|
||||||
const char * pfxappnd; // BUG: not stateless
|
|
||||||
const char * sfxappnd; // BUG: not stateless
|
|
||||||
FLAG sfxflag; // BUG: not stateless
|
|
||||||
char * derived; // BUG: not stateless
|
|
||||||
SfxEntry * sfx; // BUG: not stateless
|
|
||||||
PfxEntry * pfx; // BUG: not stateless
|
|
||||||
int checknum;
|
|
||||||
char * wordchars;
|
|
||||||
unsigned short * wordchars_utf16;
|
|
||||||
int wordchars_utf16_len;
|
|
||||||
char * ignorechars;
|
|
||||||
unsigned short * ignorechars_utf16;
|
|
||||||
int ignorechars_utf16_len;
|
|
||||||
char * version;
|
|
||||||
char * lang;
|
|
||||||
int langnum;
|
|
||||||
FLAG lemma_present;
|
|
||||||
FLAG circumfix;
|
|
||||||
FLAG onlyincompound;
|
|
||||||
FLAG keepcase;
|
|
||||||
FLAG forceucase;
|
|
||||||
FLAG warn;
|
|
||||||
int forbidwarn;
|
|
||||||
FLAG substandard;
|
|
||||||
int checksharps;
|
|
||||||
int fullstrip;
|
|
||||||
|
|
||||||
int havecontclass; // boolean variable
|
|
||||||
char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, int * md);
|
|
||||||
~AffixMgr();
|
|
||||||
struct hentry * affix_check(const char * word, int len,
|
|
||||||
const unsigned short needflag = (unsigned short) 0,
|
|
||||||
char in_compound = IN_CPD_NOT);
|
|
||||||
struct hentry * prefix_check(const char * word, int len,
|
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
|
||||||
inline int isSubset(const char * s1, const char * s2);
|
|
||||||
struct hentry * prefix_check_twosfx(const char * word, int len,
|
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
|
||||||
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
|
|
||||||
struct hentry * suffix_check(const char * word, int len, int sfxopts,
|
|
||||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
|
||||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
|
|
||||||
char in_compound = IN_CPD_NOT);
|
|
||||||
struct hentry * suffix_check_twosfx(const char * word, int len,
|
|
||||||
int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
char * affix_check_morph(const char * word, int len,
|
|
||||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
|
||||||
char * prefix_check_morph(const char * word, int len,
|
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
|
||||||
char * suffix_check_morph (const char * word, int len, int sfxopts,
|
|
||||||
PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
|
|
||||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
|
||||||
|
|
||||||
char * prefix_check_twosfx_morph(const char * word, int len,
|
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
|
||||||
char * suffix_check_twosfx_morph(const char * word, int len,
|
|
||||||
int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
|
|
||||||
|
|
||||||
char * morphgen(char * ts, int wl, const unsigned short * ap,
|
|
||||||
unsigned short al, char * morph, char * targetmorph, int level);
|
|
||||||
|
|
||||||
int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
|
|
||||||
int wl, const unsigned short * ap, unsigned short al, char * bad,
|
|
||||||
int, char *);
|
|
||||||
|
|
||||||
short get_syllable (const char * word, int wlen);
|
|
||||||
int cpdrep_check(const char * word, int len);
|
|
||||||
int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
|
|
||||||
const char affixed);
|
|
||||||
int defcpd_check(hentry *** words, short wnum, hentry * rv,
|
|
||||||
hentry ** rwords, char all);
|
|
||||||
int cpdcase_check(const char * word, int len);
|
|
||||||
inline int candidate_check(const char * word, int len);
|
|
||||||
void setcminmax(int * cmin, int * cmax, const char * word, int len);
|
|
||||||
struct hentry * compound_check(const char * word, int len, short wordnum,
|
|
||||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
|
||||||
char hu_mov_rule, char is_sug, int * info);
|
|
||||||
|
|
||||||
int compound_check_morph(const char * word, int len, short wordnum,
|
|
||||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
|
||||||
char hu_mov_rule, char ** result, char * partresult);
|
|
||||||
|
|
||||||
struct hentry * lookup(const char * word);
|
|
||||||
int get_numrep() const;
|
|
||||||
struct replentry * get_reptable() const;
|
|
||||||
RepList * get_iconvtable() const;
|
|
||||||
RepList * get_oconvtable() const;
|
|
||||||
struct phonetable * get_phonetable() const;
|
|
||||||
int get_nummap() const;
|
|
||||||
struct mapentry * get_maptable() const;
|
|
||||||
int get_numbreak() const;
|
|
||||||
char ** get_breaktable() const;
|
|
||||||
char * get_encoding();
|
|
||||||
int get_langnum() const;
|
|
||||||
char * get_key_string();
|
|
||||||
char * get_try_string() const;
|
|
||||||
const char * get_wordchars() const;
|
|
||||||
unsigned short * get_wordchars_utf16(int * len) const;
|
|
||||||
char * get_ignore() const;
|
|
||||||
unsigned short * get_ignore_utf16(int * len) const;
|
|
||||||
int get_compound() const;
|
|
||||||
FLAG get_compoundflag() const;
|
|
||||||
FLAG get_compoundbegin() const;
|
|
||||||
FLAG get_forbiddenword() const;
|
|
||||||
FLAG get_nosuggest() const;
|
|
||||||
FLAG get_nongramsuggest() const;
|
|
||||||
FLAG get_needaffix() const;
|
|
||||||
FLAG get_onlyincompound() const;
|
|
||||||
FLAG get_compoundroot() const;
|
|
||||||
FLAG get_lemma_present() const;
|
|
||||||
int get_checknum() const;
|
|
||||||
const char * get_prefix() const;
|
|
||||||
const char * get_suffix() const;
|
|
||||||
const char * get_derived() const;
|
|
||||||
const char * get_version() const;
|
|
||||||
int have_contclass() const;
|
|
||||||
int get_utf8() const;
|
|
||||||
int get_complexprefixes() const;
|
|
||||||
char * get_suffixed(char ) const;
|
|
||||||
int get_maxngramsugs() const;
|
|
||||||
int get_maxcpdsugs() const;
|
|
||||||
int get_maxdiff() const;
|
|
||||||
int get_onlymaxdiff() const;
|
|
||||||
int get_nosplitsugs() const;
|
|
||||||
int get_sugswithdots(void) const;
|
|
||||||
FLAG get_keepcase(void) const;
|
|
||||||
FLAG get_forceucase(void) const;
|
|
||||||
FLAG get_warn(void) const;
|
|
||||||
int get_forbidwarn(void) const;
|
|
||||||
int get_checksharps(void) const;
|
|
||||||
char * encode_flag(unsigned short aflag) const;
|
|
||||||
int get_fullstrip() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int parse_file(const char *aff_data, const size_t aff_len);
|
|
||||||
int parse_flag(char * line, unsigned short * out, FileMgr * af);
|
|
||||||
int parse_num(char * line, int * out, FileMgr * af);
|
|
||||||
int parse_cpdsyllable(char * line, FileMgr * af);
|
|
||||||
int parse_reptable(char * line, FileMgr * af);
|
|
||||||
int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
|
|
||||||
int parse_phonetable(char * line, FileMgr * af);
|
|
||||||
int parse_maptable(char * line, FileMgr * af);
|
|
||||||
int parse_breaktable(char * line, FileMgr * af);
|
|
||||||
int parse_checkcpdtable(char * line, FileMgr * af);
|
|
||||||
int parse_defcpdtable(char * line, FileMgr * af);
|
|
||||||
int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
|
|
||||||
|
|
||||||
void reverse_condition(char *);
|
|
||||||
void debugflag(char * result, unsigned short flag);
|
|
||||||
int condlen(char *);
|
|
||||||
int encodeit(affentry &entry, char * cs);
|
|
||||||
int build_pfxtree(PfxEntry* pfxptr);
|
|
||||||
int build_sfxtree(SfxEntry* sfxptr);
|
|
||||||
int process_pfx_order();
|
|
||||||
int process_sfx_order();
|
|
||||||
PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
|
|
||||||
SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
|
|
||||||
int process_pfx_tree_to_list();
|
|
||||||
int process_sfx_tree_to_list();
|
|
||||||
int redundant_condition(char, char * strip, int stripl,
|
|
||||||
const char * cond, int);
|
|
||||||
void finishFileMgr(FileMgr *afflst);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
@ -1,107 +0,0 @@
|
|||||||
#ifndef _ATYPES_HXX_
|
|
||||||
#define _ATYPES_HXX_
|
|
||||||
|
|
||||||
#ifndef HUNSPELL_WARNING
|
|
||||||
#include <stdio.h>
|
|
||||||
#ifdef HUNSPELL_WARNING_ON
|
|
||||||
#define HUNSPELL_WARNING fprintf
|
|
||||||
#else
|
|
||||||
// empty inline function to switch off warnings (instead of the C99 standard variadic macros)
|
|
||||||
static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// HUNSTEM def.
|
|
||||||
#define HUNSTEM
|
|
||||||
|
|
||||||
#include "hashmgr.hxx"
|
|
||||||
#include "w_char.hxx"
|
|
||||||
|
|
||||||
#define SETSIZE 256
|
|
||||||
#define CONTSIZE 65536
|
|
||||||
#define MAXWORDLEN 100
|
|
||||||
#define MAXWORDUTF8LEN 256
|
|
||||||
|
|
||||||
// affentry options
|
|
||||||
#define aeXPRODUCT (1 << 0)
|
|
||||||
#define aeUTF8 (1 << 1)
|
|
||||||
#define aeALIASF (1 << 2)
|
|
||||||
#define aeALIASM (1 << 3)
|
|
||||||
#define aeLONGCOND (1 << 4)
|
|
||||||
|
|
||||||
// compound options
|
|
||||||
#define IN_CPD_NOT 0
|
|
||||||
#define IN_CPD_BEGIN 1
|
|
||||||
#define IN_CPD_END 2
|
|
||||||
#define IN_CPD_OTHER 3
|
|
||||||
|
|
||||||
// info options
|
|
||||||
#define SPELL_COMPOUND (1 << 0)
|
|
||||||
#define SPELL_FORBIDDEN (1 << 1)
|
|
||||||
#define SPELL_ALLCAP (1 << 2)
|
|
||||||
#define SPELL_NOCAP (1 << 3)
|
|
||||||
#define SPELL_INITCAP (1 << 4)
|
|
||||||
#define SPELL_ORIGCAP (1 << 5)
|
|
||||||
#define SPELL_WARN (1 << 6)
|
|
||||||
|
|
||||||
#define MAXLNLEN 8192
|
|
||||||
|
|
||||||
#define MINCPDLEN 3
|
|
||||||
#define MAXCOMPOUND 10
|
|
||||||
#define MAXCONDLEN 20
|
|
||||||
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *))
|
|
||||||
|
|
||||||
#define MAXACC 1000
|
|
||||||
|
|
||||||
#define FLAG unsigned short
|
|
||||||
#define FLAG_NULL 0x00
|
|
||||||
#define FREE_FLAG(a) a = 0
|
|
||||||
|
|
||||||
#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c))
|
|
||||||
|
|
||||||
struct affentry
|
|
||||||
{
|
|
||||||
char * strip;
|
|
||||||
char * appnd;
|
|
||||||
unsigned char stripl;
|
|
||||||
unsigned char appndl;
|
|
||||||
char numconds;
|
|
||||||
char opts;
|
|
||||||
unsigned short aflag;
|
|
||||||
unsigned short * contclass;
|
|
||||||
short contclasslen;
|
|
||||||
union {
|
|
||||||
char conds[MAXCONDLEN];
|
|
||||||
struct {
|
|
||||||
char conds1[MAXCONDLEN_1];
|
|
||||||
char * conds2;
|
|
||||||
} l;
|
|
||||||
} c;
|
|
||||||
char * morphcode;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct guessword {
|
|
||||||
char * word;
|
|
||||||
bool allow;
|
|
||||||
char * orig;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct mapentry {
|
|
||||||
char ** set;
|
|
||||||
int len;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct flagentry {
|
|
||||||
FLAG * def;
|
|
||||||
int len;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct patentry {
|
|
||||||
char * pattern;
|
|
||||||
char * pattern2;
|
|
||||||
char * pattern3;
|
|
||||||
FLAG cond;
|
|
||||||
FLAG cond2;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,32 +0,0 @@
|
|||||||
#ifndef _BASEAFF_HXX_
|
|
||||||
#define _BASEAFF_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
AffEntry(const AffEntry&);
|
|
||||||
AffEntry& operator = (const AffEntry&);
|
|
||||||
protected:
|
|
||||||
AffEntry() {}
|
|
||||||
char * appnd;
|
|
||||||
char * strip;
|
|
||||||
unsigned char appndl;
|
|
||||||
unsigned char stripl;
|
|
||||||
char numconds;
|
|
||||||
char opts;
|
|
||||||
unsigned short aflag;
|
|
||||||
union {
|
|
||||||
char conds[MAXCONDLEN];
|
|
||||||
struct {
|
|
||||||
char conds1[MAXCONDLEN_1];
|
|
||||||
char * conds2;
|
|
||||||
} l;
|
|
||||||
} c;
|
|
||||||
char * morphcode;
|
|
||||||
unsigned short * contclass;
|
|
||||||
short contclasslen;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,36 +0,0 @@
|
|||||||
/*
|
|
||||||
* config.h
|
|
||||||
* Copyright (C) 2013 Kovid Goyal <kovid at kovidgoyal.net>
|
|
||||||
*
|
|
||||||
* Distributed under terms of the GPL3 license.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __config_h__
|
|
||||||
#define __config_h__
|
|
||||||
|
|
||||||
/* Name of package */
|
|
||||||
#define PACKAGE "hunspell"
|
|
||||||
|
|
||||||
/* Define to the address where bug reports for this package should be sent. */
|
|
||||||
#define PACKAGE_BUGREPORT "nemeth@openoffice.org"
|
|
||||||
|
|
||||||
/* Define to the full name of this package. */
|
|
||||||
#define PACKAGE_NAME "hunspell"
|
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
|
||||||
#define PACKAGE_STRING "hunspell 1.3.3"
|
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
|
||||||
#define PACKAGE_TARNAME "hunspell"
|
|
||||||
|
|
||||||
/* Define to the home page for this package. */
|
|
||||||
#define PACKAGE_URL ""
|
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
|
||||||
#define PACKAGE_VERSION "1.3.3"
|
|
||||||
|
|
||||||
/* Version number of package */
|
|
||||||
#define VERSION "1.3.3"
|
|
||||||
|
|
||||||
#endif /* !__config_h__ */
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,223 +0,0 @@
|
|||||||
#ifndef __CSUTILHXX__
|
|
||||||
#define __CSUTILHXX__
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
// First some base level utility routines
|
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
#include "w_char.hxx"
|
|
||||||
#include "htypes.hxx"
|
|
||||||
|
|
||||||
#ifdef MOZILLA_CLIENT
|
|
||||||
#include "nscore.h" // for mozalloc headers
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// casing
|
|
||||||
#define NOCAP 0
|
|
||||||
#define INITCAP 1
|
|
||||||
#define ALLCAP 2
|
|
||||||
#define HUHCAP 3
|
|
||||||
#define HUHINITCAP 4
|
|
||||||
|
|
||||||
// default encoding and keystring
|
|
||||||
#define SPELL_ENCODING "ISO8859-1"
|
|
||||||
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
|
|
||||||
|
|
||||||
// default morphological fields
|
|
||||||
#define MORPH_STEM "st:"
|
|
||||||
#define MORPH_ALLOMORPH "al:"
|
|
||||||
#define MORPH_POS "po:"
|
|
||||||
#define MORPH_DERI_PFX "dp:"
|
|
||||||
#define MORPH_INFL_PFX "ip:"
|
|
||||||
#define MORPH_TERM_PFX "tp:"
|
|
||||||
#define MORPH_DERI_SFX "ds:"
|
|
||||||
#define MORPH_INFL_SFX "is:"
|
|
||||||
#define MORPH_TERM_SFX "ts:"
|
|
||||||
#define MORPH_SURF_PFX "sp:"
|
|
||||||
#define MORPH_FREQ "fr:"
|
|
||||||
#define MORPH_PHON "ph:"
|
|
||||||
#define MORPH_HYPH "hy:"
|
|
||||||
#define MORPH_PART "pa:"
|
|
||||||
#define MORPH_FLAG "fl:"
|
|
||||||
#define MORPH_HENTRY "_H:"
|
|
||||||
#define MORPH_TAG_LEN strlen(MORPH_STEM)
|
|
||||||
|
|
||||||
#define MSEP_FLD ' '
|
|
||||||
#define MSEP_REC '\n'
|
|
||||||
#define MSEP_ALT '\v'
|
|
||||||
|
|
||||||
// default flags
|
|
||||||
#define DEFAULTFLAGS 65510
|
|
||||||
#define FORBIDDENWORD 65510
|
|
||||||
#define ONLYUPCASEFLAG 65511
|
|
||||||
|
|
||||||
// fopen or optional _wfopen to fix long pathname problem of WIN32
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
|
|
||||||
|
|
||||||
// convert UTF-16 characters to UTF-8
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
|
||||||
|
|
||||||
// convert UTF-8 characters to UTF-16
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
|
|
||||||
|
|
||||||
// sort 2-byte vector
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
|
|
||||||
|
|
||||||
// binary search in 2-byte vector
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
|
|
||||||
|
|
||||||
// remove end of line char(s)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
|
|
||||||
|
|
||||||
// duplicate string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
|
|
||||||
|
|
||||||
// strcat for limited length destination string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
|
|
||||||
|
|
||||||
// duplicate reverse of string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
|
|
||||||
|
|
||||||
// parse into tokens with char delimiter
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
|
|
||||||
// parse into tokens with char delimiter
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
|
|
||||||
|
|
||||||
// replace one substring with another inside a string (see the definition for exact semantics)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
|
|
||||||
|
|
||||||
// append s to ends of every lines in text
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
|
|
||||||
|
|
||||||
// tokenize into lines with new line
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
|
|
||||||
|
|
||||||
// tokenize into lines with new line and uniq in place
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
|
|
||||||
|
|
||||||
// change oldchar to newchar in place
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
|
|
||||||
|
|
||||||
// reverse word
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
|
|
||||||
|
|
||||||
// reverse word (UTF-8 variant)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
|
|
||||||
|
|
||||||
// remove duplicates
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
|
|
||||||
|
|
||||||
// free character array list
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
|
|
||||||
|
|
||||||
// character encoding information
|
|
||||||
// Per-character case information for an 8-bit encoding.
struct cs_info {
    unsigned char ccase;   // case indicator for the character
    unsigned char clower;  // lower-case form
    unsigned char cupper;  // upper-case form
};
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
|
|
||||||
|
|
||||||
// get language identifiers of language codes
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
|
|
||||||
|
|
||||||
// get characters of the given 8bit encoding with lower- and uppercase forms
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
|
|
||||||
|
|
||||||
// convert null terminated string to all caps using encoding
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
|
|
||||||
|
|
||||||
// convert null terminated string to all little using encoding
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
|
|
||||||
|
|
||||||
// convert null terminated string to have initial capital using encoding
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
|
|
||||||
|
|
||||||
// convert null terminated string to all caps
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
|
|
||||||
|
|
||||||
// convert null terminated string to all little
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
|
|
||||||
|
|
||||||
// convert null terminated string to have initial capital
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
|
|
||||||
|
|
||||||
// convert first nc characters of UTF-16 (w_char) string to little
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
|
|
||||||
|
|
||||||
// convert first nc characters of UTF-16 (w_char) string to capital
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
|
|
||||||
|
|
||||||
// get type of capitalization
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
|
|
||||||
|
|
||||||
// get type of capitalization (UTF-8)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
|
|
||||||
|
|
||||||
// strip all ignored characters in the string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
|
|
||||||
|
|
||||||
// strip all ignored characters in the string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
|
|
||||||
int * out_utf16_len, int utf8, int ln);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
|
|
||||||
|
|
||||||
// conversion function for protected memory
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
|
|
||||||
|
|
||||||
// conversion function for protected memory
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
|
|
||||||
|
|
||||||
// hash entry macros
|
|
||||||
// Return the optional data stored after the word of a hash entry.
//   - NULL when the entry has no option bits set (h->var == 0);
//   - otherwise the bytes that follow the word (offset blen + 1), or, when
//     H_OPT_ALIASM is set, the pointer that was stored at that position.
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
{
    if (!h->var)
        return NULL;

    char *after_word = HENTRY_WORD(h) + h->blen + 1;
    if (h->var & H_OPT_ALIASM)
        return get_stored_pointer(after_word);
    return after_word;
}
|
|
||||||
|
|
||||||
// NULL-free version for warning-free OOo build
|
|
||||||
// Same lookup as HENTRY_DATA, but for const entries and never NULL for an
// entry without option bits: an empty string is returned in that case.
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
{
    if (!h->var)
        return "";

    const char *after_word = HENTRY_WORD(h) + h->blen + 1;
    if (h->var & H_OPT_ALIASM)
        return get_stored_pointer(after_word);
    return after_word;
}
|
|
||||||
|
|
||||||
// Search the optional data of a hash entry for the substring p.
// Returns a pointer to the first match, or NULL when the entry has no data
// or the substring does not occur.
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
{
    if (!HENTRY_DATA(h))
        return NULL;
    return strstr(HENTRY_DATA(h), p);
}
|
|
||||||
|
|
||||||
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,182 +0,0 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "dictmgr.hxx"
|
|
||||||
#include "csutil.hxx"
|
|
||||||
|
|
||||||
// Allocate room for MAXDICTIONARIES entries and fill it from the list file
// at dictpath, keeping only lines of type etype.  If the allocation fails
// or the list cannot be parsed, the manager simply reports zero entries.
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
{
    pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));
    if (pdentry == NULL)
        return;

    // a missing dictionary list is not an error; just report no entries
    if (parse_file(dictpath, etype) != 0)
        numdict = 0;
}
|
|
||||||
|
|
||||||
|
|
||||||
// Release the strings of every parsed entry, then the entry array itself.
DictMgr::~DictMgr()
{
    if (pdentry != NULL) {
        for (int idx = 0; idx < numdict; idx++) {
            dictentry & entry = pdentry[idx];
            // free(NULL) is a no-op, so no per-field null checks are needed
            free(entry.lang);
            entry.lang = NULL;
            free(entry.region);
            entry.region = NULL;
            free(entry.filename);
            entry.filename = NULL;
        }
        free(pdentry);
        pdentry = NULL;
    }
    numdict = 0;
}
|
|
||||||
|
|
||||||
|
|
||||||
// read in list of etype entries and build up structure to describe them
|
|
||||||
int DictMgr::parse_file(const char * dictpath, const char * etype)
|
|
||||||
{
|
|
||||||
|
|
||||||
int i;
|
|
||||||
char line[MAXDICTENTRYLEN+1];
|
|
||||||
dictentry * pdict = pdentry;
|
|
||||||
|
|
||||||
// open the dictionary list file
|
|
||||||
FILE * dictlst;
|
|
||||||
dictlst = myfopen(dictpath,"r");
|
|
||||||
if (!dictlst) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// step one is to parse the dictionary list building up the
|
|
||||||
// descriptive structures
|
|
||||||
|
|
||||||
// read in each line ignoring any that dont start with etype
|
|
||||||
while (fgets(line,MAXDICTENTRYLEN,dictlst)) {
|
|
||||||
mychomp(line);
|
|
||||||
|
|
||||||
/* parse in a dictionary entry */
|
|
||||||
if (strncmp(line,etype,4) == 0) {
|
|
||||||
if (numdict < MAXDICTIONARIES) {
|
|
||||||
char * tp = line;
|
|
||||||
char * piece;
|
|
||||||
i = 0;
|
|
||||||
while ((piece=mystrsep(&tp,' '))) {
|
|
||||||
if (*piece != '\0') {
|
|
||||||
switch(i) {
|
|
||||||
case 0: break;
|
|
||||||
case 1: pdict->lang = mystrdup(piece); break;
|
|
||||||
case 2: if (strcmp (piece, "ANY") == 0)
|
|
||||||
pdict->region = mystrdup("");
|
|
||||||
else
|
|
||||||
pdict->region = mystrdup(piece);
|
|
||||||
break;
|
|
||||||
case 3: pdict->filename = mystrdup(piece); break;
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
free(piece);
|
|
||||||
}
|
|
||||||
if (i == 4) {
|
|
||||||
numdict++;
|
|
||||||
pdict++;
|
|
||||||
} else {
|
|
||||||
switch (i) {
|
|
||||||
case 3:
|
|
||||||
free(pdict->region);
|
|
||||||
pdict->region=NULL;
|
|
||||||
/* FALLTHROUGH */
|
|
||||||
case 2:
|
|
||||||
free(pdict->lang);
|
|
||||||
pdict->lang=NULL;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
|
|
||||||
fflush(stderr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fclose(dictlst);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// return text encoding of dictionary
|
|
||||||
int DictMgr::get_list(dictentry ** ppentry)
|
|
||||||
{
|
|
||||||
*ppentry = pdentry;
|
|
||||||
return numdict;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// strip strings into token based on single char delimiter
|
|
||||||
// acts like strsep() but only uses a delim char and not
|
|
||||||
// a delim string
|
|
||||||
|
|
||||||
// Return a freshly malloc'ed copy of the next token of *stringp, where
// tokens are separated by the single character delim, and advance *stringp
// past the token (and past the delimiter, if one was found).
// Returns NULL when *stringp is empty or when the allocation fails.
// The caller owns the returned string and must free() it.
char * DictMgr::mystrsep(char ** stringp, const char delim)
{
    char * start = *stringp;
    const size_t remaining = strlen(start);
    if (remaining == 0)
        return NULL;

    char * hit = (char *)memchr(start,(int)((unsigned char)delim),remaining);
    if (hit != NULL) {
        // delimiter found: the token is everything before it
        *stringp = hit + 1;
        const size_t toklen = hit - start;
        char * token = (char *) malloc(toklen + 1);
        if (token != NULL) {
            memcpy(token, start, toklen);
            token[toklen] = '\0';
        }
        return token;
    }

    // no delimiter: the rest of the string is the last token
    char * token = (char *) malloc(remaining + 1);
    if (token != NULL) {
        memcpy(token, start, remaining);
        token[remaining] = '\0';
        *stringp = start + remaining;
    }
    return token;
}
|
|
||||||
|
|
||||||
|
|
||||||
// replaces strdup with ansi version
|
|
||||||
// Duplicate the C string s into malloc'ed memory.
// Returns NULL when s is NULL or the allocation fails; the caller owns the
// result and must free() it.
char * DictMgr::mystrdup(const char * s)
{
    if (s == NULL)
        return NULL;

    // keep the length in size_t: narrowing it to int could turn a very
    // long string into a negative/garbage size for malloc and memcpy
    const size_t sl = strlen(s) + 1;
    char * d = (char *) malloc(sl);
    if (d) memcpy(d, s, sl);
    return d;
}
|
|
||||||
|
|
||||||
|
|
||||||
// remove cross-platform text line end characters
|
|
||||||
// Strip a trailing line terminator from s in place: a final '\r' or '\n'
// is removed, and a '\r' immediately before it (CRLF) is removed as well.
void DictMgr:: mychomp(char * s)
{
    int k = strlen(s);
    if (k > 0) {
        char * last = s + k - 1;
        if (*last == '\r' || *last == '\n') *last = '\0';
    }
    if (k > 1) {
        char * before_last = s + k - 2;
        if (*before_last == '\r') *before_last = '\0';
    }
}
|
|
||||||
|
|
@ -1,39 +0,0 @@
|
|||||||
#ifndef _DICTMGR_HXX_
|
|
||||||
#define _DICTMGR_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#define MAXDICTIONARIES 100
|
|
||||||
#define MAXDICTENTRYLEN 1024
|
|
||||||
|
|
||||||
// One line of the dictionary list: three heap-allocated strings.
struct dictentry {
    char* filename;  // fourth token of the list line
    char* lang;      // second token of the list line
    char* region;    // third token; "ANY" is stored as an empty string
};
|
|
||||||
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
DictMgr(const DictMgr&);
|
|
||||||
DictMgr& operator = (const DictMgr&);
|
|
||||||
private:
|
|
||||||
int numdict;
|
|
||||||
dictentry * pdentry;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
DictMgr(const char * dictpath, const char * etype);
|
|
||||||
~DictMgr();
|
|
||||||
int get_list(dictentry** ppentry);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int parse_file(const char * dictpath, const char * etype);
|
|
||||||
char * mystrsep(char ** stringp, const char delim);
|
|
||||||
char * mystrdup(const char * s);
|
|
||||||
void mychomp(char * s);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,46 +0,0 @@
|
|||||||
#include "license.hunspell"
|
|
||||||
#include "license.myspell"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "filemgr.hxx"
|
|
||||||
|
|
||||||
// Take a private, NUL-terminated copy of the dlen bytes at data and
// position the read cursor at its start.
FileMgr::FileMgr(const char *data, const size_t dlen) {
    buflen = dlen;
    linenum = 0;
    last = 0;

    // one extra byte so the copy can always be NUL-terminated
    buf = new char[dlen+1];
    memcpy(buf, data, dlen);
    buf[dlen] = 0;

    pos = buf;
}
|
|
||||||
|
|
||||||
// Release the buffer copy (if getline() has not already done so).
FileMgr::~FileMgr()
{
    // delete[] on a null pointer is a no-op, so no check is required
    delete[] buf;
    buf = NULL;
    pos = NULL;
}
|
|
||||||
|
|
||||||
char * FileMgr::getline() {
|
|
||||||
if (buf == NULL) return NULL;
|
|
||||||
if (((size_t)(pos - buf)) >= buflen) {
|
|
||||||
// free up the memory as it will not be needed anymore
|
|
||||||
delete[] buf; buf = NULL; pos = NULL; return NULL;
|
|
||||||
}
|
|
||||||
if (pos != buf) *pos = last; // Restore the character that was previously replaced by null
|
|
||||||
char *ans = pos;
|
|
||||||
// Move pos to the start of the next line
|
|
||||||
pos = (char *)memchr(pos, 10, buflen - (pos - buf));
|
|
||||||
if (pos == NULL) pos = buf + buflen + 1;
|
|
||||||
else pos++;
|
|
||||||
// Ensure the current line is null terminated
|
|
||||||
last = *pos;
|
|
||||||
*pos = 0;
|
|
||||||
linenum++;
|
|
||||||
return ans;
|
|
||||||
}
|
|
||||||
|
|
||||||
int FileMgr::getlinenum() {
|
|
||||||
return linenum;
|
|
||||||
}
|
|
@ -1,22 +0,0 @@
|
|||||||
/* file manager class - read lines from an in-memory copy of the file data */
|
|
||||||
#ifndef _FILEMGR_HXX_
|
|
||||||
#define _FILEMGR_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
char *buf;
|
|
||||||
char *pos;
|
|
||||||
size_t buflen;
|
|
||||||
char last;
|
|
||||||
int linenum;
|
|
||||||
|
|
||||||
public:
|
|
||||||
FileMgr(const char *data, const size_t dlen);
|
|
||||||
~FileMgr();
|
|
||||||
char * getline();
|
|
||||||
int getlinenum();
|
|
||||||
};
|
|
||||||
#endif
|
|
@ -1,938 +0,0 @@
|
|||||||
#include "license.hunspell"
|
|
||||||
#include "license.myspell"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
#include "hashmgr.hxx"
|
|
||||||
#include "csutil.hxx"
|
|
||||||
#include "atypes.hxx"
|
|
||||||
|
|
||||||
#define BUFSIZE 65536
|
|
||||||
|
|
||||||
// build a hash table from a munched word list
|
|
||||||
|
|
||||||
// Build the hash manager from in-memory affix (aff_*) and dictionary
// (dic_*) data: reset all state, read the configuration from the affix
// data, then load the word table from the dictionary data.  If loading
// fails, a warning is emitted and the table pointer is freed and cleared
// so the object reports an empty table.
HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len)
  : tablesize(0)
  , tableptr(NULL)
  , userword(0)
  , flag_mode(FLAG_CHAR)
  , complexprefixes(0)
  , utf8(0)
  , forbiddenword(FORBIDDENWORD) // forbidden word signing flag
  , numaliasf(0)
  , aliasf(NULL)
  , aliasflen(0)
  , numaliasm(0)
  , aliasm(NULL)
{
    langnum = 0;
    lang = NULL;
    enc = NULL;
    csconv = 0;
    ignorechars = NULL;
    ignorechars_utf16 = NULL;
    ignorechars_utf16_len = 0;

    load_config(aff_data, aff_len);

    const int ec = load_tables(dic_data, dic_len);
    if (ec == 0)
        return;

    /* error condition - what should we do here */
    HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
    free(tableptr);  // free(NULL) is a no-op
    tableptr = NULL;
    tablesize = 0;
}
|
|
||||||
|
|
||||||
|
|
||||||
// Free every hash entry, the table itself, the alias tables and the
// remaining configuration strings.
HashMgr::~HashMgr()
{
    if (tableptr) {
        // go through the table column by column, freeing each chain
        for (int col = 0; col < tablesize; col++) {
            struct hentry * cur = tableptr[col];
            while (cur) {
                struct hentry * following = cur->next;
                // A flag array is released here only when no alias table
                // is in use, or when it carries the ONLYUPCASE flag.
                const bool release_flags = cur->astr &&
                    (!aliasf || TESTAFF(cur->astr, ONLYUPCASEFLAG, cur->alen));
                if (release_flags) free(cur->astr);
                free(cur);
                cur = following;
            }
        }
        free(tableptr);
    }
    tablesize = 0;

    if (aliasf) {
        for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
        free(aliasf);
        aliasf = NULL;
        free(aliasflen);  // free(NULL) is a no-op
        aliasflen = NULL;
    }
    if (aliasm) {
        for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
        free(aliasm);
        aliasm = NULL;
    }

#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
    if (utf8) free_utf_tbl();
#endif
#endif

    // free(NULL) is a no-op, so these need no guards
    free(enc);
    free(lang);

    free(ignorechars);
    free(ignorechars_utf16);

#ifdef MOZILLA_CLIENT
    delete [] csconv;
#endif
}
|
|
||||||
|
|
||||||
// lookup a root word in the hashtable
|
|
||||||
|
|
||||||
struct hentry * HashMgr::lookup(const char *word) const
|
|
||||||
{
|
|
||||||
struct hentry * dp;
|
|
||||||
if (tableptr) {
|
|
||||||
dp = tableptr[hash(word)];
|
|
||||||
if (!dp) return NULL;
|
|
||||||
for ( ; dp != NULL; dp = dp->next) {
|
|
||||||
if (strcmp(word, dp->word) == 0) return dp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// add a word to the hash table (private)
|
|
||||||
int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
|
||||||
int al, const char * desc, bool onlyupcase)
|
|
||||||
{
|
|
||||||
bool upcasehomonym = false;
|
|
||||||
int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
|
|
||||||
// variable-length hash record with word and optional fields
|
|
||||||
struct hentry* hp =
|
|
||||||
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
|
|
||||||
if (!hp) return 1;
|
|
||||||
char * hpw = hp->word;
|
|
||||||
strcpy(hpw, word);
|
|
||||||
if (ignorechars != NULL) {
|
|
||||||
if (utf8) {
|
|
||||||
remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
|
|
||||||
} else {
|
|
||||||
remove_ignored_chars(hpw, ignorechars);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (complexprefixes) {
|
|
||||||
if (utf8) reverseword_utf(hpw); else reverseword(hpw);
|
|
||||||
}
|
|
||||||
|
|
||||||
int i = hash(hpw);
|
|
||||||
|
|
||||||
hp->blen = (unsigned char) wbl;
|
|
||||||
hp->clen = (unsigned char) wcl;
|
|
||||||
hp->alen = (short) al;
|
|
||||||
hp->astr = aff;
|
|
||||||
hp->next = NULL;
|
|
||||||
hp->next_homonym = NULL;
|
|
||||||
|
|
||||||
// store the description string or its pointer
|
|
||||||
if (desc) {
|
|
||||||
hp->var = H_OPT;
|
|
||||||
if (aliasm) {
|
|
||||||
hp->var += H_OPT_ALIASM;
|
|
||||||
store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
|
|
||||||
} else {
|
|
||||||
strcpy(hpw + wbl + 1, desc);
|
|
||||||
if (complexprefixes) {
|
|
||||||
if (utf8) reverseword_utf(HENTRY_DATA(hp));
|
|
||||||
else reverseword(HENTRY_DATA(hp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
|
|
||||||
} else hp->var = 0;
|
|
||||||
|
|
||||||
struct hentry * dp = tableptr[i];
|
|
||||||
if (!dp) {
|
|
||||||
tableptr[i] = hp;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
while (dp->next != NULL) {
|
|
||||||
if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
|
|
||||||
// remove hidden onlyupcase homonym
|
|
||||||
if (!onlyupcase) {
|
|
||||||
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
|
|
||||||
free(dp->astr);
|
|
||||||
dp->astr = hp->astr;
|
|
||||||
dp->alen = hp->alen;
|
|
||||||
free(hp);
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
dp->next_homonym = hp;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
upcasehomonym = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dp=dp->next;
|
|
||||||
}
|
|
||||||
if (strcmp(hp->word, dp->word) == 0) {
|
|
||||||
// remove hidden onlyupcase homonym
|
|
||||||
if (!onlyupcase) {
|
|
||||||
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
|
|
||||||
free(dp->astr);
|
|
||||||
dp->astr = hp->astr;
|
|
||||||
dp->alen = hp->alen;
|
|
||||||
free(hp);
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
dp->next_homonym = hp;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
upcasehomonym = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!upcasehomonym) {
|
|
||||||
dp->next = hp;
|
|
||||||
} else {
|
|
||||||
// remove hidden onlyupcase homonym
|
|
||||||
if (hp->astr) free(hp->astr);
|
|
||||||
free(hp);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
|
||||||
unsigned short * flags, int flagslen, char * dp, int captype)
|
|
||||||
{
|
|
||||||
if (flags == NULL)
|
|
||||||
flagslen = 0;
|
|
||||||
|
|
||||||
// add inner capitalized forms to handle the following allcap forms:
|
|
||||||
// Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
|
|
||||||
// Allcaps with suffixes: CIA's -> CIA'S
|
|
||||||
if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
|
|
||||||
((captype == ALLCAP) && (flagslen != 0))) &&
|
|
||||||
!((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
|
|
||||||
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1));
|
|
||||||
if (!flags2) return 1;
|
|
||||||
if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short));
|
|
||||||
flags2[flagslen] = ONLYUPCASEFLAG;
|
|
||||||
if (utf8) {
|
|
||||||
char st[BUFSIZE];
|
|
||||||
w_char w[BUFSIZE];
|
|
||||||
int wlen = u8_u16(w, BUFSIZE, word);
|
|
||||||
mkallsmall_utf(w, wlen, langnum);
|
|
||||||
mkallcap_utf(w, 1, langnum);
|
|
||||||
u16_u8(st, BUFSIZE, w, wlen);
|
|
||||||
return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true);
|
|
||||||
} else {
|
|
||||||
mkallsmall(word, csconv);
|
|
||||||
mkinitcap(word, csconv);
|
|
||||||
return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// detect captype and modify word length for UTF-8 encoding
|
|
||||||
int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
|
|
||||||
int len;
|
|
||||||
if (utf8) {
|
|
||||||
w_char dest_utf[BUFSIZE];
|
|
||||||
len = u8_u16(dest_utf, BUFSIZE, word);
|
|
||||||
*captype = get_captype_utf8(dest_utf, len, langnum);
|
|
||||||
} else {
|
|
||||||
len = wbl;
|
|
||||||
*captype = get_captype((char *) word, len, csconv);
|
|
||||||
}
|
|
||||||
return len;
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove word (personal dictionary function for standalone applications)
|
|
||||||
int HashMgr::remove(const char * word)
|
|
||||||
{
|
|
||||||
struct hentry * dp = lookup(word);
|
|
||||||
while (dp) {
|
|
||||||
if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
|
|
||||||
unsigned short * flags =
|
|
||||||
(unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
|
|
||||||
if (!flags) return 1;
|
|
||||||
for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
|
|
||||||
flags[dp->alen] = forbiddenword;
|
|
||||||
dp->astr = flags;
|
|
||||||
dp->alen++;
|
|
||||||
flag_qsort(flags, 0, dp->alen);
|
|
||||||
}
|
|
||||||
dp = dp->next_homonym;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* remove forbidden flag to add a personal word to the hash */
|
|
||||||
int HashMgr::remove_forbidden_flag(const char * word) {
|
|
||||||
struct hentry * dp = lookup(word);
|
|
||||||
if (!dp) return 1;
|
|
||||||
while (dp) {
|
|
||||||
if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
|
|
||||||
if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
|
|
||||||
else {
|
|
||||||
unsigned short * flags2 =
|
|
||||||
(unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
|
|
||||||
if (!flags2) return 1;
|
|
||||||
int i, j = 0;
|
|
||||||
for (i = 0; i < dp->alen; i++) {
|
|
||||||
if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
|
|
||||||
}
|
|
||||||
dp->alen--;
|
|
||||||
dp->astr = flags2; // XXX allowed forbidden words
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dp = dp->next_homonym;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// add a custom dic. word to the hash table (public)
|
|
||||||
int HashMgr::add(const char * word)
|
|
||||||
{
|
|
||||||
unsigned short * flags = NULL;
|
|
||||||
int al = 0;
|
|
||||||
if (remove_forbidden_flag(word)) {
|
|
||||||
int captype;
|
|
||||||
int wbl = strlen(word);
|
|
||||||
int wcl = get_clen_and_captype(word, wbl, &captype);
|
|
||||||
add_word(word, wbl, wcl, flags, al, NULL, false);
|
|
||||||
return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int HashMgr::add_with_affix(const char * word, const char * example)
|
|
||||||
{
|
|
||||||
// detect captype and modify word length for UTF-8 encoding
|
|
||||||
struct hentry * dp = lookup(example);
|
|
||||||
remove_forbidden_flag(word);
|
|
||||||
if (dp && dp->astr) {
|
|
||||||
int captype;
|
|
||||||
int wbl = strlen(word);
|
|
||||||
int wcl = get_clen_and_captype(word, wbl, &captype);
|
|
||||||
if (aliasf) {
|
|
||||||
add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
|
|
||||||
} else {
|
|
||||||
unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
|
|
||||||
if (flags) {
|
|
||||||
memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
|
|
||||||
add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
|
|
||||||
} else return 1;
|
|
||||||
}
|
|
||||||
return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// walk the hash table entry by entry - null at end
|
|
||||||
// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
|
|
||||||
struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
|
|
||||||
{
|
|
||||||
if (hp && hp->next != NULL) return hp->next;
|
|
||||||
for (col++; col < tablesize; col++) {
|
|
||||||
if (tableptr[col]) return tableptr[col];
|
|
||||||
}
|
|
||||||
// null at end and reset to start
|
|
||||||
col = -1;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// load a munched word list and build a hash table on the fly
|
|
||||||
int HashMgr::load_tables(const char *dic_data, const size_t dic_len)
|
|
||||||
{
|
|
||||||
int al;
|
|
||||||
char * ap;
|
|
||||||
char * dp;
|
|
||||||
char * dp2;
|
|
||||||
unsigned short * flags;
|
|
||||||
char * ts;
|
|
||||||
|
|
||||||
// open dictionary file
|
|
||||||
FileMgr * dict = new FileMgr(dic_data, dic_len);
|
|
||||||
if (dict == NULL) return 1;
|
|
||||||
|
|
||||||
// first read the first line of file to get hash table size */
|
|
||||||
if ((ts = dict->getline()) == NULL) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: empty dic file \n");
|
|
||||||
delete dict;
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
mychomp(ts);
|
|
||||||
|
|
||||||
/* remove byte order mark */
|
|
||||||
if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
|
|
||||||
memmove(ts, ts+3, strlen(ts+3)+1);
|
|
||||||
// warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
|
|
||||||
}
|
|
||||||
|
|
||||||
tablesize = atoi(ts);
|
|
||||||
|
|
||||||
int nExtra = 5 + USERWORD;
|
|
||||||
|
|
||||||
if (tablesize <= 0 || (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
|
|
||||||
delete dict;
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
tablesize += nExtra;
|
|
||||||
if ((tablesize % 2) == 0) tablesize++;
|
|
||||||
|
|
||||||
// allocate the hash table
|
|
||||||
tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *));
|
|
||||||
if (! tableptr) {
|
|
||||||
delete dict;
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// loop through all words on much list and add to hash
|
|
||||||
// table and create word and affix strings
|
|
||||||
|
|
||||||
while ((ts = dict->getline()) != NULL) {
|
|
||||||
mychomp(ts);
|
|
||||||
// split each line into word and morphological description
|
|
||||||
dp = ts;
|
|
||||||
while ((dp = strchr(dp, ':')) != NULL) {
|
|
||||||
if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
|
|
||||||
for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
|
|
||||||
if (dp < ts) { // missing word
|
|
||||||
dp = NULL;
|
|
||||||
} else {
|
|
||||||
*(dp + 1) = '\0';
|
|
||||||
dp = dp + 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
dp++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// tabulator is the old morphological field separator
|
|
||||||
dp2 = strchr(ts, '\t');
|
|
||||||
if (dp2 && (!dp || dp2 < dp)) {
|
|
||||||
*dp2 = '\0';
|
|
||||||
dp = dp2 + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// split each line into word and affix char strings
|
|
||||||
// "\/" signs slash in words (not affix separator)
|
|
||||||
// "/" at beginning of the line is word character (not affix separator)
|
|
||||||
ap = strchr(ts,'/');
|
|
||||||
while (ap) {
|
|
||||||
if (ap == ts) {
|
|
||||||
ap++;
|
|
||||||
continue;
|
|
||||||
} else if (*(ap - 1) != '\\') break;
|
|
||||||
// replace "\/" with "/"
|
|
||||||
for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
|
|
||||||
ap = strchr(ap,'/');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ap) {
|
|
||||||
*ap = '\0';
|
|
||||||
if (aliasf) {
|
|
||||||
int index = atoi(ap + 1);
|
|
||||||
al = get_aliasf(index, &flags, dict);
|
|
||||||
if (!al) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
|
|
||||||
*ap = '\0';
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
al = decode_flags(&flags, ap + 1, dict);
|
|
||||||
if (al == -1) {
|
|
||||||
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
|
|
||||||
delete dict;
|
|
||||||
return 6;
|
|
||||||
}
|
|
||||||
flag_qsort(flags, 0, al);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
al = 0;
|
|
||||||
ap = NULL;
|
|
||||||
flags = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
int captype;
|
|
||||||
int wbl = strlen(ts);
|
|
||||||
int wcl = get_clen_and_captype(ts, wbl, &captype);
|
|
||||||
// add the word and its index plus its capitalized form optionally
|
|
||||||
if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
|
|
||||||
add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
|
|
||||||
delete dict;
|
|
||||||
return 5;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
delete dict;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// the hash function is a simple load and rotate
|
|
||||||
// algorithm borrowed
|
|
||||||
|
|
||||||
int HashMgr::hash(const char * word) const
|
|
||||||
{
|
|
||||||
long hv = 0;
|
|
||||||
for (int i=0; i < 4 && *word != 0; i++)
|
|
||||||
hv = (hv << 8) | (*word++);
|
|
||||||
while (*word != 0) {
|
|
||||||
ROTATE(hv,ROTATE_LEN);
|
|
||||||
hv ^= (*word++);
|
|
||||||
}
|
|
||||||
return (unsigned long) hv % tablesize;
|
|
||||||
}
|
|
||||||
|
|
||||||
int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
|
|
||||||
int len;
|
|
||||||
if (*flags == '\0') {
|
|
||||||
*result = NULL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
switch (flag_mode) {
|
|
||||||
case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
|
|
||||||
len = strlen(flags);
|
|
||||||
if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
|
|
||||||
len /= 2;
|
|
||||||
*result = (unsigned short *) malloc(len * sizeof(short));
|
|
||||||
if (!*result) return -1;
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
(*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
|
|
||||||
int i;
|
|
||||||
len = 1;
|
|
||||||
char * src = flags;
|
|
||||||
unsigned short * dest;
|
|
||||||
char * p;
|
|
||||||
for (p = flags; *p; p++) {
|
|
||||||
if (*p == ',') len++;
|
|
||||||
}
|
|
||||||
*result = (unsigned short *) malloc(len * sizeof(short));
|
|
||||||
if (!*result) return -1;
|
|
||||||
dest = *result;
|
|
||||||
for (p = flags; *p; p++) {
|
|
||||||
if (*p == ',') {
|
|
||||||
i = atoi(src);
|
|
||||||
if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
|
|
||||||
af->getlinenum(), i, DEFAULTFLAGS - 1);
|
|
||||||
*dest = (unsigned short) i;
|
|
||||||
if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
|
|
||||||
src = p + 1;
|
|
||||||
dest++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
i = atoi(src);
|
|
||||||
if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
|
|
||||||
af->getlinenum(), i, DEFAULTFLAGS - 1);
|
|
||||||
*dest = (unsigned short) i;
|
|
||||||
if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case FLAG_UNI: { // UTF-8 characters
|
|
||||||
w_char w[BUFSIZE/2];
|
|
||||||
len = u8_u16(w, BUFSIZE/2, flags);
|
|
||||||
*result = (unsigned short *) malloc(len * sizeof(short));
|
|
||||||
if (!*result) return -1;
|
|
||||||
memcpy(*result, w, len * sizeof(short));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: { // Ispell's one-character flags (erfg -> e r f g)
|
|
||||||
unsigned short * dest;
|
|
||||||
len = strlen(flags);
|
|
||||||
*result = (unsigned short *) malloc(len * sizeof(short));
|
|
||||||
if (!*result) return -1;
|
|
||||||
dest = *result;
|
|
||||||
for (unsigned char * p = (unsigned char *) flags; *p; p++) {
|
|
||||||
*dest = (unsigned short) *p;
|
|
||||||
dest++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return len;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned short HashMgr::decode_flag(const char * f) {
|
|
||||||
unsigned short s = 0;
|
|
||||||
int i;
|
|
||||||
switch (flag_mode) {
|
|
||||||
case FLAG_LONG:
|
|
||||||
s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];
|
|
||||||
break;
|
|
||||||
case FLAG_NUM:
|
|
||||||
i = atoi(f);
|
|
||||||
if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
|
|
||||||
s = (unsigned short) i;
|
|
||||||
break;
|
|
||||||
case FLAG_UNI:
|
|
||||||
u8_u16((w_char *) &s, 1, f);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
s = (unsigned short) *((unsigned char *)f);
|
|
||||||
}
|
|
||||||
if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
char * HashMgr::encode_flag(unsigned short f) {
|
|
||||||
unsigned char ch[10];
|
|
||||||
if (f==0) return mystrdup("(NULL)");
|
|
||||||
if (flag_mode == FLAG_LONG) {
|
|
||||||
ch[0] = (unsigned char) (f >> 8);
|
|
||||||
ch[1] = (unsigned char) (f - ((f >> 8) << 8));
|
|
||||||
ch[2] = '\0';
|
|
||||||
} else if (flag_mode == FLAG_NUM) {
|
|
||||||
sprintf((char *) ch, "%d", f);
|
|
||||||
} else if (flag_mode == FLAG_UNI) {
|
|
||||||
u16_u8((char *) &ch, 10, (w_char *) &f, 1);
|
|
||||||
} else {
|
|
||||||
ch[0] = (unsigned char) (f);
|
|
||||||
ch[1] = '\0';
|
|
||||||
}
|
|
||||||
return mystrdup((char *) ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
// read in aff file and set flag mode
|
|
||||||
int HashMgr::load_config(const char *aff_data, const size_t aff_len)
|
|
||||||
{
|
|
||||||
char * line; // io buffers
|
|
||||||
int firstline = 1;
|
|
||||||
|
|
||||||
// open the affix file
|
|
||||||
FileMgr * afflst = new FileMgr(aff_data, aff_len);
|
|
||||||
if (!afflst) {
|
|
||||||
HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// read in each line ignoring any that do not
|
|
||||||
// start with a known line type indicator
|
|
||||||
|
|
||||||
while ((line = afflst->getline()) != NULL) {
|
|
||||||
mychomp(line);
|
|
||||||
|
|
||||||
/* remove byte order mark */
|
|
||||||
if (firstline) {
|
|
||||||
firstline = 0;
|
|
||||||
if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* parse in the try string */
|
|
||||||
if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
|
|
||||||
if (flag_mode != FLAG_CHAR) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());
|
|
||||||
}
|
|
||||||
if (strstr(line, "long")) flag_mode = FLAG_LONG;
|
|
||||||
if (strstr(line, "num")) flag_mode = FLAG_NUM;
|
|
||||||
if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
|
|
||||||
if (flag_mode == FLAG_CHAR) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
|
|
||||||
char * st = NULL;
|
|
||||||
if (parse_string(line, &st, afflst->getlinenum())) {
|
|
||||||
delete afflst;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
forbiddenword = decode_flag(st);
|
|
||||||
free(st);
|
|
||||||
}
|
|
||||||
if (strncmp(line, "SET", 3) == 0) {
|
|
||||||
if (parse_string(line, &enc, afflst->getlinenum())) {
|
|
||||||
delete afflst;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (strcmp(enc, "UTF-8") == 0) {
|
|
||||||
utf8 = 1;
|
|
||||||
#ifndef OPENOFFICEORG
|
|
||||||
#ifndef MOZILLA_CLIENT
|
|
||||||
initialize_utf_tbl();
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
} else csconv = get_current_cs(enc);
|
|
||||||
}
|
|
||||||
if (strncmp(line, "LANG", 4) == 0) {
|
|
||||||
if (parse_string(line, &lang, afflst->getlinenum())) {
|
|
||||||
delete afflst;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
langnum = get_lang_num(lang);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* parse in the ignored characters (for example, Arabic optional diacritics characters */
|
|
||||||
if (strncmp(line,"IGNORE",6) == 0) {
|
|
||||||
if (parse_array(line, &ignorechars, &ignorechars_utf16,
|
|
||||||
&ignorechars_utf16_len, utf8, afflst->getlinenum())) {
|
|
||||||
delete afflst;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
|
|
||||||
if (parse_aliasf(line, afflst)) {
|
|
||||||
delete afflst;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
|
|
||||||
if (parse_aliasm(line, afflst)) {
|
|
||||||
delete afflst;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
|
|
||||||
if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
|
|
||||||
}
|
|
||||||
if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);
|
|
||||||
delete afflst;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* parse in the ALIAS table */
|
|
||||||
int HashMgr::parse_aliasf(char * line, FileMgr * af)
|
|
||||||
{
|
|
||||||
if (numaliasf != 0) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
char * tp = line;
|
|
||||||
char * piece;
|
|
||||||
int i = 0;
|
|
||||||
int np = 0;
|
|
||||||
piece = mystrsep(&tp, 0);
|
|
||||||
while (piece) {
|
|
||||||
if (*piece != '\0') {
|
|
||||||
switch(i) {
|
|
||||||
case 0: { np++; break; }
|
|
||||||
case 1: {
|
|
||||||
numaliasf = atoi(piece);
|
|
||||||
if (numaliasf < 1) {
|
|
||||||
numaliasf = 0;
|
|
||||||
aliasf = NULL;
|
|
||||||
aliasflen = NULL;
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
|
|
||||||
aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));
|
|
||||||
if (!aliasf || !aliasflen) {
|
|
||||||
numaliasf = 0;
|
|
||||||
if (aliasf) free(aliasf);
|
|
||||||
if (aliasflen) free(aliasflen);
|
|
||||||
aliasf = NULL;
|
|
||||||
aliasflen = NULL;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
np++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
piece = mystrsep(&tp, 0);
|
|
||||||
}
|
|
||||||
if (np != 2) {
|
|
||||||
numaliasf = 0;
|
|
||||||
free(aliasf);
|
|
||||||
free(aliasflen);
|
|
||||||
aliasf = NULL;
|
|
||||||
aliasflen = NULL;
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* now parse the numaliasf lines to read in the remainder of the table */
|
|
||||||
char * nl;
|
|
||||||
for (int j=0; j < numaliasf; j++) {
|
|
||||||
if ((nl = af->getline()) == NULL) return 1;
|
|
||||||
mychomp(nl);
|
|
||||||
tp = nl;
|
|
||||||
i = 0;
|
|
||||||
aliasf[j] = NULL;
|
|
||||||
aliasflen[j] = 0;
|
|
||||||
piece = mystrsep(&tp, 0);
|
|
||||||
while (piece) {
|
|
||||||
if (*piece != '\0') {
|
|
||||||
switch(i) {
|
|
||||||
case 0: {
|
|
||||||
if (strncmp(piece,"AF",2) != 0) {
|
|
||||||
numaliasf = 0;
|
|
||||||
free(aliasf);
|
|
||||||
free(aliasflen);
|
|
||||||
aliasf = NULL;
|
|
||||||
aliasflen = NULL;
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 1: {
|
|
||||||
aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af);
|
|
||||||
flag_qsort(aliasf[j], 0, aliasflen[j]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
piece = mystrsep(&tp, 0);
|
|
||||||
}
|
|
||||||
if (!aliasf[j]) {
|
|
||||||
free(aliasf);
|
|
||||||
free(aliasflen);
|
|
||||||
aliasf = NULL;
|
|
||||||
aliasflen = NULL;
|
|
||||||
numaliasf = 0;
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int HashMgr::is_aliasf() {
|
|
||||||
return (aliasf != NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) {
|
|
||||||
if ((index > 0) && (index <= numaliasf)) {
|
|
||||||
*fvec = aliasf[index - 1];
|
|
||||||
return aliasflen[index - 1];
|
|
||||||
}
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index);
|
|
||||||
*fvec = NULL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* parse morph alias definitions */
|
|
||||||
int HashMgr::parse_aliasm(char * line, FileMgr * af)
|
|
||||||
{
|
|
||||||
if (numaliasm != 0) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
char * tp = line;
|
|
||||||
char * piece;
|
|
||||||
int i = 0;
|
|
||||||
int np = 0;
|
|
||||||
piece = mystrsep(&tp, 0);
|
|
||||||
while (piece) {
|
|
||||||
if (*piece != '\0') {
|
|
||||||
switch(i) {
|
|
||||||
case 0: { np++; break; }
|
|
||||||
case 1: {
|
|
||||||
numaliasm = atoi(piece);
|
|
||||||
if (numaliasm < 1) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
aliasm = (char **) malloc(numaliasm * sizeof(char *));
|
|
||||||
if (!aliasm) {
|
|
||||||
numaliasm = 0;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
np++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
piece = mystrsep(&tp, 0);
|
|
||||||
}
|
|
||||||
if (np != 2) {
|
|
||||||
numaliasm = 0;
|
|
||||||
free(aliasm);
|
|
||||||
aliasm = NULL;
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* now parse the numaliasm lines to read in the remainder of the table */
|
|
||||||
char * nl = line;
|
|
||||||
for (int j=0; j < numaliasm; j++) {
|
|
||||||
if ((nl = af->getline()) == NULL) return 1;
|
|
||||||
mychomp(nl);
|
|
||||||
tp = nl;
|
|
||||||
i = 0;
|
|
||||||
aliasm[j] = NULL;
|
|
||||||
piece = mystrsep(&tp, ' ');
|
|
||||||
while (piece) {
|
|
||||||
if (*piece != '\0') {
|
|
||||||
switch(i) {
|
|
||||||
case 0: {
|
|
||||||
if (strncmp(piece,"AM",2) != 0) {
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
|
||||||
numaliasm = 0;
|
|
||||||
free(aliasm);
|
|
||||||
aliasm = NULL;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 1: {
|
|
||||||
// add the remaining of the line
|
|
||||||
if (*tp) {
|
|
||||||
*(tp - 1) = ' ';
|
|
||||||
tp = tp + strlen(tp);
|
|
||||||
}
|
|
||||||
if (complexprefixes) {
|
|
||||||
if (utf8) reverseword_utf(piece);
|
|
||||||
else reverseword(piece);
|
|
||||||
}
|
|
||||||
aliasm[j] = mystrdup(piece);
|
|
||||||
if (!aliasm[j]) {
|
|
||||||
numaliasm = 0;
|
|
||||||
free(aliasm);
|
|
||||||
aliasm = NULL;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break; }
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
piece = mystrsep(&tp, ' ');
|
|
||||||
}
|
|
||||||
if (!aliasm[j]) {
|
|
||||||
numaliasm = 0;
|
|
||||||
free(aliasm);
|
|
||||||
aliasm = NULL;
|
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int HashMgr::is_aliasm() {
|
|
||||||
return (aliasm != NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
char * HashMgr::get_aliasm(int index) {
|
|
||||||
if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];
|
|
||||||
HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
@ -1,69 +0,0 @@
|
|||||||
#ifndef _HASHMGR_HXX_
|
|
||||||
#define _HASHMGR_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "htypes.hxx"
|
|
||||||
#include "filemgr.hxx"
|
|
||||||
|
|
||||||
// How affix flags are written in the aff/dic data.
enum flag {
    FLAG_CHAR,  // one character per flag (the mode in effect without a FLAG line)
    FLAG_LONG,  // two characters per flag ("FLAG long")
    FLAG_NUM,   // decimal numbers separated by commas ("FLAG num")
    FLAG_UNI    // one UTF-8 character per flag ("FLAG UTF-8")
};
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED HashMgr
|
|
||||||
{
|
|
||||||
int tablesize;
|
|
||||||
struct hentry ** tableptr;
|
|
||||||
int userword;
|
|
||||||
flag flag_mode;
|
|
||||||
int complexprefixes;
|
|
||||||
int utf8;
|
|
||||||
unsigned short forbiddenword;
|
|
||||||
int langnum;
|
|
||||||
char * enc;
|
|
||||||
char * lang;
|
|
||||||
struct cs_info * csconv;
|
|
||||||
char * ignorechars;
|
|
||||||
unsigned short * ignorechars_utf16;
|
|
||||||
int ignorechars_utf16_len;
|
|
||||||
int numaliasf; // flag vector `compression' with aliases
|
|
||||||
unsigned short ** aliasf;
|
|
||||||
unsigned short * aliasflen;
|
|
||||||
int numaliasm; // morphological desciption `compression' with aliases
|
|
||||||
char ** aliasm;
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len);
|
|
||||||
~HashMgr();
|
|
||||||
|
|
||||||
struct hentry * lookup(const char *) const;
|
|
||||||
int hash(const char *) const;
|
|
||||||
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
|
|
||||||
|
|
||||||
int add(const char * word);
|
|
||||||
int add_with_affix(const char * word, const char * pattern);
|
|
||||||
int remove(const char * word);
|
|
||||||
int decode_flags(unsigned short ** result, char * flags, FileMgr * af);
|
|
||||||
unsigned short decode_flag(const char * flag);
|
|
||||||
char * encode_flag(unsigned short flag);
|
|
||||||
int is_aliasf();
|
|
||||||
int get_aliasf(int index, unsigned short ** fvec, FileMgr * af);
|
|
||||||
int is_aliasm();
|
|
||||||
char * get_aliasm(int index);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int get_clen_and_captype(const char * word, int wbl, int * captype);
|
|
||||||
int load_tables(const char *dic_data, const size_t dic_len);
|
|
||||||
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
|
|
||||||
int al, const char * desc, bool onlyupcase);
|
|
||||||
int load_config(const char *aff_data, const size_t aff_len);
|
|
||||||
int parse_aliasf(char * line, FileMgr * af);
|
|
||||||
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
|
||||||
unsigned short * flags, int al, char * dp, int captype);
|
|
||||||
int parse_aliasm(char * line, FileMgr * af);
|
|
||||||
int remove_forbidden_flag(const char * word);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,32 +0,0 @@
|
|||||||
#ifndef _HTYPES_HXX_
|
|
||||||
#define _HTYPES_HXX_
|
|
||||||
|
|
||||||
// number of bits the hash accumulator is rotated by per input byte
#define ROTATE_LEN   5

// Rotate-style mixing step: shift v left by q bits and fold bits q..1
// positions below bit 32 back into the low end. Every use of an argument
// is parenthesised so expressions can be passed safely. The trailing
// semicolon is kept for existing callers.
#define ROTATE(v,q) \
   (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));

// hentry options
#define H_OPT        (1 << 0)
#define H_OPT_ALIASM (1 << 1)
#define H_OPT_PHON   (1 << 2)

// see also csutil.hxx
// address of the first byte of the word stored in entry h
#define HENTRY_WORD(h) (&((h)->word[0]))

// approx. number of user defined words
#define USERWORD 1000
|
|
||||||
|
|
||||||
// One dictionary entry in the hash table. The struct is allocated with
// extra room so that `word` can hold the whole NUL-terminated string.
struct hentry
{
  // word length in bytes
  unsigned char blen;
  // word length in characters (different for UTF-8 enc.)
  unsigned char clen;
  // length of affix flag vector
  short alen;
  // affix flag vector
  unsigned short * astr;
  // next word with same hash code
  struct hentry * next;
  // next homonym word (with same hash code)
  struct hentry * next_homonym;
  // variable fields (only for special pronunciation yet)
  char var;
  // variable-length word (8-bit or UTF-8 encoding)
  char word[1];
};
|
|
||||||
|
|
||||||
#endif
|
|
File diff suppressed because it is too large
Load Diff
@ -1,164 +0,0 @@
|
|||||||
# Microsoft Developer Studio Project File - Name="hunspell" - Package Owner=<4>
|
|
||||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
|
||||||
# ** DO NOT EDIT **
|
|
||||||
|
|
||||||
# TARGTYPE "Win32 (x86) Static Library" 0x0104
|
|
||||||
|
|
||||||
CFG=hunspell - Win32 Debug
|
|
||||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
|
||||||
!MESSAGE use the Export Makefile command and run
|
|
||||||
!MESSAGE
|
|
||||||
!MESSAGE NMAKE /f "hunspell.mak".
|
|
||||||
!MESSAGE
|
|
||||||
!MESSAGE You can specify a configuration when running NMAKE
|
|
||||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
|
||||||
!MESSAGE
|
|
||||||
!MESSAGE NMAKE /f "hunspell.mak" CFG="hunspell - Win32 Debug"
|
|
||||||
!MESSAGE
|
|
||||||
!MESSAGE Possible choices for configuration are:
|
|
||||||
!MESSAGE
|
|
||||||
!MESSAGE "hunspell - Win32 Release" (based on "Win32 (x86) Static Library")
|
|
||||||
!MESSAGE "hunspell - Win32 Debug" (based on "Win32 (x86) Static Library")
|
|
||||||
!MESSAGE
|
|
||||||
|
|
||||||
# Begin Project
|
|
||||||
# PROP AllowPerConfigDependencies 0
|
|
||||||
# PROP Scc_ProjName ""
|
|
||||||
# PROP Scc_LocalPath ""
|
|
||||||
CPP=cl.exe
|
|
||||||
RSC=rc.exe
|
|
||||||
|
|
||||||
!IF "$(CFG)" == "hunspell - Win32 Release"
|
|
||||||
|
|
||||||
# PROP BASE Use_MFC 0
|
|
||||||
# PROP BASE Use_Debug_Libraries 0
|
|
||||||
# PROP BASE Output_Dir "Release"
|
|
||||||
# PROP BASE Intermediate_Dir "Release"
|
|
||||||
# PROP BASE Target_Dir ""
|
|
||||||
# PROP Use_MFC 0
|
|
||||||
# PROP Use_Debug_Libraries 0
|
|
||||||
# PROP Output_Dir "Release"
|
|
||||||
# PROP Intermediate_Dir "Release"
|
|
||||||
# PROP Target_Dir ""
|
|
||||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
|
|
||||||
# ADD CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
|
|
||||||
# ADD BASE RSC /l 0x40e /d "NDEBUG"
|
|
||||||
# ADD RSC /l 0x40e /d "NDEBUG"
|
|
||||||
BSC32=bscmake.exe
|
|
||||||
# ADD BASE BSC32 /nologo
|
|
||||||
# ADD BSC32 /nologo
|
|
||||||
LIB32=link.exe -lib
|
|
||||||
# ADD BASE LIB32 /nologo
|
|
||||||
# ADD LIB32 /nologo
|
|
||||||
|
|
||||||
!ELSEIF "$(CFG)" == "hunspell - Win32 Debug"
|
|
||||||
|
|
||||||
# PROP BASE Use_MFC 0
|
|
||||||
# PROP BASE Use_Debug_Libraries 1
|
|
||||||
# PROP BASE Output_Dir "Debug"
|
|
||||||
# PROP BASE Intermediate_Dir "Debug"
|
|
||||||
# PROP BASE Target_Dir ""
|
|
||||||
# PROP Use_MFC 0
|
|
||||||
# PROP Use_Debug_Libraries 1
|
|
||||||
# PROP Output_Dir "Debug"
|
|
||||||
# PROP Intermediate_Dir "Debug"
|
|
||||||
# PROP Target_Dir ""
|
|
||||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
|
|
||||||
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
|
|
||||||
# ADD BASE RSC /l 0x40e /d "_DEBUG"
|
|
||||||
# ADD RSC /l 0x40e /d "_DEBUG"
|
|
||||||
BSC32=bscmake.exe
|
|
||||||
# ADD BASE BSC32 /nologo
|
|
||||||
# ADD BSC32 /nologo
|
|
||||||
LIB32=link.exe -lib
|
|
||||||
# ADD BASE LIB32 /nologo
|
|
||||||
# ADD LIB32 /nologo
|
|
||||||
|
|
||||||
!ENDIF
|
|
||||||
|
|
||||||
# Begin Target
|
|
||||||
|
|
||||||
# Name "hunspell - Win32 Release"
|
|
||||||
# Name "hunspell - Win32 Debug"
|
|
||||||
# Begin Group "Source Files"
|
|
||||||
|
|
||||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\affentry.cxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\affixmgr.cxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\csutil.cxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\dictmgr.cxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\hashmgr.cxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\hunspell.cxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\suggestmgr.cxx
|
|
||||||
# End Source File
|
|
||||||
# End Group
|
|
||||||
# Begin Group "Header Files"
|
|
||||||
|
|
||||||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\affentry.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\affixmgr.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\atypes.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\baseaffix.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\csutil.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\dictmgr.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\hashmgr.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\htypes.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\langnum.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\hunspell.hxx
|
|
||||||
# End Source File
|
|
||||||
# Begin Source File
|
|
||||||
|
|
||||||
SOURCE=.\suggestmgr.hxx
|
|
||||||
# End Source File
|
|
||||||
# End Group
|
|
||||||
# End Target
|
|
||||||
# End Project
|
|
@ -1,180 +0,0 @@
|
|||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include "hashmgr.hxx"
|
|
||||||
#include "affixmgr.hxx"
|
|
||||||
#include "suggestmgr.hxx"
|
|
||||||
#include "langnum.hxx"
|
|
||||||
|
|
||||||
#define SPELL_XML "<?xml?>"
|
|
||||||
|
|
||||||
#define MAXDIC 20
|
|
||||||
#define MAXSUGGESTION 15
|
|
||||||
#define MAXSHARPS 5
|
|
||||||
|
|
||||||
#define HUNSPELL_OK (1 << 0)
|
|
||||||
#define HUNSPELL_OK_WARN (1 << 1)
|
|
||||||
|
|
||||||
#ifndef _MYSPELLMGR_HXX_
|
|
||||||
#define _MYSPELLMGR_HXX_
|
|
||||||
|
|
||||||
/* Hunspell - public interface of the spell checker.
 *
 * Declares spell checking, suggestion, morphological analysis/generation
 * and run-time dictionary modification. The copy constructor and the
 * assignment operator are declared private, so instances cannot be copied
 * by client code.
 */
class LIBHUNSPELL_DLL_EXPORTED Hunspell
{
private:
   /* private on purpose: objects of this class are not copyable */
   Hunspell(const Hunspell&);
   Hunspell& operator = (const Hunspell&);
private:
   AffixMgr*       pAMgr;
   HashMgr*        pHMgr[MAXDIC];   /* room for at most MAXDIC hash managers */
   int             maxdic;          /* presumably the number of pHMgr slots in use - TODO confirm */
   SuggestMgr*     pSMgr;
   char *          encoding;
   struct cs_info * csconv;
   int             langnum;         /* see get_langnum() */
   int             utf8;
   int             complexprefixes;
   char**          wordbreak;

public:

  /* Hunspell(affix_data, affix_len, dic_data, dic_len) - constructor
   * input: the affix and dictionary data as bytes, each with its length
   *
   * NOTE(review): the original comment went on to describe WIN32 long-path
   * prefixes (\\\\?\\) and fopen()/_wfopen(); that text refers to file
   * paths, while this signature takes in-memory buffers - confirm whether
   * any of it still applies.
   */

  Hunspell(const char *affix_data, const size_t affix_len, const char *dic_data, const size_t dic_len);
  ~Hunspell();

  /* spell(word) - spellcheck word
   * output: 0 = bad word, not 0 = good word
   *
   * plus output:
   *   info: information bit array, fields:
   *     SPELL_COMPOUND  = a compound word
   *     SPELL_FORBIDDEN = an explicit forbidden word
   *   root: root (stem), when input is a word with affix(es)
   *
   * Both info and root are optional (they default to NULL).
   */

  int spell(const char * word, int * info = NULL, char ** root = NULL);

  /* suggest(suggestions, word) - search suggestions
   * input: pointer to an array of strings pointer and the (bad) word
   *   array of strings pointer (here *slst) may not be initialized
   * output: number of suggestions in string array, and suggestions in
   *   a newly allocated array of strings (*slst will be NULL when number
   *   of suggestion equals 0.)
   */

  int suggest(char*** slst, const char * word);

  /* deallocate suggestion lists */

  void free_list(char *** slst, int n);

  char * get_dic_encoding();

 /* morphological functions */

 /* analyze(result, word) - morphological analysis of the word */

  int analyze(char*** slst, const char * word);

 /* stem(result, word) - stemmer function */

  int stem(char*** slst, const char * word);

 /* stem(result, analysis, n) - get stems from a morph. analysis
  * example:
  * char ** result, result2;
  * int n1 = analyze(&result, "words");
  * int n2 = stem(&result2, result, n1);
  */

  int stem(char*** slst, char ** morph, int n);

 /* generate(result, word, word2) - morphological generation by example(s) */

  int generate(char*** slst, const char * word, const char * word2);

 /* generate(result, word, desc, n) - generation by morph. description(s)
  * example:
  * char ** result;
  * char * affix = "is:plural"; // description depends from dictionaries, too
  * int n = generate(&result, "word", &affix, 1);
  * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
  */

  int generate(char*** slst, const char * word, char ** desc, int n);

  /* functions for run-time modification of the dictionary */

  /* add word to the run-time dictionary */

  int add(const char * word);

  /* add word to the run-time dictionary with affix flags of
   * the example (a dictionary word): Hunspell will recognize
   * affixed forms of the new word, too.
   */

  int add_with_affix(const char * word, const char * example);

  /* remove word from the run-time dictionary */

  int remove(const char * word);

  /* other */

  /* get extra word characters defined in affix file for tokenization */
  const char * get_wordchars();
  unsigned short * get_wordchars_utf16(int * len);

  struct cs_info * get_csconv();
  const char * get_version();

  int get_langnum() const;

  /* need for putdic */
  int input_conv(const char * word, char * dest);

  /* experimental and deprecated functions */

  /* only declared when HUNSPELL_EXPERIMENTAL is defined at compile time */
#ifdef HUNSPELL_EXPERIMENTAL
  /* suffix is an affix flag string, similarly in dictionary files */
  int put_word_suffix(const char * word, const char * suffix);
  char * morph_with_correction(const char * word);

  /* spec. suggestions */
  int suggest_auto(char*** slst, const char * word);
  int suggest_pos_stems(char*** slst, const char * word);
#endif

private:
   /* internal helpers; their definitions are not part of this header */
   int    cleanword(char *, const char *, int * pcaptype, int * pabbrev);
   int    cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
   void   mkinitcap(char *);
   int    mkinitcap2(char * p, w_char * u, int nc);
   int    mkinitsmall2(char * p, w_char * u, int nc);
   void   mkallcap(char *);
   int    mkallcap2(char * p, w_char * u, int nc);
   void   mkallsmall(char *);
   int    mkallsmall2(char * p, w_char * u, int nc);
   struct hentry * checkword(const char *, int * info, char **root);
   char * sharps_u8_l1(char * dest, char * source);
   hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
   int    is_keepcase(const hentry * rv);
   int    insert_sug(char ***slst, char * word, int ns);
   void   cat_result(char * result, char * st);
   char * stem_description(const char * desc);
   int    spellml(char*** slst, const char * word);
   int    get_xml_par(char * dest, const char * par, int maxl);
   const char * get_xml_pos(const char * s, const char * attr);
   int    get_xml_list(char ***slst, char * list, const char * tag);
   int    check_xml_par(const char * q, const char * attr, const char * value);

};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,18 +0,0 @@
|
|||||||
#ifndef _HUNSPELL_VISIBILITY_H_
|
|
||||||
#define _HUNSPELL_VISIBILITY_H_
|
|
||||||
|
|
||||||
#if defined(HUNSPELL_STATIC)
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
# if defined(BUILDING_LIBHUNSPELL)
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
|
|
||||||
# else
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
|
||||||
# endif
|
|
||||||
#elif defined(BUILDING_LIBHUNSPELL) && 1
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
|
||||||
#else
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,18 +0,0 @@
|
|||||||
#ifndef _HUNSPELL_VISIBILITY_H_
|
|
||||||
#define _HUNSPELL_VISIBILITY_H_
|
|
||||||
|
|
||||||
#if defined(HUNSPELL_STATIC)
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
# if defined(BUILDING_LIBHUNSPELL)
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
|
|
||||||
# else
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
|
||||||
# endif
|
|
||||||
#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
|
||||||
#else
|
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,38 +0,0 @@
|
|||||||
#ifndef _LANGNUM_HXX_
|
|
||||||
#define _LANGNUM_HXX_
|
|
||||||
|
|
||||||
/*
|
|
||||||
language numbers for language specific codes
|
|
||||||
see http://l10n.openoffice.org/languages.html
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Numeric language identifiers. Values are written as plain decimals
// (no leading zeros, which C++ would parse as octal literals).
enum {
    LANG_ar = 96,
    LANG_az = 100,  // custom number
    LANG_bg = 41,
    LANG_ca = 37,
    LANG_cs = 42,
    LANG_da = 45,
    LANG_de = 49,
    LANG_el = 30,
    LANG_en = 1,
    LANG_es = 34,
    LANG_eu = 10,
    LANG_fr = 2,
    LANG_gl = 38,
    LANG_hr = 78,
    LANG_hu = 36,
    LANG_it = 39,
    LANG_la = 99,   // custom number
    LANG_lv = 101,  // custom number
    LANG_nl = 31,
    LANG_pl = 48,
    LANG_pt = 3,
    LANG_ru = 7,
    LANG_sv = 50,
    LANG_tr = 90,
    LANG_uk = 80,
    LANG_xx = 999
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,59 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* The Original Code is Hunspell, based on MySpell.
|
|
||||||
*
|
|
||||||
* The Initial Developers of the Original Code are
|
|
||||||
* Kevin Hendricks (MySpell) and Laszlo Nemeth (Hunspell).
|
|
||||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
|
||||||
* the Initial Developers. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Contributor(s):
|
|
||||||
* David Einstein
|
|
||||||
* Davide Prina
|
|
||||||
* Giuseppe Modugno
|
|
||||||
* Gianluca Turconi
|
|
||||||
* Simon Brouwer
|
|
||||||
* Noll Janos
|
|
||||||
* Biro Arpad
|
|
||||||
* Goldman Eleonora
|
|
||||||
* Sarlos Tamas
|
|
||||||
* Bencsath Boldizsar
|
|
||||||
* Halacsy Peter
|
|
||||||
* Dvornik Laszlo
|
|
||||||
* Gefferth Andras
|
|
||||||
* Nagy Viktor
|
|
||||||
* Varga Daniel
|
|
||||||
* Chris Halls
|
|
||||||
* Rene Engelhard
|
|
||||||
* Bram Moolenaar
|
|
||||||
* Dafydd Jones
|
|
||||||
* Harri Pitkanen
|
|
||||||
* Andras Timar
|
|
||||||
* Tor Lillqvist
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include "config.h"
|
|
@ -1,61 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
|
||||||
* And Contributors. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* 3. All modifications to the source code must be clearly marked as
|
|
||||||
* such. Binary redistributions based on modified source code
|
|
||||||
* must be clearly marked as modified versions in the documentation
|
|
||||||
* and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
|
||||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* NOTE: A special thanks and credit goes to Geoff Kuenning
|
|
||||||
* the creator of ispell. MySpell's affix algorithms were
|
|
||||||
* based on those of ispell which should be noted is
|
|
||||||
* copyright Geoff Kuenning et.al. and now available
|
|
||||||
* under a BSD style license. For more information on ispell
|
|
||||||
* and affix compression in general, please see:
|
|
||||||
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
|
||||||
* (the home page for ispell)
|
|
||||||
*
|
|
||||||
* An almost complete rewrite of MySpell for use by
|
|
||||||
* the Mozilla project has been developed by David Einstein
|
|
||||||
* (Deinst@world.std.com). David and I are now
|
|
||||||
* working on parallel development tracks to help
|
|
||||||
* our respective projects (Mozilla and OpenOffice.org
|
|
||||||
* and we will maintain full affix file and dictionary
|
|
||||||
* file compatibility and work on merging our versions
|
|
||||||
* of MySpell back into a single tree. David has been
|
|
||||||
* a significant help in improving MySpell.
|
|
||||||
*
|
|
||||||
* Special thanks also go to La'szlo' Ne'meth
|
|
||||||
* <nemethl@gyorsposta.hu> who is the author of the
|
|
||||||
* Hungarian dictionary and who developed and contributed
|
|
||||||
* the code to support compound words in MySpell
|
|
||||||
* and fixed numerous problems with the encoding
|
|
||||||
* case conversion tables.
|
|
||||||
*
|
|
||||||
*/
|
|
@ -1,65 +0,0 @@
|
|||||||
#*************************************************************************
|
|
||||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
#
|
|
||||||
# The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
# the License. You may obtain a copy of the License at
|
|
||||||
# http://www.mozilla.org/MPL/
|
|
||||||
#
|
|
||||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
# for the specific language governing rights and limitations under the
|
|
||||||
# License.
|
|
||||||
#
|
|
||||||
# Alternatively, the contents of this file may be used under the terms of
|
|
||||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
# of those above. If you wish to allow use of your version of this file only
|
|
||||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
# use your version of this file under the terms of the MPL, indicate your
|
|
||||||
# decision by deleting the provisions above and replace them with the notice
|
|
||||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
# the provisions above, a recipient may use your version of this file under
|
|
||||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
#
|
|
||||||
#*************************************************************************
|
|
||||||
|
|
||||||
PRJ = ../../../../../..
|
|
||||||
|
|
||||||
PRJNAME = hunspell
|
|
||||||
TARGET = hunspell
|
|
||||||
LIBTARGET=YES
|
|
||||||
EXTERNAL_WARNINGS_NOT_ERRORS := TRUE
|
|
||||||
UWINAPILIB=
|
|
||||||
|
|
||||||
#----- Settings ---------------------------------------------------------
|
|
||||||
|
|
||||||
.INCLUDE : settings.mk
|
|
||||||
|
|
||||||
# --- Files --------------------------------------------------------
|
|
||||||
|
|
||||||
CFLAGS+=-I..$/..$/
|
|
||||||
CDEFS+=-DOPENOFFICEORG
|
|
||||||
|
|
||||||
SLOFILES= \
|
|
||||||
$(SLO)$/affentry.obj \
|
|
||||||
$(SLO)$/affixmgr.obj \
|
|
||||||
$(SLO)$/dictmgr.obj \
|
|
||||||
$(SLO)$/csutil.obj \
|
|
||||||
$(SLO)$/hashmgr.obj \
|
|
||||||
$(SLO)$/suggestmgr.obj \
|
|
||||||
$(SLO)$/phonet.obj \
|
|
||||||
$(SLO)$/hunzip.obj \
|
|
||||||
$(SLO)$/filemgr.obj \
|
|
||||||
$(SLO)$/replist.obj \
|
|
||||||
$(SLO)$/hunspell.obj
|
|
||||||
|
|
||||||
LIB1TARGET= $(SLB)$/lib$(TARGET).lib
|
|
||||||
LIB1ARCHIV= $(LB)/lib$(TARGET).a
|
|
||||||
LIB1OBJFILES= $(SLOFILES)
|
|
||||||
|
|
||||||
# --- Targets ------------------------------------------------------
|
|
||||||
|
|
||||||
.INCLUDE : target.mk
|
|
||||||
|
|
@ -1,293 +0,0 @@
|
|||||||
/* phonetic.c - generic replacement algorithms for phonetic transformation
|
|
||||||
Copyright (C) 2000 Bjoern Jacke
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License version 2.1 as published by the Free Software Foundation;
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; If not, see
|
|
||||||
<http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
Changelog:
|
|
||||||
|
|
||||||
2000-01-05 Bjoern Jacke <bjoern at j3e.de>
|
|
||||||
Initial Release inspired by the article about phonetic
|
|
||||||
transformations out of c't 25/1999
|
|
||||||
|
|
||||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
|
||||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
|
||||||
|
|
||||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
|
||||||
Porting from Aspell to Hunspell using C-like structs
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "csutil.hxx"
|
|
||||||
#include "phonet.hxx"
|
|
||||||
|
|
||||||
// Fill parms.hash so that hash[b] is the index of the first rule whose
// pattern starts with byte b, or -1 when no rule starts with that byte.
// Rules are walked two entries at a time; an entry with an empty string
// terminates the list.
void init_phonet_hash(phonetable & parms)
{
  // start with "no rule" in every slot
  int slot = 0;
  while (slot < HASHSIZE) {
    parms.hash[slot++] = -1;
  }

  for (int rule = 0; parms.rules[rule][0] != '\0'; rule += 2) {
    const int first = (unsigned char) parms.rules[rule][0];

    // only the earliest rule for a given first byte is recorded
    if (parms.hash[first] >= 0) continue;
    parms.hash[first] = rule;
  }
}
|
|
||||||
|
|
||||||
// Copy the NUL-terminated string at src onto dest, one byte at a time from
// the front. Unlike strcpy the two buffers may overlap, but only when
// dest < src.
static inline void strmove(char * dest, char * src) {
  for (; *src != '\0'; ++src, ++dest) {
    *dest = *src;
  }
  *dest = '\0';
}
|
|
||||||
|
|
||||||
// isalpha() for 7-bit bytes; every byte >= 128 is reported as alphabetic (1).
static int myisalpha(char ch) {
  const unsigned char byte = (unsigned char) ch;
  return (byte >= 128) ? 1 : isalpha(ch);
}
|
|
||||||
|
|
||||||
/* phonetic transcription algorithm */
|
|
||||||
/* see: http://aspell.net/man-html/Phonetic-Code.html */
|
|
||||||
/* convert string to uppercase before this call */
|
|
||||||
int phonet (const char * inword, char * target,
|
|
||||||
int len,
|
|
||||||
phonetable & parms)
|
|
||||||
{
|
|
||||||
/** Do phonetic transformation. **/
|
|
||||||
/** "len" = length of "inword" incl. '\0'. **/
|
|
||||||
|
|
||||||
/** result: >= 0: length of "target" **/
|
|
||||||
/** otherwise: error **/
|
|
||||||
|
|
||||||
int i,j,k=0,n,p,z;
|
|
||||||
int k0,n0,p0=-333,z0;
|
|
||||||
char c, c0;
|
|
||||||
const char * s;
|
|
||||||
typedef unsigned char uchar;
|
|
||||||
char word[MAXPHONETUTF8LEN + 1];
|
|
||||||
if (len == -1) len = strlen(inword);
|
|
||||||
if (len > MAXPHONETUTF8LEN) return 0;
|
|
||||||
strncpy(word, inword, MAXPHONETUTF8LEN);
|
|
||||||
word[MAXPHONETUTF8LEN] = '\0';
|
|
||||||
|
|
||||||
/** check word **/
|
|
||||||
i = j = z = 0;
|
|
||||||
while ((c = word[i]) != '\0') {
|
|
||||||
n = parms.hash[(uchar) c];
|
|
||||||
z0 = 0;
|
|
||||||
|
|
||||||
if (n >= 0) {
|
|
||||||
/** check all rules for the same letter **/
|
|
||||||
while (parms.rules[n][0] == c) {
|
|
||||||
|
|
||||||
/** check whole string **/
|
|
||||||
k = 1; /** number of found letters **/
|
|
||||||
p = 5; /** default priority **/
|
|
||||||
s = parms.rules[n];
|
|
||||||
s++; /** important for (see below) "*(s-1)" **/
|
|
||||||
|
|
||||||
while (*s != '\0' && word[i+k] == *s
|
|
||||||
&& !isdigit ((unsigned char) *s) && strchr ("(-<^$", *s) == NULL) {
|
|
||||||
k++;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
if (*s == '(') {
|
|
||||||
/** check letters in "(..)" **/
|
|
||||||
if (myisalpha(word[i+k]) // ...could be implied?
|
|
||||||
&& strchr(s+1, word[i+k]) != NULL) {
|
|
||||||
k++;
|
|
||||||
while (*s != ')')
|
|
||||||
s++;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
p0 = (int) *s;
|
|
||||||
k0 = k;
|
|
||||||
while (*s == '-' && k > 1) {
|
|
||||||
k--;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
if (*s == '<')
|
|
||||||
s++;
|
|
||||||
if (isdigit ((unsigned char) *s)) {
|
|
||||||
/** determine priority **/
|
|
||||||
p = *s - '0';
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
if (*s == '^' && *(s+1) == '^')
|
|
||||||
s++;
|
|
||||||
|
|
||||||
if (*s == '\0'
|
|
||||||
|| (*s == '^'
|
|
||||||
&& (i == 0 || ! myisalpha(word[i-1]))
|
|
||||||
&& (*(s+1) != '$'
|
|
||||||
|| (! myisalpha(word[i+k0]) )))
|
|
||||||
|| (*s == '$' && i > 0
|
|
||||||
&& myisalpha(word[i-1])
|
|
||||||
&& (! myisalpha(word[i+k0]) )))
|
|
||||||
{
|
|
||||||
/** search for followup rules, if: **/
|
|
||||||
/** parms.followup and k > 1 and NO '-' in searchstring **/
|
|
||||||
c0 = word[i+k-1];
|
|
||||||
n0 = parms.hash[(uchar) c0];
|
|
||||||
|
|
||||||
// if (parms.followup && k > 1 && n0 >= 0
|
|
||||||
if (k > 1 && n0 >= 0
|
|
||||||
&& p0 != (int) '-' && word[i+k] != '\0') {
|
|
||||||
/** test follow-up rule for "word[i+k]" **/
|
|
||||||
while (parms.rules[n0][0] == c0) {
|
|
||||||
|
|
||||||
/** check whole string **/
|
|
||||||
k0 = k;
|
|
||||||
p0 = 5;
|
|
||||||
s = parms.rules[n0];
|
|
||||||
s++;
|
|
||||||
while (*s != '\0' && word[i+k0] == *s
|
|
||||||
&& ! isdigit((unsigned char) *s) && strchr("(-<^$",*s) == NULL) {
|
|
||||||
k0++;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
if (*s == '(') {
|
|
||||||
/** check letters **/
|
|
||||||
if (myisalpha(word[i+k0])
|
|
||||||
&& strchr (s+1, word[i+k0]) != NULL) {
|
|
||||||
k0++;
|
|
||||||
while (*s != ')' && *s != '\0')
|
|
||||||
s++;
|
|
||||||
if (*s == ')')
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (*s == '-') {
|
|
||||||
/** "k0" gets NOT reduced **/
|
|
||||||
/** because "if (k0 == k)" **/
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
if (*s == '<')
|
|
||||||
s++;
|
|
||||||
if (isdigit ((unsigned char) *s)) {
|
|
||||||
p0 = *s - '0';
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*s == '\0'
|
|
||||||
/** *s == '^' cuts **/
|
|
||||||
|| (*s == '$' && ! myisalpha(word[i+k0])))
|
|
||||||
{
|
|
||||||
if (k0 == k) {
|
|
||||||
/** this is just a piece of the string **/
|
|
||||||
n0 += 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p0 < p) {
|
|
||||||
/** priority too low **/
|
|
||||||
n0 += 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
/** rule fits; stop search **/
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
n0 += 2;
|
|
||||||
} /** End of "while (parms.rules[n0][0] == c0)" **/
|
|
||||||
|
|
||||||
if (p0 >= p && parms.rules[n0][0] == c0) {
|
|
||||||
n += 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} /** end of follow-up stuff **/
|
|
||||||
|
|
||||||
/** replace string **/
|
|
||||||
s = parms.rules[n+1];
|
|
||||||
p0 = (parms.rules[n][0] != '\0'
|
|
||||||
&& strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;
|
|
||||||
if (p0 == 1 && z == 0) {
|
|
||||||
/** rule with '<' is used **/
|
|
||||||
if (j > 0 && *s != '\0'
|
|
||||||
&& (target[j-1] == c || target[j-1] == *s)) {
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
z0 = 1;
|
|
||||||
z = 1;
|
|
||||||
k0 = 0;
|
|
||||||
while (*s != '\0' && word[i+k0] != '\0') {
|
|
||||||
word[i+k0] = *s;
|
|
||||||
k0++;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
if (k > k0)
|
|
||||||
strmove (&word[0]+i+k0, &word[0]+i+k);
|
|
||||||
|
|
||||||
/** new "actual letter" **/
|
|
||||||
c = word[i];
|
|
||||||
}
|
|
||||||
else { /** no '<' rule used **/
|
|
||||||
i += k - 1;
|
|
||||||
z = 0;
|
|
||||||
while (*s != '\0'
|
|
||||||
&& *(s+1) != '\0' && j < len) {
|
|
||||||
if (j == 0 || target[j-1] != *s) {
|
|
||||||
target[j] = *s;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
/** new "actual letter" **/
|
|
||||||
c = *s;
|
|
||||||
if (parms.rules[n][0] != '\0'
|
|
||||||
&& strstr (parms.rules[n]+1, "^^") != NULL) {
|
|
||||||
if (c != '\0') {
|
|
||||||
target[j] = c;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
strmove (&word[0], &word[0]+i+1);
|
|
||||||
i = 0;
|
|
||||||
z0 = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
} /** end of follow-up stuff **/
|
|
||||||
n += 2;
|
|
||||||
} /** end of while (parms.rules[n][0] == c) **/
|
|
||||||
} /** end of if (n >= 0) **/
|
|
||||||
if (z0 == 0) {
|
|
||||||
// if (k && (assert(p0!=-333),!p0) && j < len && c != '\0'
|
|
||||||
// && (!parms.collapse_result || j == 0 || target[j-1] != c)){
|
|
||||||
if (k && !p0 && j < len && c != '\0'
|
|
||||||
&& (1 || j == 0 || target[j-1] != c)){
|
|
||||||
/** condense only double letters **/
|
|
||||||
target[j] = c;
|
|
||||||
///printf("\n setting \n");
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
|
|
||||||
i++;
|
|
||||||
z = 0;
|
|
||||||
k=0;
|
|
||||||
}
|
|
||||||
} /** end of while ((c = word[i]) != '\0') **/
|
|
||||||
|
|
||||||
target[j] = '\0';
|
|
||||||
return (j);
|
|
||||||
|
|
||||||
} /** end of function "phonet" **/
|
|
@ -1,52 +0,0 @@
|
|||||||
/* phonetic.c - generic replacement algorithms for phonetic transformation
|
|
||||||
Copyright (C) 2000 Bjoern Jacke
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License version 2.1 as published by the Free Software Foundation;
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; If not, see
|
|
||||||
<http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
Changelog:
|
|
||||||
|
|
||||||
2000-01-05 Bjoern Jacke <bjoern at j3e.de>
|
|
||||||
Initial Release inspired by the article about phonetic
|
|
||||||
transformations out of c't 25/1999
|
|
||||||
|
|
||||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
|
||||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
|
||||||
|
|
||||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
|
||||||
Porting from Aspell to Hunspell using C-like structs
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __PHONETHXX__
|
|
||||||
#define __PHONETHXX__
|
|
||||||
|
|
||||||
#define HASHSIZE 256
|
|
||||||
#define MAXPHONETLEN 256
|
|
||||||
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
/* Rule table consumed by init_phonet_hash() and phonet(). */
struct phonetable {
  char utf8;          /* presumably a flag for UTF-8 encoded rules - TODO confirm */
  cs_info * lang;     /* NOTE(review): not used by phonet(); looks like a character-set table - confirm */
  int num;            /* presumably the number of rules - TODO confirm */
  char * * rules;     /* strings read in pairs (rules[n], rules[n+1]); an empty string ends the list */
  int hash[HASHSIZE]; /* first byte -> index of the first rule starting with it, or -1 */
};
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
|
|
||||||
int len, phonetable & phone);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,87 +0,0 @@
|
|||||||
#include "license.hunspell"
|
|
||||||
#include "license.myspell"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "replist.hxx"
|
|
||||||
#include "csutil.hxx"
|
|
||||||
|
|
||||||
// Allocate room for n entry pointers. If the allocation fails the list
// keeps a capacity of 0, so add() will refuse every entry.
RepList::RepList(int n) {
    pos = 0;
    dat = (replentry **) malloc(sizeof(replentry *) * n);
    size = (dat != 0) ? n : 0;
}
|
|
||||||
|
|
||||||
// Release every stored entry (both of its strings, then the entry itself)
// and finally the pointer array.
RepList::~RepList()
{
    int idx = 0;
    while (idx < pos) {
        replentry * entry = dat[idx++];
        free(entry->pattern);
        free(entry->pattern2);
        free(entry);
    }
    free(dat);
}
|
|
||||||
|
|
||||||
int RepList::get_pos() {
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Entry stored at index n. The index is not range-checked.
replentry * RepList::item(int n) {
    replentry * entry = dat[n];
    return entry;
}
|
|
||||||
|
|
||||||
int RepList::near(const char * word) {
|
|
||||||
int p1 = 0;
|
|
||||||
int p2 = pos;
|
|
||||||
while ((p2 - p1) > 1) {
|
|
||||||
int m = (p1 + p2) / 2;
|
|
||||||
int c = strcmp(word, dat[m]->pattern);
|
|
||||||
if (c <= 0) {
|
|
||||||
if (c < 0) p2 = m; else p1 = p2 = m;
|
|
||||||
} else p1 = m;
|
|
||||||
}
|
|
||||||
return p1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int RepList::match(const char * word, int n) {
|
|
||||||
if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) return strlen(dat[n]->pattern);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int RepList::add(char * pat1, char * pat2) {
|
|
||||||
if (pos >= size || pat1 == NULL || pat2 == NULL) return 1;
|
|
||||||
replentry * r = (replentry *) malloc(sizeof(replentry));
|
|
||||||
if (r == NULL) return 1;
|
|
||||||
r->pattern = mystrrep(pat1, "_", " ");
|
|
||||||
r->pattern2 = mystrrep(pat2, "_", " ");
|
|
||||||
r->start = false;
|
|
||||||
r->end = false;
|
|
||||||
dat[pos++] = r;
|
|
||||||
for (int i = pos - 1; i > 0; i--) {
|
|
||||||
r = dat[i];
|
|
||||||
if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
|
|
||||||
dat[i] = dat[i - 1];
|
|
||||||
dat[i - 1] = r;
|
|
||||||
} else break;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int RepList::conv(const char * word, char * dest) {
|
|
||||||
int stl = 0;
|
|
||||||
int change = 0;
|
|
||||||
for (size_t i = 0; i < strlen(word); i++) {
|
|
||||||
int n = near(word + i);
|
|
||||||
int l = match(word + i, n);
|
|
||||||
if (l) {
|
|
||||||
strcpy(dest + stl, dat[n]->pattern2);
|
|
||||||
stl += strlen(dat[n]->pattern2);
|
|
||||||
i += l - 1;
|
|
||||||
change = 1;
|
|
||||||
} else dest[stl++] = word[i];
|
|
||||||
}
|
|
||||||
dest[stl] = '\0';
|
|
||||||
return change;
|
|
||||||
}
|
|
@ -1,30 +0,0 @@
|
|||||||
/* string replacement list class */
|
|
||||||
#ifndef _REPLIST_HXX_
|
|
||||||
#define _REPLIST_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include "w_char.hxx"
|
|
||||||
|
|
||||||
/* Fixed-capacity list of replacement entries, kept ordered by pattern.
 * The copy constructor and assignment operator are declared private, so
 * the list cannot be copied by client code.
 */
class LIBHUNSPELL_DLL_EXPORTED RepList
{
private:
    /* private on purpose: not copyable */
    RepList(const RepList&);
    RepList& operator = (const RepList&);
protected:
    replentry ** dat;   /* array of entry pointers, allocated in the constructor */
    int size;           /* capacity of dat (0 if the allocation failed) */
    int pos;            /* number of entries stored so far */

public:
    /* allocate room for n entries */
    RepList(int n);
    ~RepList();

    /* number of stored entries */
    int get_pos();
    /* insert a pattern/replacement pair; returns 0 on success, 1 on failure */
    int add(char * pat1, char * pat2);
    /* entry at index n (not range-checked) */
    replentry * item(int n);
    /* binary search by pattern; returns a candidate index for match() */
    int near(const char * word);
    /* length of entry n's pattern if word starts with it, otherwise 0 */
    int match(const char * word, int n);
    /* copy word to dest applying the replacements; returns 1 if anything changed */
    int conv(const char * word, char * dest);
};
|
|
||||||
#endif
|
|
File diff suppressed because it is too large
Load Diff
@ -1,115 +0,0 @@
|
|||||||
#ifndef _SUGGESTMGR_HXX_
|
|
||||||
#define _SUGGESTMGR_HXX_
|
|
||||||
|
|
||||||
// Size limits for the suggestion code. MAXSWUTF8L is always four times
// MAXSWL.
#define MAXSWL          100
#define MAXSWUTF8L      (MAXSWL * 4)
#define MAX_ROOTS       100
#define MAX_WORDS       100
#define MAX_GUESS       200
#define MAXNGRAMSUGS    4
#define MAXPHONSUGS     2
#define MAXCOMPOUNDSUGS 3

// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function
#define TIMELIMIT    (CLOCKS_PER_SEC >> 2)
#define MINTIMER     100
#define MAXPLUSTIMER 100

// Option bits for n-gram scoring; each flag is a distinct single bit so they
// can be OR-ed together.
#define NGRAM_LONGER_WORSE (1 << 0)
#define NGRAM_ANY_MISMATCH (1 << 1)
#define NGRAM_LOWERING     (1 << 2)
#define NGRAM_WEIGHTED     (1 << 3)
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include "atypes.hxx"
|
|
||||||
#include "affixmgr.hxx"
|
|
||||||
#include "hashmgr.hxx"
|
|
||||||
#include "langnum.hxx"
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
// Anonymous enumeration with values 0, 1 and 2.
// NOTE(review): presumably the direction markers recorded by
// SuggestMgr::lcs() -- confirm against suggestmgr.cxx.
enum {
    LCS_UP,
    LCS_LEFT,
    LCS_UPLEFT
};
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
SuggestMgr(const SuggestMgr&);
|
|
||||||
SuggestMgr& operator = (const SuggestMgr&);
|
|
||||||
private:
|
|
||||||
char * ckey;
|
|
||||||
int ckeyl;
|
|
||||||
w_char * ckey_utf;
|
|
||||||
|
|
||||||
char * ctry;
|
|
||||||
int ctryl;
|
|
||||||
w_char * ctry_utf;
|
|
||||||
|
|
||||||
AffixMgr* pAMgr;
|
|
||||||
int maxSug;
|
|
||||||
struct cs_info * csconv;
|
|
||||||
int utf8;
|
|
||||||
int langnum;
|
|
||||||
int nosplitsugs;
|
|
||||||
int maxngramsugs;
|
|
||||||
int maxcpdsugs;
|
|
||||||
int complexprefixes;
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
|
|
||||||
~SuggestMgr();
|
|
||||||
|
|
||||||
int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
|
|
||||||
int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
|
|
||||||
int suggest_auto(char*** slst, const char * word, int nsug);
|
|
||||||
int suggest_stems(char*** slst, const char * word, int nsug);
|
|
||||||
int suggest_pos_stems(char*** slst, const char * word, int nsug);
|
|
||||||
|
|
||||||
char * suggest_morph(const char * word);
|
|
||||||
char * suggest_gen(char ** pl, int pln, char * pattern);
|
|
||||||
char * suggest_morph_for_spelling_error(const char * word);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
|
|
||||||
int * timer, clock_t * timelimit);
|
|
||||||
int checkword(const char *, int, int, int *, clock_t *);
|
|
||||||
int check_forbidden(const char *, int);
|
|
||||||
|
|
||||||
int capchars(char **, const char *, int, int);
|
|
||||||
int replchars(char**, const char *, int, int);
|
|
||||||
int doubletwochars(char**, const char *, int, int);
|
|
||||||
int forgotchar(char **, const char *, int, int);
|
|
||||||
int swapchar(char **, const char *, int, int);
|
|
||||||
int longswapchar(char **, const char *, int, int);
|
|
||||||
int movechar(char **, const char *, int, int);
|
|
||||||
int extrachar(char **, const char *, int, int);
|
|
||||||
int badcharkey(char **, const char *, int, int);
|
|
||||||
int badchar(char **, const char *, int, int);
|
|
||||||
int twowords(char **, const char *, int, int);
|
|
||||||
int fixstems(char **, const char *, int);
|
|
||||||
|
|
||||||
int capchars_utf(char **, const w_char *, int wl, int, int);
|
|
||||||
int doubletwochars_utf(char**, const w_char *, int wl, int, int);
|
|
||||||
int forgotchar_utf(char**, const w_char *, int wl, int, int);
|
|
||||||
int extrachar_utf(char**, const w_char *, int wl, int, int);
|
|
||||||
int badcharkey_utf(char **, const w_char *, int wl, int, int);
|
|
||||||
int badchar_utf(char **, const w_char *, int wl, int, int);
|
|
||||||
int swapchar_utf(char **, const w_char *, int wl, int, int);
|
|
||||||
int longswapchar_utf(char **, const w_char *, int, int, int);
|
|
||||||
int movechar_utf(char **, const w_char *, int, int, int);
|
|
||||||
|
|
||||||
int mapchars(char**, const char *, int, int);
|
|
||||||
int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
|
|
||||||
int ngram(int n, char * s1, const char * s2, int opt);
|
|
||||||
int mystrlen(const char * word);
|
|
||||||
int leftcommonsubstring(char * s1, const char * s2);
|
|
||||||
int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
|
|
||||||
void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
|
|
||||||
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
|
|
||||||
int lcslen(const char * s, const char* s2);
|
|
||||||
char * suggest_hentry_gen(hentry * rv, char * pattern);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,21 +0,0 @@
|
|||||||
#ifndef __WCHARHXX__
|
|
||||||
#define __WCHARHXX__
|
|
||||||
|
|
||||||
// Two-byte character cell made of two unsigned char fields. When the GCC
// macro is defined the struct is additionally declared packed.
// NOTE(review): l / h presumably hold the low and high byte of a 16-bit
// character -- confirm against the code that fills them.
#ifdef GCC
typedef struct __attribute__ ((packed)) {
#else
typedef struct {
#endif
    unsigned char l;
    unsigned char h;
} w_char;
|
|
||||||
|
|
||||||
// One replacement rule: two character arrays plus two boolean flags.
struct replentry {
    char* pattern;   // text to look for (the key RepList entries are ordered by)
    char* pattern2;  // text RepList::conv() writes in place of a match
    bool  start;     // NOTE(review): presumably "only at word start" -- confirm
    bool  end;       // NOTE(review): presumably "only at word end" -- confirm
};
|
|
||||||
|
|
||||||
#endif
|
|
Loading…
x
Reference in New Issue
Block a user