mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Update bundled copy of hunspell to 1.3.3
This commit is contained in:
parent
a837768348
commit
66b8f4dcc6
@ -1,9 +1,9 @@
|
||||
# Makefile.in generated by automake 1.11.1 from Makefile.am.
|
||||
# Makefile.in generated by automake 1.11.3 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
|
||||
# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
|
||||
# Inc.
|
||||
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
|
||||
# Foundation, Inc.
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
@ -86,6 +86,12 @@ am__nobase_list = $(am__nobase_strip_setup); \
|
||||
am__base_list = \
|
||||
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
|
||||
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
|
||||
am__uninstall_files_from_dir = { \
|
||||
test -z "$$files" \
|
||||
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|
||||
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
|
||||
$(am__cd) "$$dir" && rm -f $$files; }; \
|
||||
}
|
||||
am__installdirs = "$(DESTDIR)$(libdir)" \
|
||||
"$(DESTDIR)$(libhunspell_1_3_includedir)"
|
||||
LTLIBRARIES = $(lib_LTLIBRARIES)
|
||||
@ -374,7 +380,7 @@ clean-libLTLIBRARIES:
|
||||
echo "rm -f \"$${dir}/so_locations\""; \
|
||||
rm -f "$${dir}/so_locations"; \
|
||||
done
|
||||
libhunspell-1.3.la: $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_DEPENDENCIES)
|
||||
libhunspell-1.3.la: $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_DEPENDENCIES) $(EXTRA_libhunspell_1_3_la_DEPENDENCIES)
|
||||
$(libhunspell_1_3_la_LINK) -rpath $(libdir) $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_LIBADD) $(LIBS)
|
||||
|
||||
mostlyclean-compile:
|
||||
@ -438,9 +444,7 @@ uninstall-libhunspell_1_3_includeHEADERS:
|
||||
@$(NORMAL_UNINSTALL)
|
||||
@list='$(libhunspell_1_3_include_HEADERS)'; test -n "$(libhunspell_1_3_includedir)" || list=; \
|
||||
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||
test -n "$$files" || exit 0; \
|
||||
echo " ( cd '$(DESTDIR)$(libhunspell_1_3_includedir)' && rm -f" $$files ")"; \
|
||||
cd "$(DESTDIR)$(libhunspell_1_3_includedir)" && rm -f $$files
|
||||
dir='$(DESTDIR)$(libhunspell_1_3_includedir)'; $(am__uninstall_files_from_dir)
|
||||
|
||||
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||
@ -541,10 +545,15 @@ install-am: all-am
|
||||
|
||||
installcheck: installcheck-am
|
||||
install-strip:
|
||||
if test -z '$(STRIP)'; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
`test -z '$(STRIP)' || \
|
||||
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
|
||||
install; \
|
||||
else \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||
fi
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
@ -9,13 +9,17 @@
|
||||
#include "affentry.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4)
|
||||
|
||||
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
|
||||
{
|
||||
// register affix manager
|
||||
pmyMgr = pmgr;
|
||||
|
||||
: pmyMgr(pmgr)
|
||||
, next(NULL)
|
||||
, nexteq(NULL)
|
||||
, nextne(NULL)
|
||||
, flgnxt(NULL)
|
||||
{
|
||||
// set up its initial values
|
||||
|
||||
aflag = dp->aflag; // flag
|
||||
strip = dp->strip; // string to strip
|
||||
appnd = dp->appnd; // string to append
|
||||
@ -28,9 +32,6 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
|
||||
memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
|
||||
c.l.conds2 = dp->c.l.conds2;
|
||||
} else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
||||
next = NULL;
|
||||
nextne = NULL;
|
||||
nexteq = NULL;
|
||||
morphcode = dp->morphcode;
|
||||
contclass = dp->contclass;
|
||||
contclasslen = dp->contclasslen;
|
||||
@ -53,16 +54,17 @@ PfxEntry::~PfxEntry()
|
||||
// add prefix to this word assuming conditions hold
|
||||
char * PfxEntry::add(const char * word, int len)
|
||||
{
|
||||
char tword[MAXWORDUTF8LEN + 4];
|
||||
char tword[MAXTEMPWORDLEN];
|
||||
|
||||
if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(len >= numconds) && test_condition(word) &&
|
||||
(!stripl || (strncmp(word, strip, stripl) == 0)) &&
|
||||
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
|
||||
((MAXTEMPWORDLEN) > (len + appndl - stripl))) {
|
||||
/* we have a match so add prefix */
|
||||
char * pp = tword;
|
||||
if (appndl) {
|
||||
strcpy(tword,appnd);
|
||||
strncpy(tword, appnd, MAXTEMPWORDLEN-1);
|
||||
tword[MAXTEMPWORDLEN-1] = '\0';
|
||||
pp += appndl;
|
||||
}
|
||||
strcpy(pp, (word + stripl));
|
||||
@ -110,13 +112,15 @@ inline int PfxEntry::test_condition(const char * st)
|
||||
if (*st == '\0' && p) return 0; // word <= condition
|
||||
break;
|
||||
}
|
||||
case '.': if (!pos) { // dots are not metacharacters in groups: [.]
|
||||
case '.':
|
||||
if (!pos) { // dots are not metacharacters in groups: [.]
|
||||
p = nextchar(p);
|
||||
// skip the next character
|
||||
for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
|
||||
if (*st == '\0' && p) return 0; // word <= condition
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
default: {
|
||||
if (*st == *p) {
|
||||
st++;
|
||||
@ -133,11 +137,11 @@ inline int PfxEntry::test_condition(const char * st)
|
||||
}
|
||||
if (pos && st != pos) {
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && (p = nextchar(p)));
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||
}
|
||||
} else if (pos) {
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && (p = nextchar(p)));
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||
}
|
||||
} else if (pos) { // group
|
||||
p = nextchar(p);
|
||||
@ -153,7 +157,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
|
||||
{
|
||||
int tmpl; // length of tmpword
|
||||
struct hentry * he; // hash entry of root word or NULL
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
@ -167,7 +171,10 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
if (stripl) strcpy (tmpword, strip);
|
||||
if (stripl) {
|
||||
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
}
|
||||
strcpy ((tmpword + stripl), (word + appndl));
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
@ -214,7 +221,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
|
||||
{
|
||||
int tmpl; // length of tmpword
|
||||
struct hentry * he; // hash entry of root word or NULL
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
@ -229,7 +236,10 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
if (stripl) strcpy (tmpword, strip);
|
||||
if (stripl) {
|
||||
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
}
|
||||
strcpy ((tmpword + stripl), (word + appndl));
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
@ -261,7 +271,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag)
|
||||
{
|
||||
int tmpl; // length of tmpword
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
@ -276,7 +286,10 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
if (stripl) strcpy (tmpword, strip);
|
||||
if (stripl) {
|
||||
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
}
|
||||
strcpy ((tmpword + stripl), (word + appndl));
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
@ -308,7 +321,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
|
||||
{
|
||||
int tmpl; // length of tmpword
|
||||
struct hentry * he; // hash entry of root word or NULL
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
char result[MAXLNLEN];
|
||||
char * st;
|
||||
|
||||
@ -327,7 +340,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
if (stripl) strcpy (tmpword, strip);
|
||||
if (stripl) {
|
||||
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
}
|
||||
strcpy ((tmpword + stripl), (word + appndl));
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
@ -395,10 +411,15 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
|
||||
}
|
||||
|
||||
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
|
||||
: pmyMgr(pmgr) // register affix manager
|
||||
, next(NULL)
|
||||
, nexteq(NULL)
|
||||
, nextne(NULL)
|
||||
, flgnxt(NULL)
|
||||
, l_morph(NULL)
|
||||
, r_morph(NULL)
|
||||
, eq_morph(NULL)
|
||||
{
|
||||
// register affix manager
|
||||
pmyMgr = pmgr;
|
||||
|
||||
// set up its initial values
|
||||
aflag = dp->aflag; // char flag
|
||||
strip = dp->strip; // string to strip
|
||||
@ -413,7 +434,6 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
|
||||
memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
|
||||
c.l.conds2 = dp->c.l.conds2;
|
||||
} else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
||||
|
||||
rappnd = myrevstrdup(appnd);
|
||||
morphcode = dp->morphcode;
|
||||
contclass = dp->contclass;
|
||||
@ -438,15 +458,16 @@ SfxEntry::~SfxEntry()
|
||||
// add suffix to this word assuming conditions hold
|
||||
char * SfxEntry::add(const char * word, int len)
|
||||
{
|
||||
char tword[MAXWORDUTF8LEN + 4];
|
||||
char tword[MAXTEMPWORDLEN];
|
||||
|
||||
/* make sure all conditions match */
|
||||
if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(len >= numconds) && test_condition(word + len, word) &&
|
||||
(!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
|
||||
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
|
||||
((MAXTEMPWORDLEN) > (len + appndl - stripl))) {
|
||||
/* we have a match so add suffix */
|
||||
strcpy(tword,word);
|
||||
strncpy(tword, word, MAXTEMPWORDLEN-1);
|
||||
tword[MAXTEMPWORDLEN-1] = '\0';
|
||||
if (appndl) {
|
||||
strcpy(tword + len - stripl, appnd);
|
||||
} else {
|
||||
@ -481,13 +502,23 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||
int i = 1;
|
||||
while (1) {
|
||||
switch (*p) {
|
||||
case '\0': return 1;
|
||||
case '[': { p = nextchar(p); pos = st; break; }
|
||||
case '^': { p = nextchar(p); neg = true; break; }
|
||||
case ']': { if (!neg && !ingroup) return 0;
|
||||
case '\0':
|
||||
return 1;
|
||||
case '[':
|
||||
p = nextchar(p);
|
||||
pos = st;
|
||||
break;
|
||||
case '^':
|
||||
p = nextchar(p);
|
||||
neg = true;
|
||||
break;
|
||||
case ']':
|
||||
if (!neg && !ingroup)
|
||||
return 0;
|
||||
i++;
|
||||
// skip the next character
|
||||
if (!ingroup) {
|
||||
if (!ingroup)
|
||||
{
|
||||
for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
||||
st--;
|
||||
}
|
||||
@ -495,10 +526,13 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||
neg = false;
|
||||
ingroup = false;
|
||||
p = nextchar(p);
|
||||
if (st < beg && p) return 0; // word <= condition
|
||||
if (st < beg && p)
|
||||
return 0; // word <= condition
|
||||
break;
|
||||
}
|
||||
case '.': if (!pos) { // dots are not metacharacters in groups: [.]
|
||||
case '.':
|
||||
if (!pos)
|
||||
{
|
||||
// dots are not metacharacters in groups: [.]
|
||||
p = nextchar(p);
|
||||
// skip the next character
|
||||
for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
||||
@ -513,6 +547,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
default: {
|
||||
if (*st == *p) {
|
||||
p = nextchar(p);
|
||||
@ -533,7 +568,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||
if (neg) return 0;
|
||||
else if (i == numconds) return 1;
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && (p = nextchar(p)));
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||
st--;
|
||||
}
|
||||
if (p && *p != ']') p = nextchar(p);
|
||||
@ -541,7 +576,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||
if (neg) return 0;
|
||||
else if (i == numconds) return 1;
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && (p = nextchar(p)));
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||
// if (p && *p != ']') p = nextchar(p);
|
||||
st--;
|
||||
}
|
||||
@ -567,7 +602,7 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
||||
int tmpl; // length of tmpword
|
||||
struct hentry * he; // hash entry pointer
|
||||
unsigned char * cp;
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
PfxEntry* ep = ppfx;
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
@ -592,7 +627,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
||||
// back any characters that would have been stripped or
|
||||
// null terminating the shorter string
|
||||
|
||||
strcpy (tmpword, word);
|
||||
strncpy (tmpword, word, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
cp = (unsigned char *)(tmpword + tmpl);
|
||||
if (stripl) {
|
||||
strcpy ((char *)cp, strip);
|
||||
@ -645,7 +681,10 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
||||
} else if (wlst && (*ns < maxSug)) {
|
||||
int cwrd = 1;
|
||||
for (int k=0; k < *ns; k++)
|
||||
if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
|
||||
if (strcmp(tmpword, wlst[k]) == 0) {
|
||||
cwrd = 0;
|
||||
break;
|
||||
}
|
||||
if (cwrd) {
|
||||
wlst[*ns] = mystrdup(tmpword);
|
||||
if (wlst[*ns] == NULL) {
|
||||
@ -668,7 +707,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
|
||||
int tmpl; // length of tmpword
|
||||
struct hentry * he; // hash entry pointer
|
||||
unsigned char * cp;
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
PfxEntry* ep = ppfx;
|
||||
|
||||
|
||||
@ -692,7 +731,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
|
||||
// back any characters that would have been stripped or
|
||||
// null terminating the shorter string
|
||||
|
||||
strcpy (tmpword, word);
|
||||
strncpy(tmpword, word, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
cp = (unsigned char *)(tmpword + tmpl);
|
||||
if (stripl) {
|
||||
strcpy ((char *)cp, strip);
|
||||
@ -729,7 +769,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
|
||||
{
|
||||
int tmpl; // length of tmpword
|
||||
unsigned char * cp;
|
||||
char tmpword[MAXWORDUTF8LEN + 4];
|
||||
char tmpword[MAXTEMPWORDLEN];
|
||||
PfxEntry* ep = ppfx;
|
||||
char * st;
|
||||
|
||||
@ -757,7 +797,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
|
||||
// back any characters that would have been stripped or
|
||||
// null terminating the shorter string
|
||||
|
||||
strcpy (tmpword, word);
|
||||
strncpy(tmpword, word, MAXTEMPWORDLEN-1);
|
||||
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||
cp = (unsigned char *)(tmpword + tmpl);
|
||||
if (stripl) {
|
||||
strcpy ((char *)cp, strip);
|
||||
|
@ -11,6 +11,10 @@
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
||||
{
|
||||
private:
|
||||
PfxEntry(const PfxEntry&);
|
||||
PfxEntry& operator = (const PfxEntry&);
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
|
||||
PfxEntry * next;
|
||||
@ -67,6 +71,10 @@ public:
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
||||
{
|
||||
private:
|
||||
SfxEntry(const SfxEntry&);
|
||||
SfxEntry& operator = (const SfxEntry&);
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
char * rappnd;
|
||||
|
||||
|
@ -48,6 +48,7 @@ AffixMgr::AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, in
|
||||
compoundroot = FLAG_NULL; // compound word signing flag
|
||||
compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
|
||||
compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
|
||||
compoundmoresuffixes = 0; // allow more suffixes within compound words
|
||||
checkcompounddup = 0; // forbid double words in compounds
|
||||
checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
|
||||
checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
|
||||
@ -111,7 +112,7 @@ AffixMgr::AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, in
|
||||
}
|
||||
|
||||
if (parse_file(aff_data, aff_len)) {
|
||||
HUNSPELL_WARNING(stderr, "Failure loading aff file\n");
|
||||
HUNSPELL_WARNING(stderr, "Failure loading aff file \n");
|
||||
}
|
||||
|
||||
if (cpdmin == -1) cpdmin = MINCPDLEN;
|
||||
@ -253,6 +254,14 @@ AffixMgr::~AffixMgr()
|
||||
#endif
|
||||
}
|
||||
|
||||
// Shared clean-up step for the affix-file reader.
// Deletes `afflst` (the pointer must not be used after this call) and then
// runs process_pfx_tree_to_list() and process_sfx_tree_to_list().
void AffixMgr::finishFileMgr(FileMgr *afflst)
|
||||
{
|
||||
delete afflst;
|
||||
|
||||
// convert affix trees to sorted list
|
||||
process_pfx_tree_to_list();
|
||||
process_sfx_tree_to_list();
|
||||
}
|
||||
|
||||
// read in aff file and build up prefix and suffix entry objects
|
||||
int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
@ -279,7 +288,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
|
||||
// read in each line ignoring any that do not
|
||||
// start with a known line type indicator
|
||||
while ((line = afflst->getline())) {
|
||||
while ((line = afflst->getline()) != NULL) {
|
||||
mychomp(line);
|
||||
|
||||
/* remove byte order mark */
|
||||
@ -294,7 +303,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the keyboard string */
|
||||
if (strncmp(line,"KEY",3) == 0) {
|
||||
if (parse_string(line, &keystring, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -302,7 +311,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the try string */
|
||||
if (strncmp(line,"TRY",3) == 0) {
|
||||
if (parse_string(line, &trystring, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -310,7 +319,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the name of the character set used by the .dict and .aff */
|
||||
if (strncmp(line,"SET",3) == 0) {
|
||||
if (parse_string(line, &encoding, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
if (strcmp(encoding, "UTF-8") == 0) {
|
||||
@ -330,7 +339,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by the controlled compound words */
|
||||
if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
|
||||
if (parse_flag(line, &compoundflag, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -339,12 +348,12 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
|
||||
if (complexprefixes) {
|
||||
if (parse_flag(line, &compoundend, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
if (parse_flag(line, &compoundbegin, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -353,7 +362,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by compound words */
|
||||
if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
|
||||
if (parse_flag(line, &compoundmiddle, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -361,12 +370,12 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
if (strncmp(line,"COMPOUNDEND",11) == 0) {
|
||||
if (complexprefixes) {
|
||||
if (parse_flag(line, &compoundbegin, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
if (parse_flag(line, &compoundend, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -375,7 +384,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the data used by compound_check() method */
|
||||
if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
|
||||
if (parse_num(line, &cpdwordmax, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -383,7 +392,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag sign compounds in dictionary */
|
||||
if (strncmp(line,"COMPOUNDROOT",12) == 0) {
|
||||
if (parse_flag(line, &compoundroot, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -391,7 +400,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by compound_check() method */
|
||||
if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
|
||||
if (parse_flag(line, &compoundpermitflag, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -399,11 +408,15 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by compound_check() method */
|
||||
if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
|
||||
if (parse_flag(line, &compoundforbidflag, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) {
|
||||
compoundmoresuffixes = 1;
|
||||
}
|
||||
|
||||
if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
|
||||
checkcompounddup = 1;
|
||||
}
|
||||
@ -426,14 +439,14 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
|
||||
if (strncmp(line,"NOSUGGEST",9) == 0) {
|
||||
if (parse_flag(line, &nosuggest, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
|
||||
if (parse_flag(line, &nongramsuggest, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -441,7 +454,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by forbidden words */
|
||||
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
|
||||
if (parse_flag(line, &forbiddenword, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -449,7 +462,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the LEMMA_PRESENT flag */
|
||||
if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
|
||||
if (parse_flag(line, &lemma_present, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -457,7 +470,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by circumfixes */
|
||||
if (strncmp(line,"CIRCUMFIX",9) == 0) {
|
||||
if (parse_flag(line, &circumfix, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -465,7 +478,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by fogemorphemes */
|
||||
if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
|
||||
if (parse_flag(line, &onlyincompound, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -473,7 +486,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by `needaffixs' */
|
||||
if (strncmp(line,"PSEUDOROOT",10) == 0) {
|
||||
if (parse_flag(line, &needaffix, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -481,7 +494,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by `needaffixs' */
|
||||
if (strncmp(line,"NEEDAFFIX",9) == 0) {
|
||||
if (parse_flag(line, &needaffix, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -489,7 +502,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the minimal length for words in compounds */
|
||||
if (strncmp(line,"COMPOUNDMIN",11) == 0) {
|
||||
if (parse_num(line, &cpdmin, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
if (cpdmin < 1) cpdmin = 1;
|
||||
@ -498,7 +511,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the max. words and syllables in compounds */
|
||||
if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
|
||||
if (parse_cpdsyllable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -506,7 +519,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by compound_check() method */
|
||||
if (strncmp(line,"SYLLABLENUM",11) == 0) {
|
||||
if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -519,7 +532,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the extra word characters */
|
||||
if (strncmp(line,"WORDCHARS",9) == 0) {
|
||||
if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -527,7 +540,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the ignored characters (for example, Arabic optional diacritic characters) */
|
||||
if (strncmp(line,"IGNORE",6) == 0) {
|
||||
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -535,7 +548,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the typical fault correcting table */
|
||||
if (strncmp(line,"REP",3) == 0) {
|
||||
if (parse_reptable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -543,7 +556,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the input conversion table */
|
||||
if (strncmp(line,"ICONV",5) == 0) {
|
||||
if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -551,7 +564,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the output conversion table */
|
||||
if (strncmp(line,"OCONV",5) == 0) {
|
||||
if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -559,7 +572,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the phonetic translation table */
|
||||
if (strncmp(line,"PHONE",5) == 0) {
|
||||
if (parse_phonetable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -567,7 +580,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the checkcompoundpattern table */
|
||||
if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
|
||||
if (parse_checkcpdtable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -575,7 +588,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the defcompound table */
|
||||
if (strncmp(line,"COMPOUNDRULE",12) == 0) {
|
||||
if (parse_defcpdtable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -583,7 +596,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the related character map table */
|
||||
if (strncmp(line,"MAP",3) == 0) {
|
||||
if (parse_maptable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -591,7 +604,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the word breakpoints table */
|
||||
if (strncmp(line,"BREAK",5) == 0) {
|
||||
if (parse_breaktable(line, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -599,7 +612,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the language for language specific codes */
|
||||
if (strncmp(line,"LANG",4) == 0) {
|
||||
if (parse_string(line, &lang, afflst->getlinenum())) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
langnum = get_lang_num(lang);
|
||||
@ -612,7 +625,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
|
||||
if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
|
||||
if (parse_num(line, &maxngramsugs, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -622,14 +635,14 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
|
||||
if (strncmp(line,"MAXDIFF",7) == 0) {
|
||||
if (parse_num(line, &maxdiff, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (strncmp(line,"MAXCPDSUGS",10) == 0) {
|
||||
if (parse_num(line, &maxcpdsugs, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -649,7 +662,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by forbidden words */
|
||||
if (strncmp(line,"KEEPCASE",8) == 0) {
|
||||
if (parse_flag(line, &keepcase, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -657,7 +670,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by `forceucase' */
|
||||
if (strncmp(line,"FORCEUCASE",10) == 0) {
|
||||
if (parse_flag(line, &forceucase, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -665,7 +678,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by `warn' */
|
||||
if (strncmp(line,"WARN",4) == 0) {
|
||||
if (parse_flag(line, &warn, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -677,7 +690,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
/* parse in the flag used by the affix generator */
|
||||
if (strncmp(line,"SUBSTANDARD",11) == 0) {
|
||||
if (parse_flag(line, &substandard, afflst)) {
|
||||
delete afflst;
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -696,19 +709,14 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len)
|
||||
dupflags_ini = 0;
|
||||
}
|
||||
if (parse_affix(line, ft, afflst, dupflags)) {
|
||||
delete afflst;
|
||||
process_pfx_tree_to_list();
|
||||
process_sfx_tree_to_list();
|
||||
finishFileMgr(afflst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
delete afflst;
|
||||
|
||||
// convert affix trees to sorted list
|
||||
process_pfx_tree_to_list();
|
||||
process_sfx_tree_to_list();
|
||||
finishFileMgr(afflst);
|
||||
// affix trees are sorted now
|
||||
|
||||
// now we can speed up performance greatly taking advantage of the
|
||||
// relationship between the affixes and the idea of "subsets".
|
||||
@ -1319,7 +1327,7 @@ int AffixMgr::cpdrep_check(const char * word, int wl)
|
||||
}
|
||||
|
||||
// forbid compoundings when there are special patterns at word bound
|
||||
int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed)
|
||||
int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char /*affixed*/)
|
||||
{
|
||||
int len;
|
||||
for (int i = 0; i < numcheckcpd; i++) {
|
||||
@ -1332,7 +1340,7 @@ int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2,
|
||||
// zero pattern (0/flag) => unmodified stem (zero affixes allowed)
|
||||
(!*(checkcpdtable[i].pattern) || (
|
||||
(*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
|
||||
(*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) &&
|
||||
(*(checkcpdtable[i].pattern)!='0' && ((len = strlen(checkcpdtable[i].pattern)) != 0) &&
|
||||
strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
|
||||
return 1;
|
||||
}
|
||||
@ -1393,7 +1401,10 @@ int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry **
|
||||
for (i = 0; i < numdefcpd; i++) {
|
||||
for (j = 0; j < defcpdtable[i].len; j++) {
|
||||
if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
|
||||
TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1;
|
||||
TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) {
|
||||
ok = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ok == 0) {
|
||||
@ -1544,7 +1555,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||
int oldlen = 0;
|
||||
int checkedstriple = 0;
|
||||
int onlycpdrule;
|
||||
int affixed = 0;
|
||||
char affixed = 0;
|
||||
hentry ** oldwords = words;
|
||||
|
||||
int checked_prefix;
|
||||
@ -1626,8 +1637,9 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||
if (onlycpdrule) break;
|
||||
if (compoundflag &&
|
||||
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
|
||||
if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
||||
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
|
||||
if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
||||
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
|
||||
sfx->getCont() &&
|
||||
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
|
||||
sfx->getContLen())) || (compoundend &&
|
||||
@ -1640,9 +1652,11 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||
if (rv ||
|
||||
(((wordnum == 0) && compoundbegin &&
|
||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound
|
||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
|
||||
((wordnum > 0) && compoundmiddle &&
|
||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound
|
||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
|
||||
) checked_prefix = 1;
|
||||
// else check forbiddenwords and needaffix
|
||||
@ -2045,7 +2059,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||
int cmax;
|
||||
|
||||
int onlycpdrule;
|
||||
int affixed = 0;
|
||||
char affixed = 0;
|
||||
hentry ** oldwords = words;
|
||||
|
||||
setcminmax(&cmin, &cmax, word, len);
|
||||
@ -2115,11 +2129,12 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||
}
|
||||
|
||||
if (!rv) {
|
||||
if (onlycpdrule) break;
|
||||
if (onlycpdrule && strlen(*result) > MAXLNLEN/10) break;
|
||||
if (compoundflag &&
|
||||
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
|
||||
if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
||||
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
|
||||
if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
||||
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
|
||||
sfx->getCont() &&
|
||||
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
|
||||
sfx->getContLen())) || (compoundend &&
|
||||
@ -2132,9 +2147,11 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||
if (rv ||
|
||||
(((wordnum == 0) && compoundbegin &&
|
||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound
|
||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
|
||||
((wordnum > 0) && compoundmiddle &&
|
||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound
|
||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
|
||||
) {
|
||||
// char * p = prefix_check_morph(st, i, 0, compound);
|
||||
@ -3554,7 +3571,7 @@ int AffixMgr::parse_reptable(char * line, FileMgr * af)
|
||||
/* now parse the numrep lines to read in the remainder of the table */
|
||||
char * nl;
|
||||
for (int j=0; j < numrep; j++) {
|
||||
if (!(nl = af->getline())) return 1;
|
||||
if ((nl = af->getline()) == NULL) return 1;
|
||||
mychomp(nl);
|
||||
tp = nl;
|
||||
i = 0;
|
||||
@ -3639,7 +3656,6 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c
|
||||
|
||||
/* now parse the num lines to read in the remainder of the table */
|
||||
char * nl;
|
||||
size_t keywordlen = strlen(keyword);
|
||||
for (int j=0; j < numrl; j++) {
|
||||
if (!(nl = af->getline())) return 1;
|
||||
mychomp(nl);
|
||||
@ -3652,7 +3668,7 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c
|
||||
if (*piece != '\0') {
|
||||
switch(i) {
|
||||
case 0: {
|
||||
if (strncmp(piece, keyword, keywordlen) != 0) {
|
||||
if (strncmp(piece, keyword, strlen(keyword)) != 0) {
|
||||
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
||||
delete *rl;
|
||||
*rl = NULL;
|
||||
@ -4259,7 +4275,7 @@ int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupf
|
||||
std::vector<affentry>::iterator start = affentries.begin();
|
||||
std::vector<affentry>::iterator end = affentries.end();
|
||||
for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
|
||||
if (!(nl = af->getline())) return 1;
|
||||
if ((nl = af->getline()) == NULL) return 1;
|
||||
mychomp(nl);
|
||||
tp = nl;
|
||||
i = 0;
|
||||
|
@ -41,6 +41,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
||||
FLAG compoundroot;
|
||||
FLAG compoundforbidflag;
|
||||
FLAG compoundpermitflag;
|
||||
int compoundmoresuffixes;
|
||||
int checkcompounddup;
|
||||
int checkcompoundrep;
|
||||
int checkcompoundcase;
|
||||
@ -243,6 +244,7 @@ private:
|
||||
int process_sfx_tree_to_list();
|
||||
int redundant_condition(char, char * strip, int stripl,
|
||||
const char * cond, int);
|
||||
void finishFileMgr(FileMgr *afflst);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -57,7 +57,7 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
||||
#define FLAG_NULL 0x00
|
||||
#define FREE_FLAG(a) a = 0
|
||||
|
||||
#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c)
|
||||
#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c))
|
||||
|
||||
struct affentry
|
||||
{
|
||||
|
@ -5,7 +5,11 @@
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
||||
{
|
||||
private:
|
||||
AffEntry(const AffEntry&);
|
||||
AffEntry& operator = (const AffEntry&);
|
||||
protected:
|
||||
AffEntry() {}
|
||||
char * appnd;
|
||||
char * strip;
|
||||
unsigned char appndl;
|
||||
|
@ -18,7 +18,7 @@
|
||||
#define PACKAGE_NAME "hunspell"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "hunspell 1.3.2"
|
||||
#define PACKAGE_STRING "hunspell 1.3.3"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "hunspell"
|
||||
@ -27,10 +27,10 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.3.2"
|
||||
#define PACKAGE_VERSION "1.3.3"
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "1.3.2"
|
||||
#define VERSION "1.3.3"
|
||||
|
||||
#endif /* !__config_h__ */
|
||||
|
||||
|
@ -17,6 +17,11 @@ struct unicode_info {
|
||||
unsigned short clower;
|
||||
};
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <wchar.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENOFFICEORG
|
||||
# include <unicode/uchar.h>
|
||||
#else
|
||||
@ -46,6 +51,21 @@ struct unicode_info2 {
|
||||
static struct unicode_info2 * utf_tbl = NULL;
|
||||
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
|
||||
|
||||
// Open a file like fopen(), with a WIN32-only path for long file names.
//
// path: file name handed to the C runtime.
// mode: fopen()-style mode string.
//
// On WIN32, when path starts with the long path prefix (the four characters
// backslash backslash question-mark backslash), it is converted from UTF-8
// to UTF-16 and opened with _wfopen(); mode "r" maps to L"r" and every other
// mode maps to L"rb". Any other path, and every path on other platforms,
// goes straight to fopen().
//
// Returns the opened stream, or NULL if the file cannot be opened, the
// conversion buffer cannot be allocated, or the path conversion fails.
FILE * myfopen(const char * path, const char * mode) {
#ifdef _WIN32
#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
    if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
        // First call only measures; with a source length of -1 the count
        // it returns includes the terminating NUL.
        int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
        if (len <= 0) return NULL;  // conversion failed, nothing to open

        wchar_t *buff = (wchar_t *) malloc((size_t) len * sizeof(wchar_t));
        if (!buff) return NULL;     // out of memory

        FILE * f = NULL;
        // Only open the file if the second pass really filled the buffer.
        if (MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len) > 0) {
            f = _wfopen(buff, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
        }
        free(buff);
        return f;
    }
#endif
    return fopen(path, mode);
}
|
||||
|
||||
/* only UTF-16 (BMP) implementation */
|
||||
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
|
||||
signed char * u8 = (signed char *)dest;
|
||||
@ -342,7 +362,10 @@ char * line_uniq(char * text, char breakchar) {
|
||||
for ( i = 1; i < linenum; i++ ) {
|
||||
int dup = 0;
|
||||
for (int j = 0; j < i; j++) {
|
||||
if (strcmp(lines[i], lines[j]) == 0) dup = 1;
|
||||
if (strcmp(lines[i], lines[j]) == 0) {
|
||||
dup = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!dup) {
|
||||
if ((i > 1) || (*(lines[0]) != '\0')) {
|
||||
@ -5468,7 +5491,15 @@ struct cs_info * get_current_cs(const char * es) {
|
||||
// conversion tables static in this file, create them when needed
|
||||
// with help the mozilla backend.
|
||||
struct cs_info * get_current_cs(const char * es) {
|
||||
struct cs_info *ccs;
|
||||
struct cs_info *ccs = new cs_info[256];
|
||||
// Initialize the array with dummy data so that we wouldn't need
|
||||
// to return null in case of failures.
|
||||
for (int i = 0; i <= 0xff; ++i) {
|
||||
ccs[i].ccase = false;
|
||||
ccs[i].clower = i;
|
||||
ccs[i].cupper = i;
|
||||
}
|
||||
|
||||
|
||||
nsCOMPtr<nsIUnicodeEncoder> encoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> decoder;
|
||||
@ -5476,21 +5507,19 @@ struct cs_info * get_current_cs(const char * es) {
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
return ccs;
|
||||
|
||||
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
return ccs;
|
||||
encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?');
|
||||
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
return ccs;
|
||||
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
|
||||
|
||||
if (NS_FAILED(rv))
|
||||
return nsnull;
|
||||
|
||||
ccs = new cs_info[256];
|
||||
return ccs;
|
||||
|
||||
for (unsigned int i = 0; i <= 0xff; ++i) {
|
||||
PRBool success = PR_FALSE;
|
||||
@ -5653,7 +5682,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum)
|
||||
if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
||||
return 0x0130;
|
||||
#ifdef OPENOFFICEORG
|
||||
return u_toupper(c);
|
||||
return static_cast<unsigned short>(u_toupper(c));
|
||||
#else
|
||||
#ifdef MOZILLA_CLIENT
|
||||
return ToUpperCase((PRUnichar) c);
|
||||
@ -5671,7 +5700,7 @@ unsigned short unicodetolower(unsigned short c, int langnum)
|
||||
if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
||||
return 0x0131;
|
||||
#ifdef OPENOFFICEORG
|
||||
return u_tolower(c);
|
||||
return static_cast<unsigned short>(u_tolower(c));
|
||||
#else
|
||||
#ifdef MOZILLA_CLIENT
|
||||
return ToLowerCase((PRUnichar) c);
|
||||
|
@ -52,6 +52,9 @@
|
||||
#define FORBIDDENWORD 65510
|
||||
#define ONLYUPCASEFLAG 65511
|
||||
|
||||
// fopen or optional _wfopen to fix long pathname problem of WIN32
|
||||
LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
|
||||
|
||||
// convert UTF-16 characters to UTF-8
|
||||
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "dictmgr.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
|
||||
{
|
||||
@ -57,7 +58,7 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
|
||||
|
||||
// open the dictionary list file
|
||||
FILE * dictlst;
|
||||
dictlst = fopen(dictpath,"r");
|
||||
dictlst = myfopen(dictpath,"r");
|
||||
if (!dictlst) {
|
||||
return 1;
|
||||
}
|
||||
@ -100,7 +101,8 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
|
||||
case 3:
|
||||
free(pdict->region);
|
||||
pdict->region=NULL;
|
||||
case 2: //deliberate fallthrough
|
||||
/* FALLTHROUGH */
|
||||
case 2:
|
||||
free(pdict->lang);
|
||||
pdict->lang=NULL;
|
||||
default:
|
||||
|
@ -15,7 +15,10 @@ struct dictentry {
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
||||
{
|
||||
|
||||
private:
|
||||
DictMgr(const DictMgr&);
|
||||
DictMgr& operator = (const DictMgr&);
|
||||
private:
|
||||
int numdict;
|
||||
dictentry * pdentry;
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <limits>
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
#include "csutil.hxx"
|
||||
@ -15,12 +16,19 @@
|
||||
// build a hash table from a munched word list
|
||||
|
||||
HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len)
|
||||
: tablesize(0)
|
||||
, tableptr(NULL)
|
||||
, userword(0)
|
||||
, flag_mode(FLAG_CHAR)
|
||||
, complexprefixes(0)
|
||||
, utf8(0)
|
||||
, forbiddenword(FORBIDDENWORD) // forbidden word signing flag
|
||||
, numaliasf(0)
|
||||
, aliasf(NULL)
|
||||
, aliasflen(0)
|
||||
, numaliasm(0)
|
||||
, aliasm(NULL)
|
||||
{
|
||||
tablesize = 0;
|
||||
tableptr = NULL;
|
||||
flag_mode = FLAG_CHAR;
|
||||
complexprefixes = 0;
|
||||
utf8 = 0;
|
||||
langnum = 0;
|
||||
lang = NULL;
|
||||
enc = NULL;
|
||||
@ -28,11 +36,6 @@ HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char *dic_dat
|
||||
ignorechars = NULL;
|
||||
ignorechars_utf16 = NULL;
|
||||
ignorechars_utf16_len = 0;
|
||||
numaliasf = 0;
|
||||
aliasf = NULL;
|
||||
numaliasm = 0;
|
||||
aliasm = NULL;
|
||||
forbiddenword = FORBIDDENWORD; // forbidden word signing flag
|
||||
load_config(aff_data, aff_len);
|
||||
int ec = load_tables(dic_data, dic_len);
|
||||
if (ec) {
|
||||
@ -118,7 +121,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
||||
int al, const char * desc, bool onlyupcase)
|
||||
{
|
||||
bool upcasehomonym = false;
|
||||
int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
|
||||
int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
|
||||
// variable-length hash record with word and optional fields
|
||||
struct hentry* hp =
|
||||
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
|
||||
@ -212,18 +215,21 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
||||
}
|
||||
|
||||
int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
||||
unsigned short * flags, int al, char * dp, int captype)
|
||||
unsigned short * flags, int flagslen, char * dp, int captype)
|
||||
{
|
||||
if (flags == NULL)
|
||||
flagslen = 0;
|
||||
|
||||
// add inner capitalized forms to handle the following allcap forms:
|
||||
// Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
|
||||
// Allcaps with suffixes: CIA's -> CIA'S
|
||||
if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
|
||||
((captype == ALLCAP) && (flags != NULL))) &&
|
||||
!((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
|
||||
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
|
||||
((captype == ALLCAP) && (flagslen != 0))) &&
|
||||
!((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
|
||||
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1));
|
||||
if (!flags2) return 1;
|
||||
if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
|
||||
flags2[al] = ONLYUPCASEFLAG;
|
||||
if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short));
|
||||
flags2[flagslen] = ONLYUPCASEFLAG;
|
||||
if (utf8) {
|
||||
char st[BUFSIZE];
|
||||
w_char w[BUFSIZE];
|
||||
@ -231,11 +237,11 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
||||
mkallsmall_utf(w, wlen, langnum);
|
||||
mkallcap_utf(w, 1, langnum);
|
||||
u16_u8(st, BUFSIZE, w, wlen);
|
||||
return add_word(st,wbl,wcl,flags2,al+1,dp, true);
|
||||
return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true);
|
||||
} else {
|
||||
mkallsmall(word, csconv);
|
||||
mkinitcap(word, csconv);
|
||||
return add_word(word,wbl,wcl,flags2,al+1,dp, true);
|
||||
return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -365,8 +371,8 @@ int HashMgr::load_tables(const char *dic_data, const size_t dic_len)
|
||||
if (dict == NULL) return 1;
|
||||
|
||||
// first read the first line of file to get hash table size */
|
||||
if (!(ts = dict->getline())) {
|
||||
HUNSPELL_WARNING(stderr, "error: empty dic file\n");
|
||||
if ((ts = dict->getline()) == NULL) {
|
||||
HUNSPELL_WARNING(stderr, "error: empty dic file \n");
|
||||
delete dict;
|
||||
return 2;
|
||||
}
|
||||
@ -379,30 +385,32 @@ int HashMgr::load_tables(const char *dic_data, const size_t dic_len)
|
||||
}
|
||||
|
||||
tablesize = atoi(ts);
|
||||
if (tablesize == 0) {
|
||||
|
||||
int nExtra = 5 + USERWORD;
|
||||
|
||||
if (tablesize <= 0 || (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) {
|
||||
HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
|
||||
delete dict;
|
||||
return 4;
|
||||
}
|
||||
tablesize = tablesize + 5 + USERWORD;
|
||||
if ((tablesize %2) == 0) tablesize++;
|
||||
tablesize += nExtra;
|
||||
if ((tablesize % 2) == 0) tablesize++;
|
||||
|
||||
// allocate the hash table
|
||||
tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
|
||||
tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *));
|
||||
if (! tableptr) {
|
||||
delete dict;
|
||||
return 3;
|
||||
}
|
||||
for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
|
||||
|
||||
// loop through all words on much list and add to hash
|
||||
// table and create word and affix strings
|
||||
|
||||
while ((ts = dict->getline())) {
|
||||
while ((ts = dict->getline()) != NULL) {
|
||||
mychomp(ts);
|
||||
// split each line into word and morphological description
|
||||
dp = ts;
|
||||
while ((dp = strchr(dp, ':'))) {
|
||||
while ((dp = strchr(dp, ':')) != NULL) {
|
||||
if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
|
||||
for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
|
||||
if (dp < ts) { // missing word
|
||||
@ -611,14 +619,14 @@ int HashMgr::load_config(const char *aff_data, const size_t aff_len)
|
||||
// open the affix file
|
||||
FileMgr * afflst = new FileMgr(aff_data, aff_len);
|
||||
if (!afflst) {
|
||||
HUNSPELL_WARNING(stderr, "Error - could not open affix description file");
|
||||
HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// read in each line ignoring any that do not
|
||||
// start with a known line type indicator
|
||||
|
||||
while ((line = afflst->getline())) {
|
||||
while ((line = afflst->getline()) != NULL) {
|
||||
mychomp(line);
|
||||
|
||||
/* remove byte order mark */
|
||||
@ -758,7 +766,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af)
|
||||
/* now parse the numaliasf lines to read in the remainder of the table */
|
||||
char * nl;
|
||||
for (int j=0; j < numaliasf; j++) {
|
||||
if (!(nl = af->getline())) return 1;
|
||||
if ((nl = af->getline()) == NULL) return 1;
|
||||
mychomp(nl);
|
||||
tp = nl;
|
||||
i = 0;
|
||||
@ -865,7 +873,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af)
|
||||
/* now parse the numaliasm lines to read in the remainder of the table */
|
||||
char * nl = line;
|
||||
for (int j=0; j < numaliasm; j++) {
|
||||
if (!(nl = af->getline())) return 1;
|
||||
if ((nl = af->getline()) == NULL) return 1;
|
||||
mychomp(nl);
|
||||
tp = nl;
|
||||
i = 0;
|
||||
|
@ -11,6 +11,8 @@
|
||||
#endif
|
||||
#include "csutil.hxx"
|
||||
|
||||
#include <string>
|
||||
|
||||
Hunspell::Hunspell(const char *affix_data, const size_t aff_len, const char *dic_data, const size_t dic_len)
|
||||
{
|
||||
encoding = NULL;
|
||||
@ -316,6 +318,10 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||
char cw[MAXWORDUTF8LEN];
|
||||
char wspace[MAXWORDUTF8LEN];
|
||||
w_char unicw[MAXWORDLEN];
|
||||
|
||||
int info2 = 0;
|
||||
if (!info) info = &info2; else *info = 0;
|
||||
|
||||
// Hunspell supports XML input of the simplified API (see manual)
|
||||
if (strcmp(word, SPELL_XML) == 0) return 1;
|
||||
int nc = strlen(word);
|
||||
@ -334,7 +340,6 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||
if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
||||
else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
||||
|
||||
int info2 = 0;
|
||||
if (wl == 0 || maxdic == 0) return 1;
|
||||
if (root) *root = NULL;
|
||||
|
||||
@ -352,13 +357,14 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||
} else break;
|
||||
}
|
||||
if ((i == wl) && (nstate == NNUM)) return 1;
|
||||
if (!info) info = &info2; else *info = 0;
|
||||
|
||||
switch(captype) {
|
||||
case HUHCAP:
|
||||
/* FALLTHROUGH */
|
||||
case HUHINITCAP:
|
||||
*info += SPELL_ORIGCAP;
|
||||
case NOCAP: {
|
||||
/* FALLTHROUGH */
|
||||
case NOCAP:
|
||||
rv = checkword(cw, info, root);
|
||||
if ((abbv) && !(rv)) {
|
||||
memcpy(wspace,cw,wl);
|
||||
@ -367,7 +373,6 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||
rv = checkword(wspace, info, root);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ALLCAP: {
|
||||
*info += SPELL_ORIGCAP;
|
||||
rv = checkword(cw, info, root);
|
||||
@ -391,7 +396,7 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||
*apostrophe = '\0';
|
||||
wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
|
||||
*apostrophe = '\'';
|
||||
if (wl2 < nc) {
|
||||
if (wl2 >= 0 && wl2 < nc) {
|
||||
mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
|
||||
rv = checkword(cw, info, root);
|
||||
if (rv) break;
|
||||
@ -738,15 +743,24 @@ int Hunspell::suggest(char*** slst, const char * word)
|
||||
char * dot = strchr(cw, '.');
|
||||
if (dot && (dot > cw)) {
|
||||
int captype_;
|
||||
if (utf8) {
|
||||
if (utf8)
|
||||
{
|
||||
w_char w_[MAXWORDLEN];
|
||||
int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
|
||||
captype_ = get_captype_utf8(w_, wl_, langnum);
|
||||
} else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
|
||||
if (captype_ == INITCAP) {
|
||||
if (captype_ == INITCAP)
|
||||
{
|
||||
char * st = mystrdup(cw);
|
||||
if (st) st = (char *) realloc(st, wl + 2);
|
||||
if (st) {
|
||||
if (st)
|
||||
{
|
||||
char *newst = (char *) realloc(st, wl + 2);
|
||||
if (newst == NULL)
|
||||
free(st);
|
||||
st = newst;
|
||||
}
|
||||
if (st)
|
||||
{
|
||||
st[(dot - cw) + 1] = ' ';
|
||||
strcpy(st + (dot - cw) + 2, dot + 1);
|
||||
ns = insert_sug(slst, st, ns);
|
||||
@ -836,7 +850,7 @@ int Hunspell::suggest(char*** slst, const char * word)
|
||||
*pos = '\0';
|
||||
strcpy(w, (*slst)[j]);
|
||||
strcat(w, pos + 1);
|
||||
spell(w, &info, NULL);
|
||||
(void)spell(w, &info, NULL);
|
||||
if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
|
||||
*pos = ' ';
|
||||
} else *pos = '-';
|
||||
@ -1658,6 +1672,13 @@ int Hunspell::get_langnum() const
|
||||
return langnum;
|
||||
}
|
||||
|
||||
int Hunspell::input_conv(const char * word, char * dest)
|
||||
{
|
||||
RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
||||
return (rl && rl->conv(word, dest));
|
||||
}
|
||||
|
||||
|
||||
// return the beginning of the element (attr == NULL) or the attribute
|
||||
const char * Hunspell::get_xml_pos(const char * s, const char * attr)
|
||||
{
|
||||
@ -1682,11 +1703,11 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
|
||||
int n = 0;
|
||||
char * p;
|
||||
if (!list) return 0;
|
||||
for (p = list; (p = strstr(p, tag)); p++) n++;
|
||||
for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;
|
||||
if (n == 0) return 0;
|
||||
*slst = (char **) malloc(sizeof(char *) * n);
|
||||
if (!*slst) return 0;
|
||||
for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
|
||||
for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
|
||||
int l = strlen(p);
|
||||
(*slst)[n] = (char *) malloc(l + 1);
|
||||
if (!(*slst)[n]) return n;
|
||||
@ -1698,6 +1719,19 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
|
||||
return n;
|
||||
}
|
||||
|
||||
namespace
{
    // Replace every occurrence of `search` in `str` with `replace`, in place.
    //
    // The scan resumes just past each inserted replacement, so text produced
    // by a replacement is never rescanned (replacing "a" with "aa" doubles
    // each 'a' exactly once).
    //
    // An empty `search` is a no-op: std::string::find("") matches at every
    // position, which would otherwise make the loop run forever.
    void myrep(std::string& str, const std::string& search, const std::string& replace)
    {
        if (search.empty())
            return;

        size_t pos = 0;
        while ((pos = str.find(search, pos)) != std::string::npos)
        {
            str.replace(pos, search.length(), replace);
            pos += replace.length();
        }
    }
}
|
||||
|
||||
int Hunspell::spellml(char*** slst, const char * word)
|
||||
{
|
||||
char *q, *q2;
|
||||
@ -1709,26 +1743,26 @@ int Hunspell::spellml(char*** slst, const char * word)
|
||||
q2 = strstr(q2, "<word");
|
||||
if (!q2) return 0; // bad XML input
|
||||
if (check_xml_par(q, "type=", "analyze")) {
|
||||
int n = 0, s = 0;
|
||||
int n = 0;
|
||||
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
|
||||
if (n == 0) return 0;
|
||||
// convert the result to <code><a>ana1</a><a>ana2</a></code> format
|
||||
for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
|
||||
char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
|
||||
if (!r) return 0;
|
||||
strcpy(r, "<code>");
|
||||
std::string r;
|
||||
r.append("<code>");
|
||||
for (int i = 0; i < n; i++) {
|
||||
int l = strlen(r);
|
||||
strcpy(r + l, "<a>");
|
||||
strcpy(r + l + 3, (*slst)[i]);
|
||||
mystrrep(r + l + 3, "\t", " ");
|
||||
mystrrep(r + l + 3, "<", "&lt;");
|
||||
mystrrep(r + l + 3, "&", "&amp;");
|
||||
strcat(r, "</a>");
|
||||
r.append("<a>");
|
||||
|
||||
std::string entry((*slst)[i]);
|
||||
free((*slst)[i]);
|
||||
myrep(entry, "\t", " ");
|
||||
myrep(entry, "&", "&amp;");
|
||||
myrep(entry, "<", "&lt;");
|
||||
r.append(entry);
|
||||
|
||||
r.append("</a>");
|
||||
}
|
||||
strcat(r, "</code>");
|
||||
(*slst)[0] = r;
|
||||
r.append("</code>");
|
||||
(*slst)[0] = mystrdup(r.c_str());
|
||||
return 1;
|
||||
} else if (check_xml_par(q, "type=", "stem")) {
|
||||
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
|
||||
@ -1741,9 +1775,9 @@ int Hunspell::spellml(char*** slst, const char * word)
|
||||
return generate(slst, cw, cw2);
|
||||
}
|
||||
} else {
|
||||
if ((q2 = strstr(q2 + 1, "<code"))) {
|
||||
if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
|
||||
char ** slst2;
|
||||
if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
|
||||
if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
|
||||
int n2 = generate(slst, cw, slst2, n);
|
||||
freelist(&slst2, n);
|
||||
return uniqlist(*slst, n2);
|
||||
|
@ -19,6 +19,10 @@
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
||||
{
|
||||
private:
|
||||
Hunspell(const Hunspell&);
|
||||
Hunspell& operator = (const Hunspell&);
|
||||
private:
|
||||
AffixMgr* pAMgr;
|
||||
HashMgr* pHMgr[MAXDIC];
|
||||
int maxdic;
|
||||
@ -34,6 +38,11 @@ public:
|
||||
|
||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||
* input: The affix and dictionary data as bytes
|
||||
*
|
||||
* In WIN32 environment, use UTF-8 encoded paths started with the long path
|
||||
* prefix \\\\?\\ to handle system-independent character encoding and very
|
||||
* long path names (without the long path prefix Hunspell will use fopen()
|
||||
* with system-dependent character encoding instead of _wfopen()).
|
||||
*/
|
||||
|
||||
Hunspell(const char *affix_data, const size_t affix_len, const char *dic_data, const size_t dic_len);
|
||||
@ -128,6 +137,9 @@ public:
|
||||
|
||||
int get_langnum() const;
|
||||
|
||||
/* need for putdic */
|
||||
int input_conv(const char * word, char * dest);
|
||||
|
||||
/* experimental and deprecated functions */
|
||||
|
||||
#ifdef HUNSPELL_EXPERIMENTAL
|
||||
|
@ -9,7 +9,7 @@
|
||||
# else
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
||||
# endif
|
||||
#elif BUILDING_LIBHUNSPELL && 1
|
||||
#elif defined(BUILDING_LIBHUNSPELL) && 1
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
||||
#else
|
||||
# define LIBHUNSPELL_DLL_EXPORTED
|
||||
|
@ -9,7 +9,7 @@
|
||||
# else
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
||||
# endif
|
||||
#elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@
|
||||
#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
||||
#else
|
||||
# define LIBHUNSPELL_DLL_EXPORTED
|
||||
|
@ -87,7 +87,8 @@ int phonet (const char * inword, char * target,
|
||||
char word[MAXPHONETUTF8LEN + 1];
|
||||
if (len == -1) len = strlen(inword);
|
||||
if (len > MAXPHONETUTF8LEN) return 0;
|
||||
strcpy(word, inword);
|
||||
strncpy(word, inword, MAXPHONETUTF8LEN);
|
||||
word[MAXPHONETUTF8LEN] = '\0';
|
||||
|
||||
/** check word **/
|
||||
i = j = z = 0;
|
||||
|
@ -8,6 +8,9 @@
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED RepList
|
||||
{
|
||||
private:
|
||||
RepList(const RepList&);
|
||||
RepList& operator = (const RepList&);
|
||||
protected:
|
||||
replentry ** dat;
|
||||
int size;
|
||||
|
@ -107,7 +107,10 @@ int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int
|
||||
int cwrd = 1;
|
||||
if (ns == maxSug) return maxSug;
|
||||
for (int k=0; k < ns; k++) {
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
if (strcmp(candidate,wlst[k]) == 0) {
|
||||
cwrd = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
@ -364,8 +367,12 @@ int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn,
|
||||
int cwrd = 1;
|
||||
*(candidate + cn) = '\0';
|
||||
int wl = strlen(candidate);
|
||||
for (int m=0; m < ns; m++)
|
||||
if (strcmp(candidate, wlst[m]) == 0) cwrd = 0;
|
||||
for (int m=0; m < ns; m++) {
|
||||
if (strcmp(candidate, wlst[m]) == 0) {
|
||||
cwrd = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
|
||||
if (ns < maxSug) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
@ -678,7 +685,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest
|
||||
// error is missing a letter it needs
|
||||
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
|
||||
{
|
||||
char candidate[MAXSWUTF8L];
|
||||
char candidate[MAXSWUTF8L + 4];
|
||||
char * p;
|
||||
clock_t timelimit = clock();
|
||||
int timer = MINTIMER;
|
||||
@ -700,8 +707,8 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge
|
||||
// error is missing a letter it needs
|
||||
int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
|
||||
{
|
||||
w_char candidate_utf[MAXSWL];
|
||||
char candidate[MAXSWUTF8L];
|
||||
w_char candidate_utf[MAXSWL + 1];
|
||||
char candidate[MAXSWUTF8L + 4];
|
||||
w_char * p;
|
||||
clock_t timelimit = clock();
|
||||
int timer = MINTIMER;
|
||||
@ -761,8 +768,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest
|
||||
((c1 == 3) && (c2 >= 2)))) *p = '-';
|
||||
|
||||
cwrd = 1;
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
for (int k=0; k < ns; k++) {
|
||||
if (strcmp(candidate,wlst[k]) == 0) {
|
||||
cwrd = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ns < maxSug) {
|
||||
if (cwrd) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
@ -777,8 +788,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest
|
||||
mystrlen(p + 1) > 1 &&
|
||||
mystrlen(candidate) - mystrlen(p) > 1) {
|
||||
*p = '-';
|
||||
for (int k=0; k < ns; k++)
|
||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||
for (int k=0; k < ns; k++) {
|
||||
if (strcmp(candidate,wlst[k]) == 0) {
|
||||
cwrd = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ns < maxSug) {
|
||||
if (cwrd) {
|
||||
wlst[ns] = mystrdup(candidate);
|
||||
@ -1333,7 +1348,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md
|
||||
if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
|
||||
(guessorig[i] && strstr(guessorig[i], wlst[j])) ||
|
||||
// check forbidden words
|
||||
!checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0;
|
||||
!checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) {
|
||||
unique = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (unique) {
|
||||
wlst[ns++] = guess[i];
|
||||
@ -1361,7 +1379,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md
|
||||
// don't suggest previous suggestions or a previous suggestion with prefixes or affixes
|
||||
if (strstr(rootsphon[i], wlst[j]) ||
|
||||
// check forbidden words
|
||||
!checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0;
|
||||
!checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) {
|
||||
unique = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (unique) {
|
||||
wlst[ns++] = mystrdup(rootsphon[i]);
|
||||
@ -1855,6 +1876,10 @@ int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_sw
|
||||
w_char su2[MAXSWL];
|
||||
int l1 = u8_u16(su1, MAXSWL, s1);
|
||||
int l2 = u8_u16(su2, MAXSWL, s2);
|
||||
|
||||
if (l1 <= 0 || l2 <= 0)
|
||||
return 0;
|
||||
|
||||
// decapitalize dictionary word
|
||||
if (complexprefixes) {
|
||||
mkallsmall_utf(su2+l2-1, 1, langnum);
|
||||
|
@ -32,6 +32,10 @@ enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
||||
{
|
||||
private:
|
||||
SuggestMgr(const SuggestMgr&);
|
||||
SuggestMgr& operator = (const SuggestMgr&);
|
||||
private:
|
||||
char * ckey;
|
||||
int ckeyl;
|
||||
w_char * ckey_utf;
|
||||
|
Loading…
x
Reference in New Issue
Block a user