From 66b8f4dcc6342990750485a77ca59d7366217e35 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 22 Jul 2015 14:35:25 +0530 Subject: [PATCH] Update bundled copy of hunspell to 1.3.3 --- src/hunspell/Makefile.in | 31 +++++--- src/hunspell/affentry.cxx | 151 +++++++++++++++++++++++------------- src/hunspell/affentry.hxx | 8 ++ src/hunspell/affixmgr.cxx | 148 +++++++++++++++++++---------------- src/hunspell/affixmgr.hxx | 2 + src/hunspell/atypes.hxx | 2 +- src/hunspell/baseaffix.hxx | 4 + src/hunspell/config.h | 6 +- src/hunspell/csutil.cxx | 49 +++++++++--- src/hunspell/csutil.hxx | 3 + src/hunspell/dictmgr.cxx | 6 +- src/hunspell/dictmgr.hxx | 5 +- src/hunspell/hashmgr.cxx | 72 +++++++++-------- src/hunspell/hunspell.cxx | 98 +++++++++++++++-------- src/hunspell/hunspell.hxx | 12 +++ src/hunspell/hunvisapi.h | 2 +- src/hunspell/hunvisapi.h.in | 2 +- src/hunspell/phonet.cxx | 3 +- src/hunspell/replist.hxx | 3 + src/hunspell/suggestmgr.cxx | 49 +++++++++--- src/hunspell/suggestmgr.hxx | 4 + 21 files changed, 432 insertions(+), 228 deletions(-) diff --git a/src/hunspell/Makefile.in b/src/hunspell/Makefile.in index 23371ef8e5..86b70c2ca6 100644 --- a/src/hunspell/Makefile.in +++ b/src/hunspell/Makefile.in @@ -1,9 +1,9 @@ -# Makefile.in generated by automake 1.11.1 from Makefile.am. +# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, -# Inc. +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -86,6 +86,12 @@ am__nobase_list = $(am__nobase_strip_setup); \ am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } am__installdirs = "$(DESTDIR)$(libdir)" \ "$(DESTDIR)$(libhunspell_1_3_includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) @@ -374,7 +380,7 @@ clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done -libhunspell-1.3.la: $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_DEPENDENCIES) +libhunspell-1.3.la: $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_DEPENDENCIES) $(EXTRA_libhunspell_1_3_la_DEPENDENCIES) $(libhunspell_1_3_la_LINK) -rpath $(libdir) $(libhunspell_1_3_la_OBJECTS) $(libhunspell_1_3_la_LIBADD) $(LIBS) mostlyclean-compile: @@ -438,9 +444,7 @@ uninstall-libhunspell_1_3_includeHEADERS: @$(NORMAL_UNINSTALL) @list='$(libhunspell_1_3_include_HEADERS)'; test -n "$(libhunspell_1_3_includedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ - test -n "$$files" || exit 0; \ - echo " ( cd '$(DESTDIR)$(libhunspell_1_3_includedir)' && rm -f" $$files ")"; \ - cd "$(DESTDIR)$(libhunspell_1_3_includedir)" && rm -f $$files + dir='$(DESTDIR)$(libhunspell_1_3_includedir)'; $(am__uninstall_files_from_dir) ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ @@ -541,10 +545,15 @@ install-am: all-am installcheck: installcheck-am install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi mostlyclean-generic: clean-generic: diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx index fef0cca5f5..45c9ef58e2 100644 --- a/src/hunspell/affentry.cxx +++ b/src/hunspell/affentry.cxx @@ -9,13 +9,17 @@ #include "affentry.hxx" #include "csutil.hxx" +#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4) + PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) + // register affix manager + : pmyMgr(pmgr) + , next(NULL) + , nexteq(NULL) + , nextne(NULL) + , flgnxt(NULL) { - // register affix manager - pmyMgr = pmgr; - // set up its initial values - aflag = dp->aflag; // flag strip = dp->strip; // string to strip appnd = dp->appnd; // string to append @@ -28,9 +32,6 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1); c.l.conds2 = dp->c.l.conds2; } else memcpy(c.conds, dp->c.conds, MAXCONDLEN); - next = NULL; - nextne = NULL; - nexteq = NULL; morphcode = dp->morphcode; contclass = dp->contclass; contclasslen = dp->contclasslen; @@ -53,16 +54,17 @@ PfxEntry::~PfxEntry() // add prefix to this word assuming conditions hold char * PfxEntry::add(const char * word, int len) { - char tword[MAXWORDUTF8LEN + 4]; + char tword[MAXTEMPWORDLEN]; if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && (len >= numconds) && test_condition(word) && (!stripl || (strncmp(word, strip, stripl) == 0)) && - ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { + ((MAXTEMPWORDLEN) > (len + appndl - stripl))) { /* we have a match so add prefix */ char * pp = tword; if (appndl) { - strcpy(tword,appnd); + strncpy(tword, appnd, MAXTEMPWORDLEN-1); + tword[MAXTEMPWORDLEN-1] = '\0'; pp += appndl; } strcpy(pp, (word + stripl)); @@ -110,13 +112,15 @@ inline int PfxEntry::test_condition(const char * st) if (*st == '\0' && p) return 0; // word <= condition break; } - case '.': if (!pos) { // dots are not metacharacters in groups: [.] + case '.': + if (!pos) { // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++); if (*st == '\0' && p) return 0; // word <= condition break; } + /* FALLTHROUGH */ default: { if (*st == *p) { st++; @@ -133,11 +137,11 @@ inline int PfxEntry::test_condition(const char * st) } if (pos && st != pos) { ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); } } else if (pos) { ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); } } else if (pos) { // group p = nextchar(p); @@ -153,7 +157,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -167,7 +171,10 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -214,7 +221,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -229,7 +236,10 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -261,7 +271,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, char in_compound, const FLAG needflag) { int tmpl; // length of tmpword - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -276,7 +286,10 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -308,7 +321,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; char result[MAXLNLEN]; char * st; @@ -327,7 +340,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -395,10 +411,15 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const } SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) + : pmyMgr(pmgr) // register affix manager + , next(NULL) + , nexteq(NULL) + , nextne(NULL) + , flgnxt(NULL) + , l_morph(NULL) + , r_morph(NULL) + , eq_morph(NULL) { - // register affix manager - pmyMgr = pmgr; - // set up its initial values aflag = dp->aflag; // char flag strip = dp->strip; // string to strip @@ -413,7 +434,6 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1); c.l.conds2 = dp->c.l.conds2; } else memcpy(c.conds, dp->c.conds, MAXCONDLEN); - rappnd = myrevstrdup(appnd); morphcode = dp->morphcode; contclass = dp->contclass; @@ -438,15 +458,16 @@ SfxEntry::~SfxEntry() // add suffix to this word assuming conditions hold char * SfxEntry::add(const char * word, int len) { - char tword[MAXWORDUTF8LEN + 4]; + char tword[MAXTEMPWORDLEN]; /* make sure all conditions match */ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && (len >= numconds) && test_condition(word + len, word) && (!stripl || (strcmp(word + len - stripl, strip) == 0)) && - ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { + ((MAXTEMPWORDLEN) > (len + appndl - stripl))) { /* we have a match so add suffix */ - strcpy(tword,word); + strncpy(tword, word, MAXTEMPWORDLEN-1); + tword[MAXTEMPWORDLEN-1] = '\0'; if (appndl) { strcpy(tword + len - stripl, appnd); } else { @@ -481,24 +502,37 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) int i = 1; while (1) { switch (*p) { - case '\0': return 1; - case '[': { p = nextchar(p); pos = st; break; } - case '^': { p = nextchar(p); neg = true; break; } - case ']': { if (!neg && !ingroup) return 0; - i++; - // skip the next character - if (!ingroup) { - for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); - st--; - } - pos = NULL; - neg = false; - ingroup = false; - p = nextchar(p); - if (st < beg && p) return 0; // word <= condition - break; - } - case '.': if (!pos) { // dots are not metacharacters in groups: [.] + case '\0': + return 1; + case '[': + p = nextchar(p); + pos = st; + break; + case '^': + p = nextchar(p); + neg = true; + break; + case ']': + if (!neg && !ingroup) + return 0; + i++; + // skip the next character + if (!ingroup) + { + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); + st--; + } + pos = NULL; + neg = false; + ingroup = false; + p = nextchar(p); + if (st < beg && p) + return 0; // word <= condition + break; + case '.': + if (!pos) + { + // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); @@ -513,6 +547,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) } break; } + /* FALLTHROUGH */ default: { if (*st == *p) { p = nextchar(p); @@ -533,7 +568,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); st--; } if (p && *p != ']') p = nextchar(p); @@ -541,7 +576,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); // if (p && *p != ']') p = nextchar(p); st--; } @@ -567,7 +602,7 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; // if this suffix is being cross checked with a prefix @@ -592,7 +627,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy (tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); @@ -645,7 +681,10 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, } else if (wlst && (*ns < maxSug)) { int cwrd = 1; for (int k=0; k < *ns; k++) - if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0; + if (strcmp(tmpword, wlst[k]) == 0) { + cwrd = 0; + break; + } if (cwrd) { wlst[*ns] = mystrdup(tmpword); if (wlst[*ns] == NULL) { @@ -668,7 +707,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; @@ -692,7 +731,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy(tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); @@ -729,7 +769,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, { int tmpl; // length of tmpword unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; char * st; @@ -757,7 +797,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy(tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); diff --git a/src/hunspell/affentry.hxx b/src/hunspell/affentry.hxx index a032d61ed4..c67c09825c 100644 --- a/src/hunspell/affentry.hxx +++ b/src/hunspell/affentry.hxx @@ -11,6 +11,10 @@ class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry { +private: + PfxEntry(const PfxEntry&); + PfxEntry& operator = (const PfxEntry&); +private: AffixMgr* pmyMgr; PfxEntry * next; @@ -67,6 +71,10 @@ public: class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry { +private: + SfxEntry(const SfxEntry&); + SfxEntry& operator = (const SfxEntry&); +private: AffixMgr* pmyMgr; char * rappnd; diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx index 6e1003b18b..7aad320472 100644 --- a/src/hunspell/affixmgr.cxx +++ b/src/hunspell/affixmgr.cxx @@ -48,6 +48,7 @@ AffixMgr::AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, in compoundroot = FLAG_NULL; // compound word signing flag compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word + compoundmoresuffixes = 0; // allow more suffixes within compound words checkcompounddup = 0; // forbid double words in compounds checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution) checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds @@ -111,7 +112,7 @@ AffixMgr::AffixMgr(const char *aff_data, const size_t aff_len, HashMgr** ptr, in } if (parse_file(aff_data, aff_len)) { - HUNSPELL_WARNING(stderr, "Failure loading aff file\n"); + HUNSPELL_WARNING(stderr, "Failure loading aff file \n"); } if (cpdmin == -1) cpdmin = MINCPDLEN; @@ -253,6 +254,14 @@ AffixMgr::~AffixMgr() #endif } +void AffixMgr::finishFileMgr(FileMgr *afflst) +{ + delete afflst; + + // convert affix trees to sorted list + process_pfx_tree_to_list(); + process_sfx_tree_to_list(); +} // read in aff file and build up prefix and suffix entry objects int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) @@ -279,7 +288,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) // read in each line ignoring any that do not // start with a known line type indicator - while ((line = afflst->getline())) { + while ((line = afflst->getline()) != NULL) { mychomp(line); /* remove byte order mark */ @@ -294,7 +303,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the keyboard string */ if (strncmp(line,"KEY",3) == 0) { if (parse_string(line, &keystring, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -302,7 +311,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the try string */ if (strncmp(line,"TRY",3) == 0) { if (parse_string(line, &trystring, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -310,7 +319,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the name of the character set used by the .dict and .aff */ if (strncmp(line,"SET",3) == 0) { if (parse_string(line, &encoding, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } if (strcmp(encoding, "UTF-8") == 0) { @@ -330,7 +339,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by the controlled compound words */ if (strncmp(line,"COMPOUNDFLAG",12) == 0) { if (parse_flag(line, &compoundflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -339,12 +348,12 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) if (strncmp(line,"COMPOUNDBEGIN",13) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundend, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } else { if (parse_flag(line, &compoundbegin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -353,7 +362,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by compound words */ if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) { if (parse_flag(line, &compoundmiddle, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -361,12 +370,12 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) if (strncmp(line,"COMPOUNDEND",11) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundbegin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } else { if (parse_flag(line, &compoundend, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -375,7 +384,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the data used by compound_check() method */ if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) { if (parse_num(line, &cpdwordmax, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -383,7 +392,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag sign compounds in dictionary */ if (strncmp(line,"COMPOUNDROOT",12) == 0) { if (parse_flag(line, &compoundroot, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -391,7 +400,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) { if (parse_flag(line, &compoundpermitflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -399,11 +408,15 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) { if (parse_flag(line, &compoundforbidflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } + if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) { + compoundmoresuffixes = 1; + } + if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) { checkcompounddup = 1; } @@ -426,14 +439,14 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) if (strncmp(line,"NOSUGGEST",9) == 0) { if (parse_flag(line, &nosuggest, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } if (strncmp(line,"NONGRAMSUGGEST",14) == 0) { if (parse_flag(line, &nongramsuggest, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -441,7 +454,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by forbidden words */ if (strncmp(line,"FORBIDDENWORD",13) == 0) { if (parse_flag(line, &forbiddenword, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -449,7 +462,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by forbidden words */ if (strncmp(line,"LEMMA_PRESENT",13) == 0) { if (parse_flag(line, &lemma_present, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -457,7 +470,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by circumfixes */ if (strncmp(line,"CIRCUMFIX",9) == 0) { if (parse_flag(line, &circumfix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -465,7 +478,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by fogemorphemes */ if (strncmp(line,"ONLYINCOMPOUND",14) == 0) { if (parse_flag(line, &onlyincompound, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -473,7 +486,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"PSEUDOROOT",10) == 0) { if (parse_flag(line, &needaffix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -481,7 +494,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"NEEDAFFIX",9) == 0) { if (parse_flag(line, &needaffix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -489,7 +502,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the minimal length for words in compounds */ if (strncmp(line,"COMPOUNDMIN",11) == 0) { if (parse_num(line, &cpdmin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } if (cpdmin < 1) cpdmin = 1; @@ -498,7 +511,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the max. words and syllables in compounds */ if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) { if (parse_cpdsyllable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -506,7 +519,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by compound_check() method */ if (strncmp(line,"SYLLABLENUM",11) == 0) { if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -519,7 +532,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the extra word characters */ if (strncmp(line,"WORDCHARS",9) == 0) { if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -527,7 +540,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the ignored characters (for example, Arabic optional diacretics charachters */ if (strncmp(line,"IGNORE",6) == 0) { if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -535,7 +548,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the typical fault correcting table */ if (strncmp(line,"REP",3) == 0) { if (parse_reptable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -543,7 +556,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the input conversion table */ if (strncmp(line,"ICONV",5) == 0) { if (parse_convtable(line, afflst, &iconvtable, "ICONV")) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -551,7 +564,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the input conversion table */ if (strncmp(line,"OCONV",5) == 0) { if (parse_convtable(line, afflst, &oconvtable, "OCONV")) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -559,7 +572,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the phonetic translation table */ if (strncmp(line,"PHONE",5) == 0) { if (parse_phonetable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -567,7 +580,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the checkcompoundpattern table */ if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) { if (parse_checkcpdtable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -575,7 +588,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the defcompound table */ if (strncmp(line,"COMPOUNDRULE",12) == 0) { if (parse_defcpdtable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -583,7 +596,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the related character map table */ if (strncmp(line,"MAP",3) == 0) { if (parse_maptable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -591,7 +604,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the word breakpoints table */ if (strncmp(line,"BREAK",5) == 0) { if (parse_breaktable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -599,7 +612,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the language for language specific codes */ if (strncmp(line,"LANG",4) == 0) { if (parse_string(line, &lang, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } langnum = get_lang_num(lang); @@ -612,7 +625,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) if (strncmp(line,"MAXNGRAMSUGS",12) == 0) { if (parse_num(line, &maxngramsugs, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -622,14 +635,14 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) if (strncmp(line,"MAXDIFF",7) == 0) { if (parse_num(line, &maxdiff, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } if (strncmp(line,"MAXCPDSUGS",10) == 0) { if (parse_num(line, &maxcpdsugs, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -649,7 +662,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by forbidden words */ if (strncmp(line,"KEEPCASE",8) == 0) { if (parse_flag(line, &keepcase, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -657,7 +670,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by `forceucase' */ if (strncmp(line,"FORCEUCASE",10) == 0) { if (parse_flag(line, &forceucase, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -665,7 +678,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by `warn' */ if (strncmp(line,"WARN",4) == 0) { if (parse_flag(line, &warn, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -677,7 +690,7 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) /* parse in the flag used by the affix generator */ if (strncmp(line,"SUBSTANDARD",11) == 0) { if (parse_flag(line, &substandard, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -696,19 +709,14 @@ int AffixMgr::parse_file(const char *aff_data, const size_t aff_len) dupflags_ini = 0; } if (parse_affix(line, ft, afflst, dupflags)) { - delete afflst; - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); + finishFileMgr(afflst); return 1; } } - } - delete afflst; - // convert affix trees to sorted list - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); + finishFileMgr(afflst); + // affix trees are sorted now // now we can speed up performance greatly taking advantage of the // relationship between the affixes and the idea of "subsets". @@ -1319,7 +1327,7 @@ int AffixMgr::cpdrep_check(const char * word, int wl) } // forbid compoundings when there are special patterns at word bound -int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed) +int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char /*affixed*/) { int len; for (int i = 0; i < numcheckcpd; i++) { @@ -1332,7 +1340,7 @@ int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, // zero pattern (0/flag) => unmodified stem (zero affixes allowed) (!*(checkcpdtable[i].pattern) || ( (*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) || - (*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) && + (*(checkcpdtable[i].pattern)!='0' && ((len = strlen(checkcpdtable[i].pattern)) != 0) && strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) { return 1; } @@ -1393,7 +1401,10 @@ int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** for (i = 0; i < numdefcpd; i++) { for (j = 0; j < defcpdtable[i].len; j++) { if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' && - TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1; + TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) { + ok = 1; + break; + } } } if (ok == 0) { @@ -1544,7 +1555,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, int oldlen = 0; int checkedstriple = 0; int onlycpdrule; - int affixed = 0; + char affixed = 0; hentry ** oldwords = words; int checked_prefix; @@ -1626,8 +1637,9 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, if (onlycpdrule) break; if (compoundflag && !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { - if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, - FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, + FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && sfx->getCont() && ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())) || (compoundend && @@ -1640,9 +1652,11 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, if (rv || (((wordnum == 0) && compoundbegin && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || ((wordnum > 0) && compoundmiddle && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) ) checked_prefix = 1; // else check forbiddenwords and needaffix @@ -2045,7 +2059,7 @@ int AffixMgr::compound_check_morph(const char * word, int len, int cmax; int onlycpdrule; - int affixed = 0; + char affixed = 0; hentry ** oldwords = words; setcminmax(&cmin, &cmax, word, len); @@ -2115,11 +2129,12 @@ int AffixMgr::compound_check_morph(const char * word, int len, } if (!rv) { - if (onlycpdrule) break; + if (onlycpdrule && strlen(*result) > MAXLNLEN/10) break; if (compoundflag && !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { - if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, - FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, + FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && sfx->getCont() && ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())) || (compoundend && @@ -2132,9 +2147,11 @@ int AffixMgr::compound_check_morph(const char * word, int len, if (rv || (((wordnum == 0) && compoundbegin && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || ((wordnum > 0) && compoundmiddle && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) ) { // char * p = prefix_check_morph(st, i, 0, compound); @@ -3554,7 +3571,7 @@ int AffixMgr::parse_reptable(char * line, FileMgr * af) /* now parse the numrep lines to read in the remainder of the table */ char * nl; for (int j=0; j < numrep; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; @@ -3639,7 +3656,6 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c /* now parse the num lines to read in the remainder of the table */ char * nl; - size_t keywordlen = strlen(keyword); for (int j=0; j < numrl; j++) { if (!(nl = af->getline())) return 1; mychomp(nl); @@ -3652,7 +3668,7 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c if (*piece != '\0') { switch(i) { case 0: { - if (strncmp(piece, keyword, keywordlen) != 0) { + if (strncmp(piece, keyword, strlen(keyword)) != 0) { HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); delete *rl; *rl = NULL; @@ -4259,7 +4275,7 @@ int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupf std::vector::iterator start = affentries.begin(); std::vector::iterator end = affentries.end(); for (std::vector::iterator entry = start; entry != end; ++entry) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx index 0f20a024fd..8676ad2c8a 100644 --- a/src/hunspell/affixmgr.hxx +++ b/src/hunspell/affixmgr.hxx @@ -41,6 +41,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr FLAG compoundroot; FLAG compoundforbidflag; FLAG compoundpermitflag; + int compoundmoresuffixes; int checkcompounddup; int checkcompoundrep; int checkcompoundcase; @@ -243,6 +244,7 @@ private: int process_sfx_tree_to_list(); int redundant_condition(char, char * strip, int stripl, const char * cond, int); + void finishFileMgr(FileMgr *afflst); }; #endif diff --git a/src/hunspell/atypes.hxx b/src/hunspell/atypes.hxx index df27c4d1cb..61c59d5ff9 100644 --- a/src/hunspell/atypes.hxx +++ b/src/hunspell/atypes.hxx @@ -57,7 +57,7 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {} #define FLAG_NULL 0x00 #define FREE_FLAG(a) a = 0 -#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c) +#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c)) struct affentry { diff --git a/src/hunspell/baseaffix.hxx b/src/hunspell/baseaffix.hxx index ed64f3d84f..f417acaa44 100644 --- a/src/hunspell/baseaffix.hxx +++ b/src/hunspell/baseaffix.hxx @@ -5,7 +5,11 @@ class LIBHUNSPELL_DLL_EXPORTED AffEntry { +private: + AffEntry(const AffEntry&); + AffEntry& operator = (const AffEntry&); protected: + AffEntry() {} char * appnd; char * strip; unsigned char appndl; diff --git a/src/hunspell/config.h b/src/hunspell/config.h index 94a398a245..f38ae5835a 100644 --- a/src/hunspell/config.h +++ b/src/hunspell/config.h @@ -18,7 +18,7 @@ #define PACKAGE_NAME "hunspell" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "hunspell 1.3.2" +#define PACKAGE_STRING "hunspell 1.3.3" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "hunspell" @@ -27,10 +27,10 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.3.2" +#define PACKAGE_VERSION "1.3.3" /* Version number of package */ -#define VERSION "1.3.2" +#define VERSION "1.3.3" #endif /* !__config_h__ */ diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx index dd89c19090..f877f28155 100644 --- a/src/hunspell/csutil.cxx +++ b/src/hunspell/csutil.cxx @@ -17,6 +17,11 @@ struct unicode_info { unsigned short clower; }; +#ifdef _WIN32 +#include +#include +#endif + #ifdef OPENOFFICEORG # include #else @@ -46,6 +51,21 @@ struct unicode_info2 { static struct unicode_info2 * utf_tbl = NULL; static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances +FILE * myfopen(const char * path, const char * mode) { +#ifdef _WIN32 +#define WIN32_LONG_PATH_PREFIX "\\\\?\\" + if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) { + int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); + wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t)); + MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len); + FILE * f = _wfopen(buff, (strcmp(mode, "r") == 0) ? L"r" : L"rb"); + free(buff); + return f; + } +#endif + return fopen(path, mode); +} + /* only UTF-16 (BMP) implementation */ char * u16_u8(char * dest, int size, const w_char * src, int srclen) { signed char * u8 = (signed char *)dest; @@ -342,7 +362,10 @@ char * line_uniq(char * text, char breakchar) { for ( i = 1; i < linenum; i++ ) { int dup = 0; for (int j = 0; j < i; j++) { - if (strcmp(lines[i], lines[j]) == 0) dup = 1; + if (strcmp(lines[i], lines[j]) == 0) { + dup = 1; + break; + } } if (!dup) { if ((i > 1) || (*(lines[0]) != '\0')) { @@ -5468,7 +5491,15 @@ struct cs_info * get_current_cs(const char * es) { // conversion tables static in this file, create them when needed // with help the mozilla backend. struct cs_info * get_current_cs(const char * es) { - struct cs_info *ccs; + struct cs_info *ccs = new cs_info[256]; + // Initialze the array with dummy data so that we wouldn't need + // to return null in case of failures. + for (int i = 0; i <= 0xff; ++i) { + ccs[i].ccase = false; + ccs[i].clower = i; + ccs[i].cupper = i; + } + nsCOMPtr encoder; nsCOMPtr decoder; @@ -5476,21 +5507,19 @@ struct cs_info * get_current_cs(const char * es) { nsresult rv; nsCOMPtr ccm = do_GetService(kCharsetConverterManagerCID, &rv); if (NS_FAILED(rv)) - return nsnull; + return ccs; rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder)); if (NS_FAILED(rv)) - return nsnull; + return ccs; encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?'); rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder)); if (NS_FAILED(rv)) - return nsnull; + return ccs; decoder->SetInputErrorBehavior(decoder->kOnError_Signal); if (NS_FAILED(rv)) - return nsnull; - - ccs = new cs_info[256]; + return ccs; for (unsigned int i = 0; i <= 0xff; ++i) { PRBool success = PR_FALSE; @@ -5653,7 +5682,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) return 0x0130; #ifdef OPENOFFICEORG - return u_toupper(c); + return static_cast(u_toupper(c)); #else #ifdef MOZILLA_CLIENT return ToUpperCase((PRUnichar) c); @@ -5671,7 +5700,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) return 0x0131; #ifdef OPENOFFICEORG - return u_tolower(c); + return static_cast(u_tolower(c)); #else #ifdef MOZILLA_CLIENT return ToLowerCase((PRUnichar) c); diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx index 7bd0b919be..e034b53fd2 100644 --- a/src/hunspell/csutil.hxx +++ b/src/hunspell/csutil.hxx @@ -52,6 +52,9 @@ #define FORBIDDENWORD 65510 #define ONLYUPCASEFLAG 65511 +// fopen or optional _wfopen to fix long pathname problem of WIN32 +LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode); + // convert UTF-16 characters to UTF-8 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen); diff --git a/src/hunspell/dictmgr.cxx b/src/hunspell/dictmgr.cxx index b4a15b1a5b..a94429e593 100644 --- a/src/hunspell/dictmgr.cxx +++ b/src/hunspell/dictmgr.cxx @@ -5,6 +5,7 @@ #include #include "dictmgr.hxx" +#include "csutil.hxx" DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0) { @@ -57,7 +58,7 @@ int DictMgr::parse_file(const char * dictpath, const char * etype) // open the dictionary list file FILE * dictlst; - dictlst = fopen(dictpath,"r"); + dictlst = myfopen(dictpath,"r"); if (!dictlst) { return 1; } @@ -100,7 +101,8 @@ int DictMgr::parse_file(const char * dictpath, const char * etype) case 3: free(pdict->region); pdict->region=NULL; - case 2: //deliberate fallthrough + /* FALLTHROUGH */ + case 2: free(pdict->lang); pdict->lang=NULL; default: diff --git a/src/hunspell/dictmgr.hxx b/src/hunspell/dictmgr.hxx index bb197f84fb..692ed964c3 100644 --- a/src/hunspell/dictmgr.hxx +++ b/src/hunspell/dictmgr.hxx @@ -15,7 +15,10 @@ struct dictentry { class LIBHUNSPELL_DLL_EXPORTED DictMgr { - +private: + DictMgr(const DictMgr&); + DictMgr& operator = (const DictMgr&); +private: int numdict; dictentry * pdentry; diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx index 956bb4b5c0..ae5a0a3782 100644 --- a/src/hunspell/hashmgr.cxx +++ b/src/hunspell/hashmgr.cxx @@ -5,6 +5,7 @@ #include #include #include +#include #include "hashmgr.hxx" #include "csutil.hxx" @@ -15,12 +16,19 @@ // build a hash table from a munched word list HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char *dic_data, const size_t dic_len) + : tablesize(0) + , tableptr(NULL) + , userword(0) + , flag_mode(FLAG_CHAR) + , complexprefixes(0) + , utf8(0) + , forbiddenword(FORBIDDENWORD) // forbidden word signing flag + , numaliasf(0) + , aliasf(NULL) + , aliasflen(0) + , numaliasm(0) + , aliasm(NULL) { - tablesize = 0; - tableptr = NULL; - flag_mode = FLAG_CHAR; - complexprefixes = 0; - utf8 = 0; langnum = 0; lang = NULL; enc = NULL; @@ -28,11 +36,6 @@ HashMgr::HashMgr(const char *aff_data, const size_t aff_len, const char *dic_dat ignorechars = NULL; ignorechars_utf16 = NULL; ignorechars_utf16_len = 0; - numaliasf = 0; - aliasf = NULL; - numaliasm = 0; - aliasm = NULL; - forbiddenword = FORBIDDENWORD; // forbidden word signing flag load_config(aff_data, aff_len); int ec = load_tables(dic_data, dic_len); if (ec) { @@ -118,7 +121,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, int al, const char * desc, bool onlyupcase) { bool upcasehomonym = false; - int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; + int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0; // variable-length hash record with word and optional fields struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); @@ -212,18 +215,21 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, } int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, - unsigned short * flags, int al, char * dp, int captype) + unsigned short * flags, int flagslen, char * dp, int captype) { + if (flags == NULL) + flagslen = 0; + // add inner capitalized forms to handle the following allcap forms: // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG // Allcaps with suffixes: CIA's -> CIA'S if (((captype == HUHCAP) || (captype == HUHINITCAP) || - ((captype == ALLCAP) && (flags != NULL))) && - !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { - unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1)); + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1)); if (!flags2) return 1; - if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); - flags2[al] = ONLYUPCASEFLAG; + if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; if (utf8) { char st[BUFSIZE]; w_char w[BUFSIZE]; @@ -231,11 +237,11 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, mkallsmall_utf(w, wlen, langnum); mkallcap_utf(w, 1, langnum); u16_u8(st, BUFSIZE, w, wlen); - return add_word(st,wbl,wcl,flags2,al+1,dp, true); + return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true); } else { mkallsmall(word, csconv); mkinitcap(word, csconv); - return add_word(word,wbl,wcl,flags2,al+1,dp, true); + return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true); } } return 0; @@ -365,8 +371,8 @@ int HashMgr::load_tables(const char *dic_data, const size_t dic_len) if (dict == NULL) return 1; // first read the first line of file to get hash table size */ - if (!(ts = dict->getline())) { - HUNSPELL_WARNING(stderr, "error: empty dic file\n"); + if ((ts = dict->getline()) == NULL) { + HUNSPELL_WARNING(stderr, "error: empty dic file \n"); delete dict; return 2; } @@ -379,30 +385,32 @@ int HashMgr::load_tables(const char *dic_data, const size_t dic_len) } tablesize = atoi(ts); - if (tablesize == 0) { + + int nExtra = 5 + USERWORD; + + if (tablesize <= 0 || (tablesize >= (std::numeric_limits::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) { HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n"); delete dict; return 4; } - tablesize = tablesize + 5 + USERWORD; - if ((tablesize %2) == 0) tablesize++; + tablesize += nExtra; + if ((tablesize % 2) == 0) tablesize++; // allocate the hash table - tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); + tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *)); if (! tableptr) { delete dict; return 3; } - for (int i=0; igetline())) { + while ((ts = dict->getline()) != NULL) { mychomp(ts); // split each line into word and morphological description dp = ts; - while ((dp = strchr(dp, ':'))) { + while ((dp = strchr(dp, ':')) != NULL) { if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); if (dp < ts) { // missing word @@ -611,14 +619,14 @@ int HashMgr::load_config(const char *aff_data, const size_t aff_len) // open the affix file FileMgr * afflst = new FileMgr(aff_data, aff_len); if (!afflst) { - HUNSPELL_WARNING(stderr, "Error - could not open affix description file"); + HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n"); return 1; } // read in each line ignoring any that do not // start with a known line type indicator - while ((line = afflst->getline())) { + while ((line = afflst->getline()) != NULL) { mychomp(line); /* remove byte order mark */ @@ -758,7 +766,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) /* now parse the numaliasf lines to read in the remainder of the table */ char * nl; for (int j=0; j < numaliasf; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; @@ -865,7 +873,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) /* now parse the numaliasm lines to read in the remainder of the table */ char * nl = line; for (int j=0; j < numaliasm; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx index 83878ff60b..7f0c0343aa 100644 --- a/src/hunspell/hunspell.cxx +++ b/src/hunspell/hunspell.cxx @@ -11,6 +11,8 @@ #endif #include "csutil.hxx" +#include + Hunspell::Hunspell(const char *affix_data, const size_t aff_len, const char *dic_data, const size_t dic_len) { encoding = NULL; @@ -316,6 +318,10 @@ int Hunspell::spell(const char * word, int * info, char ** root) char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; w_char unicw[MAXWORDLEN]; + + int info2 = 0; + if (!info) info = &info2; else *info = 0; + // Hunspell supports XML input of the simplified API (see manual) if (strcmp(word, SPELL_XML) == 0) return 1; int nc = strlen(word); @@ -334,7 +340,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - int info2 = 0; if (wl == 0 || maxdic == 0) return 1; if (root) *root = NULL; @@ -352,13 +357,14 @@ int Hunspell::spell(const char * word, int * info, char ** root) } else break; } if ((i == wl) && (nstate == NNUM)) return 1; - if (!info) info = &info2; else *info = 0; switch(captype) { case HUHCAP: + /* FALLTHROUGH */ case HUHINITCAP: *info += SPELL_ORIGCAP; - case NOCAP: { + /* FALLTHROUGH */ + case NOCAP: rv = checkword(cw, info, root); if ((abbv) && !(rv)) { memcpy(wspace,cw,wl); @@ -367,7 +373,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) rv = checkword(wspace, info, root); } break; - } case ALLCAP: { *info += SPELL_ORIGCAP; rv = checkword(cw, info, root); @@ -391,7 +396,7 @@ int Hunspell::spell(const char * word, int * info, char ** root) *apostrophe = '\0'; wl2 = u8_u16(tmpword, MAXWORDLEN, cw); *apostrophe = '\''; - if (wl2 < nc) { + if (wl2 >= 0 && wl2 < nc) { mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); rv = checkword(cw, info, root); if (rv) break; @@ -738,19 +743,28 @@ int Hunspell::suggest(char*** slst, const char * word) char * dot = strchr(cw, '.'); if (dot && (dot > cw)) { int captype_; - if (utf8) { + if (utf8) + { w_char w_[MAXWORDLEN]; int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); captype_ = get_captype_utf8(w_, wl_, langnum); } else captype_ = get_captype(dot+1, strlen(dot+1), csconv); - if (captype_ == INITCAP) { + if (captype_ == INITCAP) + { char * st = mystrdup(cw); - if (st) st = (char *) realloc(st, wl + 2); - if (st) { - st[(dot - cw) + 1] = ' '; - strcpy(st + (dot - cw) + 2, dot + 1); - ns = insert_sug(slst, st, ns); - free(st); + if (st) + { + char *newst = (char *) realloc(st, wl + 2); + if (newst == NULL) + free(st); + st = newst; + } + if (st) + { + st[(dot - cw) + 1] = ' '; + strcpy(st + (dot - cw) + 2, dot + 1); + ns = insert_sug(slst, st, ns); + free(st); } } } @@ -836,7 +850,7 @@ int Hunspell::suggest(char*** slst, const char * word) *pos = '\0'; strcpy(w, (*slst)[j]); strcat(w, pos + 1); - spell(w, &info, NULL); + (void)spell(w, &info, NULL); if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { *pos = ' '; } else *pos = '-'; @@ -1658,6 +1672,13 @@ int Hunspell::get_langnum() const return langnum; } +int Hunspell::input_conv(const char * word, char * dest) +{ + RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + return (rl && rl->conv(word, dest)); +} + + // return the beginning of the element (attr == NULL) or the attribute const char * Hunspell::get_xml_pos(const char * s, const char * attr) { @@ -1682,11 +1703,11 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { int n = 0; char * p; if (!list) return 0; - for (p = list; (p = strstr(p, tag)); p++) n++; + for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++; if (n == 0) return 0; *slst = (char **) malloc(sizeof(char *) * n); if (!*slst) return 0; - for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) { + for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { int l = strlen(p); (*slst)[n] = (char *) malloc(l + 1); if (!(*slst)[n]) return n; @@ -1698,6 +1719,19 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { return n; } +namespace +{ + void myrep(std::string& str, const std::string& search, const std::string& replace) + { + size_t pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) + { + str.replace(pos, search.length(), replace); + pos += replace.length(); + } + } +} + int Hunspell::spellml(char*** slst, const char * word) { char *q, *q2; @@ -1709,26 +1743,26 @@ int Hunspell::spellml(char*** slst, const char * word) q2 = strstr(q2, "'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw); if (n == 0) return 0; // convert the result to ana1ana2 format - for (int i = 0; i < n; i++) s+= strlen((*slst)[i]); - char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->& - if (!r) return 0; - strcpy(r, ""); + std::string r; + r.append(""); for (int i = 0; i < n; i++) { - int l = strlen(r); - strcpy(r + l, ""); - strcpy(r + l + 3, (*slst)[i]); - mystrrep(r + l + 3, "\t", " "); - mystrrep(r + l + 3, "<", "<"); - mystrrep(r + l + 3, "&", "&"); - strcat(r, ""); + r.append(""); + + std::string entry((*slst)[i]); free((*slst)[i]); + myrep(entry, "\t", " "); + myrep(entry, "&", "&"); + myrep(entry, "<", "<"); + r.append(entry); + + r.append(""); } - strcat(r, ""); - (*slst)[0] = r; + r.append(""); + (*slst)[0] = mystrdup(r.c_str()); return 1; } else if (check_xml_par(q, "type=", "stem")) { if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw); @@ -1741,9 +1775,9 @@ int Hunspell::spellml(char*** slst, const char * word) return generate(slst, cw, cw2); } } else { - if ((q2 = strstr(q2 + 1, "'), ""))) { + if ((n = get_xml_list(&slst2, strchr(q2, '>'), "")) != 0) { int n2 = generate(slst, cw, slst2, n); freelist(&slst2, n); return uniqlist(*slst, n2); diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx index 5fc88eaa8a..c2b4e4fd7f 100644 --- a/src/hunspell/hunspell.hxx +++ b/src/hunspell/hunspell.hxx @@ -19,6 +19,10 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell { +private: + Hunspell(const Hunspell&); + Hunspell& operator = (const Hunspell&); +private: AffixMgr* pAMgr; HashMgr* pHMgr[MAXDIC]; int maxdic; @@ -34,6 +38,11 @@ public: /* Hunspell(aff, dic) - constructor of Hunspell class * input: The affix and dictionary data as bytes + * + * In WIN32 environment, use UTF-8 encoded paths started with the long path + * prefix \\\\?\\ to handle system-independent character encoding and very + * long path names (without the long path prefix Hunspell will use fopen() + * with system-dependent character encoding instead of _wfopen()). */ Hunspell(const char *affix_data, const size_t affix_len, const char *dic_data, const size_t dic_len); @@ -127,6 +136,9 @@ public: const char * get_version(); int get_langnum() const; + + /* need for putdic */ + int input_conv(const char * word, char * dest); /* experimental and deprecated functions */ diff --git a/src/hunspell/hunvisapi.h b/src/hunspell/hunvisapi.h index 4712280ad2..503c20f664 100644 --- a/src/hunspell/hunvisapi.h +++ b/src/hunspell/hunvisapi.h @@ -9,7 +9,7 @@ # else # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) # endif -#elif BUILDING_LIBHUNSPELL && 1 +#elif defined(BUILDING_LIBHUNSPELL) && 1 # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) #else # define LIBHUNSPELL_DLL_EXPORTED diff --git a/src/hunspell/hunvisapi.h.in b/src/hunspell/hunvisapi.h.in index 9c7f1b7b13..abf025ae97 100644 --- a/src/hunspell/hunvisapi.h.in +++ b/src/hunspell/hunvisapi.h.in @@ -9,7 +9,7 @@ # else # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) # endif -#elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@ +#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@ # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) #else # define LIBHUNSPELL_DLL_EXPORTED diff --git a/src/hunspell/phonet.cxx b/src/hunspell/phonet.cxx index 144bd40d08..b33edeb023 100644 --- a/src/hunspell/phonet.cxx +++ b/src/hunspell/phonet.cxx @@ -87,7 +87,8 @@ int phonet (const char * inword, char * target, char word[MAXPHONETUTF8LEN + 1]; if (len == -1) len = strlen(inword); if (len > MAXPHONETUTF8LEN) return 0; - strcpy(word, inword); + strncpy(word, inword, MAXPHONETUTF8LEN); + word[MAXPHONETUTF8LEN] = '\0'; /** check word **/ i = j = z = 0; diff --git a/src/hunspell/replist.hxx b/src/hunspell/replist.hxx index 9c37e29bb3..2dbc0160bd 100644 --- a/src/hunspell/replist.hxx +++ b/src/hunspell/replist.hxx @@ -8,6 +8,9 @@ class LIBHUNSPELL_DLL_EXPORTED RepList { +private: + RepList(const RepList&); + RepList& operator = (const RepList&); protected: replentry ** dat; int size; diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx index ebf9bc0a47..f0e336c974 100644 --- a/src/hunspell/suggestmgr.cxx +++ b/src/hunspell/suggestmgr.cxx @@ -107,7 +107,10 @@ int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int int cwrd = 1; if (ns == maxSug) return maxSug; for (int k=0; k < ns; k++) { - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } } if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { wlst[ns] = mystrdup(candidate); @@ -364,8 +367,12 @@ int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn, int cwrd = 1; *(candidate + cn) = '\0'; int wl = strlen(candidate); - for (int m=0; m < ns; m++) - if (strcmp(candidate, wlst[m]) == 0) cwrd = 0; + for (int m=0; m < ns; m++) { + if (strcmp(candidate, wlst[m]) == 0) { + cwrd = 0; + break; + } + } if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { if (ns < maxSug) { wlst[ns] = mystrdup(candidate); @@ -678,7 +685,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest // error is missing a letter it needs int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest) { - char candidate[MAXSWUTF8L]; + char candidate[MAXSWUTF8L + 4]; char * p; clock_t timelimit = clock(); int timer = MINTIMER; @@ -700,8 +707,8 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge // error is missing a letter it needs int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; + w_char candidate_utf[MAXSWL + 1]; + char candidate[MAXSWUTF8L + 4]; w_char * p; clock_t timelimit = clock(); int timer = MINTIMER; @@ -761,8 +768,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest ((c1 == 3) && (c2 >= 2)))) *p = '-'; cwrd = 1; - for (int k=0; k < ns; k++) - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + for (int k=0; k < ns; k++) { + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } + } if (ns < maxSug) { if (cwrd) { wlst[ns] = mystrdup(candidate); @@ -777,8 +788,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { *p = '-'; - for (int k=0; k < ns; k++) - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + for (int k=0; k < ns; k++) { + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } + } if (ns < maxSug) { if (cwrd) { wlst[ns] = mystrdup(candidate); @@ -1333,7 +1348,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md if ((!guessorig[i] && strstr(guess[i], wlst[j])) || (guessorig[i] && strstr(guessorig[i], wlst[j])) || // check forbidden words - !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0; + !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) { + unique = 0; + break; + } } if (unique) { wlst[ns++] = guess[i]; @@ -1361,7 +1379,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md // don't suggest previous suggestions or a previous suggestion with prefixes or affixes if (strstr(rootsphon[i], wlst[j]) || // check forbidden words - !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0; + !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) { + unique = 0; + break; + } } if (unique) { wlst[ns++] = mystrdup(rootsphon[i]); @@ -1855,6 +1876,10 @@ int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_sw w_char su2[MAXSWL]; int l1 = u8_u16(su1, MAXSWL, s1); int l2 = u8_u16(su2, MAXSWL, s2); + + if (l1 <= 0 || l2 <= 0) + return 0; + // decapitalize dictionary word if (complexprefixes) { mkallsmall_utf(su2+l2-1, 1, langnum); diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx index 5f043fdfd2..8456b5b3e2 100644 --- a/src/hunspell/suggestmgr.hxx +++ b/src/hunspell/suggestmgr.hxx @@ -32,6 +32,10 @@ enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; class LIBHUNSPELL_DLL_EXPORTED SuggestMgr { +private: + SuggestMgr(const SuggestMgr&); + SuggestMgr& operator = (const SuggestMgr&); +private: char * ckey; int ckeyl; w_char * ckey_utf;