correct.cpp Source File

tdespell2

/* enchant
 * Copyright (C) 2003 Dom Lachowicz
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * In addition, as a special exception, Dom Lachowicz
 * gives permission to link the code of this program with
 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
 * spell checker backend) and distribute linked combinations including
 * the two.  You must obey the GNU Lesser General Public License in all
 * respects for all of the code used other than said providers.  If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so.  If you do not wish to
 * do so, delete this exception statement from your version.
 */
 
/*
 * correct.c - Routines to manage the higher-level aspects of spell-checking
 *
 * This code originally resided in ispell.c, but was moved here to keep
 * file sizes smaller.
 *
 * Copyright (c), 1983, by Pace Willisson
 *
 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All modifications to the source code must be clearly marked as
 *    such.  Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 * 4. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgment:
 *      This product includes software developed by Geoff Kuenning and
 *      other unpaid contributors.
 * 5. The name of Geoff Kuenning may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 
/*
 * $Log$
 * Revision 1.1  2004/01/31 16:44:12  zrusin
 * ISpell plugin.
 *
 * Revision 1.4  2003/08/14 17:51:26  dom
 * update license - exception clause should be Lesser GPL
 *
 * Revision 1.3  2003/07/28 20:40:25  dom
 * fix up the license clause, further win32-registry proof some directory getting functions
 *
 * Revision 1.2  2003/07/16 22:52:35  dom
 * LGPL + exception license
 *
 * Revision 1.1  2003/07/15 01:15:04  dom
 * ispell enchant backend
 *
 * Revision 1.2  2003/01/29 05:50:11  hippietrail
 *
 * Fixed my mess in EncodingManager.
 * Changed many C casts to C++ casts.
 *
 * Revision 1.1  2003/01/24 05:52:31  hippietrail
 *
 * Refactored ispell code. Old ispell global variables had been put into
 * an allocated structure, a pointer to which was passed to many functions.
 * I have now made all such functions and variables private members of the
 * ISpellChecker class. It was C OO, now it's C++ OO.
 *
 * I've fixed the makefiles and tested compilation but am unable to test
 * operation. Please back out my changes if they cause problems which
 * are not obvious or easy to fix.
 *
 * Revision 1.7  2002/09/19 05:31:15  hippietrail
 *
 * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
 * K&R function declarations removed, converted to Doxygen style comments
 * where possible.  No code has been changed (I hope).  Compiles for me but
 * unable to test.
 *
 * Revision 1.6  2002/09/17 03:03:28  hippietrail
 *
 * After seeking permission on the developer list I've reformatted all the
 * spelling source which seemed to have parts which used 2, 3, 4, and 8
 * spaces for tabs.  It should all look good with our standard 4-space
 * tabs now.
 * I've concentrated just on indentation in the actual code.  More prettying
 * could be done.
 * * NO code changes were made *
 *
 * Revision 1.5  2002/09/13 17:20:12  mpritchett
 * Fix more warnings for Linux build
 *
 * Revision 1.4  2002/03/06 08:27:16  fjfranklin
 * o Only activate compound handling when the hash file says so (Per Larsson)
 *
 * Revision 1.3  2001/05/14 09:52:50  hub
 * Removed newMain.c from GNUmakefile.am
 *
 * C++ comments are not C comment. Changed to C comments
 *
 * Revision 1.2  2001/05/12 16:05:42  thomasf
 * Big pseudo changes to ispell to make it pass around a structure rather
 * than rely on all sorts of gloabals willy nilly here and there.  Also
 * fixed our spelling class to work with accepting suggestions once more.
 * This code is dirty, gross and ugly (not to mention still not supporting
 * multiple hash sized just yet) but it works on my machine and will no
 * doubt break other machines.
 *
 * Revision 1.1  2001/04/15 16:01:24  tomas_f
 * moving to spell/xp
 *
 * Revision 1.2  1999/10/05 16:17:28  paul
 * Fixed build, and other tidyness.
 * Spell dialog enabled by default, with keyboard binding of F7.
 *
 * Revision 1.1  1999/09/29 23:33:32  justin
 * Updates to the underlying ispell-based code to support suggested corrections.
 *
 * Revision 1.59  1995/08/05  23:19:43  geoff
 * Fix a bug that caused offsets for long lines to be confused if the
 * line started with a quoting uparrow.
 *
 * Revision 1.58  1994/11/02  06:56:00  geoff
 * Remove the anyword feature, which I've decided is a bad idea.
 *
 * Revision 1.57  1994/10/26  05:12:39  geoff
 * Try boundary characters when inserting or substituting letters, except
 * (naturally) at word boundaries.
 *
 * Revision 1.56  1994/10/25  05:46:30  geoff
 * Fix an assignment inside a conditional that could generate spurious
 * warnings (as well as being bad style).  Add support for the FF_ANYWORD
 * option.
 *
 * Revision 1.55  1994/09/16  04:48:24  geoff
 * Don't pass newlines from the input to various other routines, and
 * don't assume that those routines leave the input unchanged.
 *
 * Revision 1.54  1994/09/01  06:06:41  geoff
 * Change erasechar/killchar to uerasechar/ukillchar to avoid
 * shared-library problems on HP systems.
 *
 * Revision 1.53  1994/08/31  05:58:38  geoff
 * Add code to handle extremely long lines in -a mode without splitting
 * words or reporting incorrect offsets.
 *
 * Revision 1.52  1994/05/25  04:29:24  geoff
 * Fix a bug that caused line widths to be calculated incorrectly when
 * displaying lines containing tabs.  Fix a couple of places where
 * characters were sign-extended incorrectly, which could cause 8-bit
 * characters to be displayed wrong.
 *
 * Revision 1.51  1994/05/17  06:44:05  geoff
 * Add support for controlled compound formation and the COMPOUNDONLY
 * option to affix flags.
 *
 * Revision 1.50  1994/04/27  05:20:14  geoff
 * Allow compound words to be formed from more than two components
 *
 * Revision 1.49  1994/04/27  01:50:31  geoff
 * Add support to correctly capitalize words generated as a result of a
 * missing-space suggestion.
 *
 * Revision 1.48  1994/04/03  23:23:02  geoff
 * Clean up the code in missingspace() to be a bit simpler and more
 * efficient.
 *
 * Revision 1.47  1994/03/15  06:24:23  geoff
 * Fix the +/-/~ commands to be independent.  Allow the + command to
 * receive a suffix which is a deformatter type (currently hardwired to
 * be either tex or nroff/troff).
 *
 * Revision 1.46  1994/02/21  00:20:03  geoff
 * Fix some bugs that could cause bad displays in the interaction between
 * TeX parsing and string characters.  Show_char now will not overrun
 * the inverse-video display area by accident.
 *
 * Revision 1.45  1994/02/14  00:34:51  geoff
 * Fix correct to accept length parameters for ctok and itok, so that it
 * can pass them to the to/from ichar routines.
 *
 * Revision 1.44  1994/01/25  07:11:22  geoff
 * Get rid of all old RCS log lines in preparation for the 3.1 release.
 *
 */
 
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "ispell_checker.h"
#include "msgs.h"
 
/*
extern void upcase P ((ichar_t * string));
extern void lowcase P ((ichar_t * string));
extern ichar_t * strtosichar P ((char * in, int canonical));
 
int compoundflag = COMPOUND_CONTROLLED;
*/
 
/*
 * \param a
 * \param b
 * \param canonical NZ for canonical string chars
 *
 * \return
 */
int
ISpellChecker::casecmp (char *a, char *b, int canonical)
{
    ichar_t *   ap;
    ichar_t *   bp;
    ichar_t     inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
    ichar_t     intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
 
    strtoichar (inta, a, sizeof inta, canonical);
    strtoichar (intb, b, sizeof intb, canonical);
    for (ap = inta, bp = intb;  *ap != 0;  ap++, bp++)
    {
        if (*ap != *bp)
        {
            if (*bp == '\0')
                return m_hashheader.sortorder[*ap];
            else if (mylower (*ap))
            {
                if (mylower (*bp)  ||  mytoupper (*ap) != *bp)
                    return static_cast<int>(m_hashheader.sortorder[*ap])
                      - static_cast<int>(m_hashheader.sortorder[*bp]);
            }
            else
            {
                if (myupper (*bp)  ||  mytolower (*ap) != *bp)
                    return static_cast<int>(m_hashheader.sortorder[*ap])
                      - static_cast<int>(m_hashheader.sortorder[*bp]);
            }
        }
    }
    if (*bp != '\0')
        return -static_cast<int>(m_hashheader.sortorder[*bp]);
    for (ap = inta, bp = intb;  *ap;  ap++, bp++)
    {
        if (*ap != *bp)
        {
            return static_cast<int>(m_hashheader.sortorder[*ap])
              - static_cast<int>(m_hashheader.sortorder[*bp]);
        }
    }
    return 0;
}
 
/*
 * Rebuild the list of suggested corrections for a word.  The suggestion
 * list and its bookkeeping counters are reset, then each kind of
 * correction is tried in turn until the list holds MAXPOSSIBLE entries.
 *
 * \param word the misspelled word
 */
void
ISpellChecker::makepossibilities (ichar_t *word)
{
    int slot;

    m_pcount = 0;
    m_maxposslen = 0;
    m_easypossibilities = 0;
    for (slot = 0; slot < MAXPOSSIBLE; slot++)
        m_possibilities[slot][0] = 0;

#ifndef NO_CAPITALIZATION_SUPPORT
    wrongcapital (word);
#endif

    /*
     * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
     * page 363, the correct order for this is:
     * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
     * thus, it was exactly backwards in the old version. -- PWP
     *
     * The count never shrinks, so once the list is full every later
     * step would be skipped as well and we can simply stop.
     */
    if (m_pcount >= MAXPOSSIBLE)
        return;
    missingletter (word);       /* omission */

    if (m_pcount >= MAXPOSSIBLE)
        return;
    transposedletter (word);    /* transposition */

    if (m_pcount >= MAXPOSSIBLE)
        return;
    extraletter (word);         /* insertion */

    if (m_pcount >= MAXPOSSIBLE)
        return;
    wrongletter (word);         /* substitution */

    if (m_pcount >= MAXPOSSIBLE)
        return;
    if (m_hashheader.compoundflag != COMPOUND_ANYTIME)
        missingspace (word);    /* two words */
}
 
/*
 * Add a word to the suggestion list (m_possibilities) unless it is
 * already there, and keep m_maxposslen up to date.
 *
 * \param word the suggestion, in internal (ichar_t) form
 *
 * \return 0 if the word was already listed, or was stored and there is
 *         still room for more; -1 if the list is full (either because
 *         this word took the last slot or because there was no slot
 *         left to store it in)
 */
int
ISpellChecker::insert (ichar_t *word)
{
    int     i;
    char *  realword;

    realword = ichartosstr (word, 0);
    for (i = 0; i < m_pcount; i++)
    {
        if (strcmp (m_possibilities[i], realword) == 0)
            return (0);
    }

    /*
     * Never write past the last slot.  Callers are expected to stop
     * once -1 has been returned, but do not rely on that here.
     */
    if (m_pcount >= MAXPOSSIBLE)
        return (-1);

    strcpy (m_possibilities[m_pcount++], realword);
    i = static_cast<int>(strlen (realword));
    if (i > m_maxposslen)
        m_maxposslen = i;
    if (m_pcount >= MAXPOSSIBLE)
        return (-1);
    else
        return (0);
}
 
#ifndef NO_CAPITALIZATION_SUPPORT
/*
 * Suggest the word itself with corrected capitalization, if the word is
 * acceptable once case is ignored.
 *
 * \param word the misspelled word
 */
void
ISpellChecker::wrongcapital (ichar_t *word)
{
    ichar_t     upper[INPUTWORDLEN + MAXAFFIXLEN];

    /*
    ** A nonzero third argument makes "good" ignore case.  Nothing to
    ** suggest unless the word matches that way.
    */
    if (!good (word, 0, 1, 0, 0))
        return;

    /* "ins_cap" will recapitalize the upper-cased copy correctly. */
    icharcpy (upper, word);
    upcase (upper);
    ins_cap (upper, word);
}
#endif
 
/*
 * Suggest words that differ from the input by one substituted letter.
 * Every character in m_Try is tried at every position, except that
 * boundary characters are not tried at the first or last position.
 *
 * \param word the misspelled word
 */
void
ISpellChecker::wrongletter (ichar_t *word)
{
    ichar_t     trial[INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t     keep;
    int         len;
    int         pos;
    int         t;

    len = icharlen (word);
    icharcpy (trial, word);
#ifndef NO_CAPITALIZATION_SUPPORT
    upcase (trial);
#endif

    for (pos = 0; pos < len; pos++)
    {
        const bool  atEdge = (pos == 0  ||  pos == len - 1);

        keep = trial[pos];
        for (t = 0; t < m_Trynum; t++)
        {
            /* Skip the letter already there and edge boundary chars. */
            if (m_Try[t] == keep
              ||  (atEdge  &&  isboundarych (m_Try[t])))
                continue;

            trial[pos] = m_Try[t];
            if (good (trial, 0, 1, 0, 0)  &&  ins_cap (trial, word) < 0)
                return;     /* suggestion list is full */
        }
        trial[pos] = keep;
    }
}
 
/*
 * Suggest words obtained by deleting one letter from the input.  Words
 * shorter than two letters are not processed.
 *
 * \param word the misspelled word
 */
void
ISpellChecker::extraletter (ichar_t *word)
{
    ichar_t     shortened[INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t *   src;
    ichar_t *   dst;

    if (icharlen (word) < 2)
        return;

    /*
     * Begin with the first letter removed.  Each pass then puts one
     * more original letter back at the front, which moves the "hole"
     * one position to the right.
     */
    icharcpy (shortened, word + 1);
    src = word;
    dst = shortened;
    while (*src != 0)
    {
        if (good (shortened, 0, 1, 0, 0)
          &&  ins_cap (shortened, word) < 0)
            return;         /* suggestion list is full */
        *dst = *src;
        dst++;
        src++;
    }
}
 
/*
 * Suggest words obtained by inserting one letter from m_Try into the
 * input.  Boundary characters are not inserted before the first letter
 * or after the last one.
 *
 * \param word the misspelled word
 */
void
ISpellChecker::missingletter (ichar_t *word)
{
    ichar_t     widened[INPUTWORDLEN + MAXAFFIXLEN + 1];
    ichar_t *   src;
    ichar_t *   gap;
    int         t;

    /* Copy the word one slot to the right, leaving a gap at the front. */
    icharcpy (widened + 1, word);
    src = word;
    gap = widened;
    while (*src != 0)
    {
        for (t = 0;  t < m_Trynum;  t++)
        {
            if (gap == widened  &&  isboundarych (m_Try[t]))
                continue;
            *gap = m_Try[t];
            if (good (widened, 0, 1, 0, 0)
              &&  ins_cap (widened, word) < 0)
                return;     /* suggestion list is full */
        }
        /* Restore the original letter here; the gap moves right. */
        *gap++ = *src++;
    }

    /* Finally try each letter appended after the last one. */
    for (t = 0;  t < m_Trynum;  t++)
    {
        if (!isboundarych (m_Try[t]))
        {
            *gap = m_Try[t];
            if (good (widened, 0, 1, 0, 0)
              &&  ins_cap (widened, word) < 0)
                return;     /* suggestion list is full */
        }
    }
}
 
/*
 * Suggest splitting the input into two words.  For every split point
 * where both halves are acceptable, each saved capitalization of the
 * first half is combined with each saved capitalization of the second
 * half, once joined by a space and once joined by a hyphen.
 *
 * \param word the misspelled word
 */
void ISpellChecker::missingspace (ichar_t *word)
{
    ichar_t     firstvariants[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t     secondvariants[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t     shifted[INPUTWORDLEN + MAXAFFIXLEN + 1];
    ichar_t *   split;      /* Current split point in shifted */
    ichar_t *   joint;      /* Where the separator goes */
    int         wordlen;
    int         nfirst;     /* No. variants saved for 1st half */
    int         nsecond;    /* No. variants saved for 2nd half */
    int         fi;
    int         si;

    /*
    ** We don't do words of length less than 3;  this keeps us from
    ** splitting all two-letter words into two single letters.  We
    ** also don't do maximum-length words, since adding the space
    ** would exceed the size of the "possibilities" array.
    */
    wordlen = icharlen (word);
    if (wordlen < 3  ||  wordlen >= INPUTWORDLEN + MAXAFFIXLEN - 1)
        return;

    /*
     * Keep the word shifted right by one slot.  At each step one more
     * letter is moved down into the first half and a terminator is put
     * in its old place, so "shifted" is the first half and "split + 1"
     * is the second half.
     */
    icharcpy (shifted + 1, word);
    for (split = shifted + 1;  split[1] != '\0';  split++)
    {
        split[-1] = *split;
        *split = '\0';
        if (!good (shifted, 0, 1, 0, 0))
            continue;

        /*
         * Save_cap must be called before good() is called on the
         * second half, because it uses state left around by
         * good().  This is unfortunate because it wastes a bit of
         * time, but I don't think it's a significant performance
         * problem.
         */
        nfirst = save_cap (shifted, word, firstvariants);
        if (!good (split + 1, 0, 1, 0, 0))
            continue;
        nsecond = save_cap (split + 1, split + 1, secondvariants);

        for (fi = 0;  fi < nfirst;  fi++)
        {
            joint = &firstvariants[fi][split - shifted];
            for (si = 0;  si < nsecond;  si++)
            {
                *joint = ' ';
                icharcpy (joint + 1, secondvariants[si]);
                if (insert (firstvariants[fi]) < 0)
                    return;
                *joint = '-';
                if (insert (firstvariants[fi]) < 0)
                    return;
            }
        }
    }
}
 
/*
 * Test whether a word can be accepted as a compound of dictionary
 * words (for languages like German that form lots of compounds).
 *
 * \param word the word to test
 * \param pfxopts Options to apply to prefixes
 *
 * \return nonzero if the word is an acceptable compound, 0 otherwise
 */
int
ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
{
    ichar_t     work[INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t *   cut;        /* Start of the candidate 2nd half */
    ichar_t     held;       /* Character replaced by the terminator */
    int         headok;
    long        tailcap;    /* Capitalization of 2nd half */

    /*
    ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
    */
    if (m_hashheader.compoundflag == COMPOUND_NEVER)
        return 0;

    /*
    ** This is similar to missingspace, except we quit on the first hit,
    ** and we won't allow either member of the compound to be a single
    ** letter.
    **
    ** We don't do words of length less than 2 * compoundmin, since
    ** both halves must be at least compoundmin letters.
    */
    if (icharlen (word) < 2 * m_hashheader.compoundmin)
        return 0;

    icharcpy (work, word);
    for (cut = work + m_hashheader.compoundmin;
      cut[m_hashheader.compoundmin - 1] != 0;
      cut++)
    {
        /* Terminate temporarily at the cut to test the first half. */
        held = *cut;
        *cut = 0;
        headok = good (work, 0, 0, pfxopts, FF_COMPOUNDONLY);
        *cut = held;
        if (!headok)
            continue;

        /* The rest must be a word, or itself a compound. */
        if (!good (cut, 0, 1, FF_COMPOUNDONLY, 0)
          &&  !compoundgood (cut, FF_COMPOUNDONLY))
            continue;

        tailcap = whatcap (cut);
        switch (whatcap (work))
        {
        case ANYCASE:
        case CAPITALIZED:
        case FOLLOWCASE:    /* Followcase can have l.c. suffix */
            return tailcap == ANYCASE;
        case ALLCAPS:
            return tailcap == ALLCAPS;
        }
    }
    return 0;
}
 
/*
 * Suggest words obtained by swapping two adjacent letters of the input.
 *
 * \param word the misspelled word
 */
void
ISpellChecker::transposedletter (ichar_t *word)
{
    ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t *   p;
    ichar_t     temp;

    icharcpy (newword, word);

    /*
     * An empty word has nothing to swap.  Without this check the loop
     * condition below would read newword[1], which icharcpy never
     * wrote for an empty input.
     */
    if (newword[0] == 0)
        return;

    for (p = newword;  p[1] != 0;  p++)
    {
        /* Swap the pair at p ... */
        temp = *p;
        *p = p[1];
        p[1] = temp;
        if (good (newword, 0, 1, 0, 0))
        {
            if (ins_cap (newword, word) < 0)
                return;     /* suggestion list is full */
        }
        /* ... and swap it back before moving on. */
        temp = *p;
        *p = p[1];
        p[1] = temp;
    }
}
 
/*
 * Insert every capitalization that save_cap produces for a word into
 * the suggestion list.
 *
 * \param word the word to insert
 * \param pattern word whose capitalization serves as the sample
 *
 * \return -1 as soon as insert() reports a full list, otherwise 0
 */
int
ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern)
{
    ichar_t     variants[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
    int         count;      /* No. of variants saved */
    int         idx;

    count = save_cap (word, pattern, variants);
    idx = 0;
    while (idx < count)
    {
        if (insert (variants[idx]) < 0)
            return -1;
        idx++;
    }
    return 0;
}
 
/*
 * Collect the acceptable capitalizations of a word for each entry in
 * m_hits, newest entry first, until MAX_CAPS variants have been saved.
 *
 * \param word the word to capitalize
 * \param pattern word whose capitalization serves as the sample
 * \param savearea receives the variants, one per row
 *
 * \return number of rows of savearea that were filled in
 */
int
ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern, 
                    ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN])
{
    int         count;      /* Number of words saved */
    int         idx;        /* Index into hits array */
    int         pfxstrip;   /* No. chars stripped from front */
    int         pfxadd;     /* No. chars added to front of root */
    int         sfxstrip;   /* No. chars stripped from back */
    int         sfxadd;     /* No. chars added to back of root */

    if (*word == 0)
        return 0;

    count = 0;
    for (idx = m_numhits - 1;  idx >= 0  &&  count < MAX_CAPS;  idx--)
    {
        /* A hit without a prefix or suffix contributes zero lengths. */
        pfxstrip = 0;
        pfxadd = 0;
        sfxstrip = 0;
        sfxadd = 0;
        if (m_hits[idx].prefix)
        {
            pfxstrip = m_hits[idx].prefix->stripl;
            pfxadd = m_hits[idx].prefix->affl;
        }
        if (m_hits[idx].suffix)
        {
            sfxstrip = m_hits[idx].suffix->stripl;
            sfxadd = m_hits[idx].suffix->affl;
        }
        save_root_cap (word, pattern, pfxstrip, pfxadd,
            sfxstrip, sfxadd,
            m_hits[idx].dictent, m_hits[idx].prefix, m_hits[idx].suffix,
            savearea, &count);
    }
    return count;
}
 
/*
 * Insert into the suggestion list every capitalization that
 * save_root_cap produces for one explicitly given dictionary entry and
 * affix combination.
 *
 * \param word the word to insert
 * \param pattern word whose capitalization serves as the sample
 * \param prestrip No. chars stripped from front
 * \param preadd No. chars added to front of root
 * \param sufstrip No. chars stripped from back
 * \param sufadd No. chars added to back of root
 * \param firstdent first dictionary entry for the root
 * \param pfxent prefix flag entry, passed through to save_root_cap
 * \param sufent suffix flag entry, passed through to save_root_cap
 *
 * \return -1 as soon as insert() reports a full list, otherwise 0
 */
int
ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern, 
                 int prestrip, int preadd, int sufstrip, int sufadd,
                 struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent)
{
    ichar_t     variants[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
    int         count = 0;  /* Number of words saved */
    int         idx;

    save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd,
      firstdent, pfxent, sufent, variants, &count);

    idx = 0;
    while (idx < count)
    {
        if (insert (variants[idx]) < 0)
            return -1;
        idx++;
    }
    return 0;
}
 
/*
 * Work out which capitalizations of a word are acceptable for one
 * dictionary entry (and its chain of variants) and append them to
 * savearea.  *nsaved is the number of rows already in use; it is
 * incremented for every row written, and nothing is written once it
 * has reached MAX_CAPS.
 *
 * \param word the word to capitalize; it is copied, not modified
 * \param pattern word whose capitalization serves as the sample
 * \param prestrip No. chars stripped from front (followcase entries only)
 * \param preadd No. chars added to front of root (followcase entries only)
 * \param sufstrip No. chars stripped from back (followcase entries only)
 * \param sufadd No. chars added to back of root (followcase entries only)
 * \param firstdent first dictionary entry for the root
 * \param pfxent prefix flag entry, or NULL; a variant entry is only
 *               used if its mask has this flag bit set
 * \param sufent suffix flag entry, or NULL; checked like pfxent
 * \param savearea receives the variants, one per row
 * \param nsaved in/out count of rows used in savearea
 */
/* ARGSUSED */
void
ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern, 
                          int prestrip, int preadd, int sufstrip, int sufadd,
                          struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent, 
                          ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN], 
                          int * nsaved)
{
#ifndef NO_CAPITALIZATION_SUPPORT
    struct dent * dent;
#endif /* NO_CAPITALIZATION_SUPPORT */
    int         firstisupper;
    ichar_t     newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
#ifndef NO_CAPITALIZATION_SUPPORT
    ichar_t *   p;
    int         len;
    int         i;
    int         limit;
#endif /* NO_CAPITALIZATION_SUPPORT */
 
    /* No room left in savearea: nothing to do. */
    if (*nsaved >= MAX_CAPS)
        return;
    /* All case changes below are made on this private copy. */
    icharcpy (newword, word);
    firstisupper = myupper (pattern[0]);
#ifdef NO_CAPITALIZATION_SUPPORT
    /*
    ** Apply the old, simple-minded capitalization rules.
    */
    if (firstisupper)
    {
        if (myupper (pattern[1]))
            upcase (newword);
        else
        {
            lowcase (newword);
            newword[0] = mytoupper (newword[0]);
        }
    }
    else
        lowcase (newword);
    icharcpy (savearea[*nsaved], newword);
    (*nsaved)++;
    return;
#else /* NO_CAPITALIZATION_SUPPORT */
/*
 * True if the given entry's mask carries the flag bit of every affix
 * that was supplied (a NULL affix always passes).
 * NOTE(review): this macro is not #undef'd anywhere in this function.
 */
#define flagsareok(dent)    \
    ((pfxent == NULL \
    ||  TSTMASKBIT (dent->mask, pfxent->flagbit)) \
      &&  (sufent == NULL \
    ||  TSTMASKBIT (dent->mask, sufent->flagbit)))
 
    dent = firstdent;
    /* An entry that is all-caps with no further variants forces upper case. */
    if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
    {
        upcase (newword);   /* Uppercase required */
        icharcpy (savearea[*nsaved], newword);
        (*nsaved)++;
        return;
    }
    /* Look for the first lower-case character in the pattern. */
    for (p = pattern;  *p;  p++)
    {
        if (mylower (*p))
            break;
    }
    if (*p == 0)
    {
        upcase (newword);   /* Pattern was all caps */
        icharcpy (savearea[*nsaved], newword);
        (*nsaved)++;
        return;
    }
    /* Look for an upper-case character after the first position. */
    for (p = pattern + 1;  *p;  p++)
    {
        if (myupper (*p))
            break;
    }
    if (*p == 0)
    {
        /*
        ** The pattern was all-lower or capitalized.  If that's
        ** legal, insert only that version.
        */
        if (firstisupper)
        {
            if (captype (dent->flagfield) == CAPITALIZED
              ||  captype (dent->flagfield) == ANYCASE)
            {
                lowcase (newword);
                newword[0] = mytoupper (newword[0]);
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                return;
            }
        }
        else
        {
            if (captype (dent->flagfield) == ANYCASE)
            {
                lowcase (newword);
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                return;
            }
        }
        /*
        ** The first entry did not allow it.  Walk the variant entries
        ** chained through ->next, ignoring followcase entries and
        ** entries whose mask lacks the affix flags, and accept the
        ** first one that permits the pattern's simple capitalization.
        */
        while (dent->flagfield & MOREVARIANTS)
        {
            dent = dent->next;
            if (captype (dent->flagfield) == FOLLOWCASE
              ||  !flagsareok (dent))
                continue;
            if (firstisupper)
            {
                if (captype (dent->flagfield) == CAPITALIZED)
                {
                    lowcase (newword);
                    newword[0] = mytoupper (newword[0]);
                    icharcpy (savearea[*nsaved], newword);
                    (*nsaved)++;
                    return;
                }
            }
            else
            {
                if (captype (dent->flagfield) == ANYCASE)
                {
                    lowcase (newword);
                    icharcpy (savearea[*nsaved], newword);
                    (*nsaved)++;
                    return;
                }
            }
        }
    }
    /*
    ** Either the sample had complex capitalization, or the simple
    ** capitalizations (all-lower or capitalized) are illegal.
    ** Insert all legal capitalizations, including those that are
    ** all-lower or capitalized.  If the prototype is capitalized,
    ** capitalized all-lower samples.  Watch out for affixes.
    */
    dent = firstdent;
    /* len is the root's length, taken once from the first entry. */
    p = strtosichar (dent->word, 1);
    len = icharlen (p);
    if (dent->flagfield & MOREVARIANTS)
        dent = dent->next;  /* Skip place-holder entry */
    for (  ;  ;  )
    {
        if (flagsareok (dent))
        {
            if (captype (dent->flagfield) != FOLLOWCASE)
            {
                lowcase (newword);
                if (firstisupper  ||  captype (dent->flagfield) == CAPITALIZED)
                    newword[0] = mytoupper (newword[0]);
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                if (*nsaved >= MAX_CAPS)
                    return;
            }
            else
            {
                /* Followcase is the tough one. */
                p = strtosichar (dent->word, 1);
                /*
                ** Overwrite the root portion of newword (after the
                ** added prefix) with the entry's own spelling, minus
                ** whatever the affixes stripped from each end.
                */
                memmove (
                  reinterpret_cast<char *>(newword + preadd),
                  reinterpret_cast<char *>(p + prestrip),
                  (len - prestrip - sufstrip) * sizeof (ichar_t));
                /*
                ** The added prefix takes the case of the first root
                ** character that was kept.
                */
                if (myupper (p[prestrip]))
                {
                    for (i = 0;  i < preadd;  i++)
                        newword[i] = mytoupper (newword[i]);
                }
                else
                {
                    for (i = 0;  i < preadd;  i++)
                        newword[i] = mytolower (newword[i]);
                }
                /*
                ** i..limit-1 is the added suffix inside newword; it
                ** takes the case of the last root character that was
                ** kept, which p is moved to point at.
                */
                limit = len + preadd + sufadd - prestrip - sufstrip;
                i = len + preadd - prestrip - sufstrip;
                p += len - sufstrip - 1;
                if (myupper (*p))
                {
                    for (p = newword + i;  i < limit;  i++, p++)
                        *p = mytoupper (*p);
                }
                else
                {
                    for (p = newword + i;  i < limit;  i++, p++)
                        *p = mytolower (*p);
                }
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                if (*nsaved >= MAX_CAPS)
                    return;
            }
        }
        if ((dent->flagfield & MOREVARIANTS) == 0)
            break;      /* End of the line */
        dent = dent->next;
    }
    return;
#endif /* NO_CAPITALIZATION_SUPPORT */
}