makedent.cpp Source File

tdespell2

/* enchant
 * Copyright (C) 2003 Dom Lachowicz
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * In addition, as a special exception, Dom Lachowicz
 * gives permission to link the code of this program with
 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
 * spell checker backend) and distribute linked combinations including
 * the two.  You must obey the GNU Lesser General Public License in all
 * respects for all of the code used other than said providers.  If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so.  If you do not wish to
 * do so, delete this exception statement from your version.
 */
 
/*
 * Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All modifications to the source code must be clearly marked as
 *    such.  Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 * 4. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgment:
 *      This product includes software developed by Geoff Kuenning and
 *      other unpaid contributors.
 * 5. The name of Geoff Kuenning may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 
/*
 * $Log$
 * Revision 1.2  2004/02/01 04:46:46  zrusin
 * Both ispell and aspell plugins are not working properly. We can start switching.
 *
 * Revision 1.1  2004/01/31 16:44:12  zrusin
 * ISpell plugin.
 *
 * Revision 1.4  2003/08/14 17:51:28  dom
 * update license - exception clause should be Lesser GPL
 *
 * Revision 1.3  2003/07/28 20:40:27  dom
 * fix up the license clause, further win32-registry proof some directory getting functions
 *
 * Revision 1.2  2003/07/16 22:52:49  dom
 * LGPL + exception license
 *
 * Revision 1.1  2003/07/15 01:15:08  dom
 * ispell enchant backend
 *
 * Revision 1.3  2003/02/12 02:10:38  hippietrail
 *
 * C casts -> C++ casts
 * Improved const-correctness due to changing casts
 * Fixed some warnings
 *
 * Revision 1.2  2003/01/29 05:50:12  hippietrail
 *
 * Fixed my mess in EncodingManager.
 * Changed many C casts to C++ casts.
 *
 * Revision 1.1  2003/01/24 05:52:35  hippietrail
 *
 * Refactored ispell code. Old ispell global variables had been put into
 * an allocated structure, a pointer to which was passed to many functions.
 * I have now made all such functions and variables private members of the
 * ISpellChecker class. It was C OO, now it's C++ OO.
 *
 * I've fixed the makefiles and tested compilation but am unable to test
 * operation. Please back out my changes if they cause problems which
 * are not obvious or easy to fix.
 *
 * Revision 1.8  2003/01/06 18:48:40  dom
 * ispell cleanup, start of using new 'add' save features
 *
 * Revision 1.7  2003/01/04 19:09:04  dom
 * some tidying... bug pissing me off...
 *
 * Revision 1.6  2002/09/19 05:31:18  hippietrail
 *
 * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
 * K&R function declarations removed, converted to Doxygen style comments
 * where possible.  No code has been changed (I hope).  Compiles for me but
 * unable to test.
 *
 * Revision 1.5  2002/09/17 03:03:30  hippietrail
 *
 * After seeking permission on the developer list I've reformatted all the
 * spelling source which seemed to have parts which used 2, 3, 4, and 8
 * spaces for tabs.  It should all look good with our standard 4-space
 * tabs now.
 * I've concentrated just on indentation in the actual code.  More prettying
 * could be done.
 * * NO code changes were made *
 *
 * Revision 1.4  2002/09/13 17:20:13  mpritchett
 * Fix more warnings for Linux build
 *
 * Revision 1.3  2002/03/22 14:31:57  dom
 * fix mg's compile problem
 *
 * Revision 1.2  2001/05/12 16:05:42  thomasf
 * Big pseudo changes to ispell to make it pass around a structure rather
 * than rely on all sorts of globals willy nilly here and there.  Also
 * fixed our spelling class to work with accepting suggestions once more.
 * This code is dirty, gross and ugly (not to mention still not supporting
 * multiple hash sized just yet) but it works on my machine and will no
 * doubt break other machines.
 *
 * Revision 1.1  2001/04/15 16:01:24  tomas_f
 * moving to spell/xp
 *
 * Revision 1.6  1999/12/21 18:46:29  sterwill
 * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
 *
 * Revision 1.5  1999/10/20 03:19:35  paul
 * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary.  It ain't pretty, but at least we don't crash there any more.
 *
 * Revision 1.4  1999/04/13 17:12:51  jeff
 * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
 * Fixed crash on Win32 with the new code.
 *
 * Revision 1.3  1998/12/29 14:55:33  eric
 *
 * I've doctored the ispell code pretty extensively here.  It is now
 * warning-free on Win32.  It also *works* on Win32 now, since I
 * replaced all the I/O calls with ANSI standard ones.
 *
 * Revision 1.3  1998/12/29 14:55:33  eric
 *
 * I've doctored the ispell code pretty extensively here.  It is now
 * warning-free on Win32.  It also *works* on Win32 now, since I
 * replaced all the I/O calls with ANSI standard ones.
 *
 * Revision 1.2  1998/12/28 23:11:30  eric
 *
 * modified spell code and integration to build on Windows.
 * This is still a hack.
 *
 * Actually, it doesn't yet WORK on Windows.  It just builds.
 * SpellCheckInit is failing for some reason.
 *
 * Revision 1.1  1998/12/28 18:04:43  davet
 * Spell checker code stripped from ispell.  At this point, there are
 * two external routines...  the Init routine, and a check-a-word routine
 * which returns a boolean value, and takes a 16 bit char string.
 * The code resembles the ispell code as much as possible still.
 *
 * Revision 1.45  1994/12/27  23:08:52  geoff
 * Add code to makedent to reject words that contain non-word characters.
 * This helps protect people who use ISO 8-bit characters when ispell
 * isn't configured for that option.
 *
 * Revision 1.44  1994/10/25  05:46:20  geoff
 * Fix some incorrect declarations in the lint versions of some routines.
 *
 * Revision 1.43  1994/09/16  03:32:34  geoff
 * Issue an error message for bad affix flags
 *
 * Revision 1.42  1994/02/07  04:23:43  geoff
 * Correctly identify the deformatter when changing file types
 *
 * Revision 1.41  1994/01/25  07:11:55  geoff
 * Get rid of all old RCS log lines in preparation for the 3.1 release.
 *
 */
 
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
 
#include "ispell_checker.h"
#include "msgs.h"
 
/*
 * Prototype kept from the original ispell sources.  No definition of
 * makedent appears in the visible part of this file.
 * NOTE(review): P(...) is presumably the prototype-wrapper macro supplied
 * by the ispell headers -- confirm.
 */
int     makedent P ((char * lbuf, int lbuflen, struct dent * ent));
/*int       combinecaps P ((struct dent * hdr, struct dent * newent));
#ifndef NO_CAPITALIZATION_SUPPORT
static void forcevheader P ((struct dent * hdrp, struct dent * oldp,
          struct dent * newp));
#endif / * NO_CAPITALIZATION_SUPPORT * /
static int  combine_two_entries P ((struct dent * hdrp,
          struct dent * oldp, struct dent * newp));
static int  acoversb P ((struct dent * enta, struct dent * entb));
*/
/*static int    issubset P ((struct dent * ent1, struct dent * ent2));
static void combineaffixes P ((struct dent * ent1, struct dent * ent2));*/
 
/*
 * Prototype for the dictionary-entry writer.  No definition of toutent
 * appears in the visible part of this file; a later comment in this file
 * marks the related output routines as removed.
 */
void        toutent P ((FILE * outfile, struct dent * hent,
          int onlykeep));
/*static void   toutword P ((FILE * outfile, char * word,
          struct dent * cent));
static void flagout P ((FILE * outfile, int flag));
*/
/*
 * Prototypes for the free-standing ichar_t string helpers defined further
 * down in this file (copy, length, compare, bounded compare).  They are
 * only declared when ICHAR_IS_CHAR is not defined, matching the guard
 * around their definitions.
 */
#ifndef ICHAR_IS_CHAR
ichar_t *   icharcpy P ((ichar_t * out, ichar_t * in));
int     icharlen P ((ichar_t * str));
int     icharcmp P ((ichar_t * s1, ichar_t * s2));
int     icharncmp P ((ichar_t * s1, ichar_t * s2, int n));
#endif
 
/*static int    has_marker;*/
 
/*
 * Fill in a directory entry, including setting the capitalization flags, and
 * allocate and initialize memory for the d->word field.  Returns -1
 * if there was trouble.  The input word must be in canonical form.
int makedent (lbuf, lbuflen, d)
This function is not used by AbiWord.  I don't know if it'll be needed for 
other abi documents
 */
    
#ifndef NO_CAPITALIZATION_SUPPORT
/*
 * Classify the capitalization pattern of a zero-terminated ichar_t word.
 *
 * \param word  word to classify
 *
 * \return ALLCAPS      if the word contains no lowercase character
 *                      (this includes the empty word),
 *         FOLLOWCASE   if an uppercase character follows the first
 *                      lowercase one, or if the word starts uppercase and
 *                      has a further uppercase character,
 *         CAPITALIZED  if only the first character is uppercase,
 *         ANYCASE      if the first character is not uppercase and no
 *                      uppercase character follows the first lowercase one.
 */
long
ISpellChecker::whatcap (ichar_t *word)
{
    ichar_t *   scan = word;

    /* Advance to the first lowercase character, if there is one. */
    while (*scan  &&  !mylower (*scan))
        scan++;
    if (*scan == '\0')
        return ALLCAPS;

    /* From there on, look for any uppercase character. */
    while (*scan  &&  !myupper (*scan))
        scan++;
    if (*scan != '\0')
        return FOLLOWCASE;  /* .../lower/upper */

    /*
    ** No uppercase letters follow the lowercase ones, so the result
    ** depends only on the leading run of the word.
    */
    if (!myupper (word[0]))
        return ANYCASE;

    for (scan = word + 1;  *scan != '\0';  scan++)
    {
        if (myupper (*scan))
            return FOLLOWCASE;  /* more than one capital up front */
    }
    return CAPITALIZED;
}
 
/*
 * Convert dp into a variant header and chain a copy of its previous
 * contents behind it.
 *
 * A new entry is malloc'ed and filled with a copy of *dp.  The copy keeps
 * its own duplicate of the word only when its capitalization type is
 * FOLLOWCASE; otherwise its word pointer is cleared.  dp's word is then
 * uppercased in place, dp->next is pointed at the copy, and dp's
 * capitalization bits are replaced by ALLCAPS | MOREVARIANTS.
 *
 * \param dp  entry to turn into the header
 *
 * \return 0 on success, -1 if memory could not be allocated (a message is
 *         written to stderr and dp is left unmodified).
 */
int ISpellChecker::addvheader ( struct dent *dp)
{
    struct dent *   variant;    /* Keeps dp's original contents */

    variant = static_cast<struct dent *>(malloc (sizeof *variant));
    if (variant == NULL)
    {
        fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
        return -1;
    }
    *variant = *dp;

    if (captype (variant->flagfield) == FOLLOWCASE)
    {
        /* Followcase words need a copy of the capitalization */
        char *  wordcopy =
          static_cast<char *>(malloc (static_cast<unsigned int>(strlen (dp->word)) + 1));

        if (wordcopy == NULL)
        {
            fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
            free (variant);
            return -1;
        }
        strcpy (wordcopy, dp->word);
        variant->word = wordcopy;
    }
    else
        variant->word = NULL;

    /* dp itself becomes the all-caps dummy header in front of the copy. */
    chupcase (dp->word);
    dp->next = variant;
    dp->flagfield = (dp->flagfield & ~CAPTYPEMASK) | (ALLCAPS | MOREVARIANTS);
    return 0;
}
#endif
 
/*
** Combine and resolve the entries describing two capitalizations of the same
** word.  This may require allocating yet more entries.
**
** Hdrp is a pointer into a hash table.  If the word covered by hdrp has
** variations, hdrp must point to the header.  Newp is a pointer to temporary
** storage, and space is malloc'ed if newp is to be kept.  The newp->word
** field must have been allocated with mymalloc, so that this routine may free
** the space if it keeps newp but not the word.
**
** Return value:  0 if the word was added, 1 if the word was combined
** with an existing entry, and -1 if trouble occurred (e.g., malloc).
** If 1 is returned, newp->word may have been freed using myfree.
**
** Life is made much more difficult by the KEEP flag's possibilities.  We
** must ensure that a !KEEP word doesn't find its way into the personal
** dictionary as a result of this routine's actions.  However, a !KEEP
** word that has affixes must have come from the main dictionary, so it
** is acceptable to combine entries in that case (got that?).
**
** The net result of all this is a set of rules that is a bloody pain
** to figure out.  Basically, we want to choose one of the following actions:
**
**  (1) Add newp's affixes and KEEP flag to oldp, and discard newp.
**  (2) Add oldp's affixes and KEEP flag to newp, replace oldp with
**      newp, and discard newp.
#ifndef NO_CAPITALIZATION_SUPPORT
**  (3) Insert newp as a new entry in the variants list.  If there is
**      currently no variant header, this requires adding one.  Adding a
**      header splits into two sub-cases:
**
**      (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it
**      into the header.
**      (3b) Otherwise, add a new entry to serve as the header.
**      To ease list linking, this is done by copying oldp into
**      the new entry, and then performing (3a).
**
**      After newp has been added as a variant, its affixes and KEEP
**      flag are OR-ed into the variant header.
#endif
**
** So how to choose which?  The default is always case (3), which adds newp
** as a new entry in the variants list.  Cases (1) and (2) are symmetrical
** except for which entry is discarded.  We can use case (1) or (2) whenever
** one entry "covers" the other.  "Covering" is defined as follows:
**
**  (4) For entries with matching capitalization types, A covers B
**      if:
**
**      (4a) B's affix flags are a subset of A's, or the KEEP flags
**       match, and
**      (4b) either the KEEP flags match, or A's KEEP flag is set.
**      (Since A has more suffixes, combining B with it won't
**      cause any extra suffixes to be added to the dictionary.)
**      (4c) If the words are FOLLOWCASE, the capitalizations match
**      exactly.
**
#ifndef NO_CAPITALIZATION_SUPPORT
**  (5) For entries with mismatched capitalization types, A covers B
**      if (4a) and (4b) are true, and:
**
**      (5a) B is ALLCAPS, or
**      (5b) A is ANYCASE, and B is CAPITALIZED.
#endif
**
** For any "hdrp" without variants, oldp is the same as hdrp.  Otherwise,
** the above tests are applied using each variant in turn for oldp.
int combinecaps (hdrp, newp)
static void forcevheader (hdrp, oldp, newp)
static int combine_two_entries (hdrp, oldp, newp)
static int acoversb (enta, entb)
*/
 
/*
 * Convert a zero-terminated ichar_t string to upper case in place, one
 * character at a time, using mytoupper().
 *
 * \param s  string to convert; modified in place
 */
void
ISpellChecker::upcase (ichar_t *s)
{
    for (ichar_t *cur = s;  *cur != 0;  ++cur)
        *cur = mytoupper (*cur);
}
 
/*
 * Convert a zero-terminated ichar_t string to lower case in place, one
 * character at a time, using mytolower().
 *
 * \param s  string to convert; modified in place
 */
void
ISpellChecker::lowcase (ichar_t *s)
{
    for (ichar_t *cur = s;  *cur != 0;  ++cur)
        *cur = mytolower (*cur);
}
 
void
ISpellChecker::chupcase (char *s)
{
    ichar_t *   is;
 
    is = strtosichar (s, 1);
    upcase (is);
    ichartostr (s, is, strlen (s) + 1, 1);
}
 
/*
** See if one affix field is a subset of another.  Returns NZ if ent1
** is a subset of ent2.  The KEEP flag is not taken into consideration.
static int issubset (ent1, ent2)
static void combineaffixes (ent1, ent2)
*/
 
/*
** Write out a dictionary entry, including capitalization variants.
** If onlykeep is true, only those variants with KEEP set will be
** written.
Removed -- not used by Abiword
void toutent_ (toutfile, hent, onlykeep)
static void toutword (toutfile, word, cent)
static void flagout (toutfile, flag)
*/
 
/*
 * Check whether a "string character" from the hash header starts at bufp.
 *
 * A binary search is run over m_hashheader.stringchars[0 .. nstrchars-1].
 * An entry matches when every byte of it equals the corresponding byte of
 * bufp (only the low 7 bits are compared when NO8BIT is defined) and its
 * dupnos value equals the wanted duplicate number: 0 in canonical mode,
 * m_defdupchar otherwise.
 * NOTE(review): the search presumes the table is ordered consistently with
 * the comparisons below (bytes first, then dupnos) -- confirm against the
 * code that builds the hash header.
 *
 * The former SLOWMULTIPLY variant has been dropped: it referred to
 * `hashheader` where the rest of this file uses the member m_hashheader,
 * and it cached row pointers in function-static storage, which would have
 * tied every ISpellChecker instance to whichever instance's header filled
 * the cache first.
 *
 * \param bufp       text to examine
 * \param canonical  nonzero to look for the canonical form (duplicate 0)
 *
 * \return the length in bytes of the matching string character, with
 *         m_laststringch set to its stringdups value; 0 if nothing
 *         matches, with m_laststringch set to (unsigned int) -1.
 */
int
ISpellChecker::stringcharlen (char *bufp, int canonical)
{
    char *  bufcur;
    char *  stringcur;
    int stringno;
    int lowstringno;
    int highstringno;
    int         dupwanted;

    lowstringno = 0;
    highstringno = m_hashheader.nstrchars - 1;
    dupwanted = canonical ? 0 : m_defdupchar;
    while (lowstringno <= highstringno)
    {
        stringno = (lowstringno + highstringno) >> 1;
        stringcur = &m_hashheader.stringchars[stringno][0];
        bufcur = bufp;
        while (*stringcur)
        {
#ifdef NO8BIT
            if (((*bufcur++ ^ *stringcur) & 0x7F) != 0)
#else
            if (*bufcur++ != *stringcur)
#endif
                break;
            /*
            ** We can't use autoincrement above because of the
            ** test below.
            */
            stringcur++;
        }
        if (*stringcur == '\0')
        {
            if (m_hashheader.dupnos[stringno] == dupwanted)
            {
                /* We have a match */
                m_laststringch = m_hashheader.stringdups[stringno];
                return static_cast<int>(stringcur - &m_hashheader.stringchars[stringno][0]);
            }
            else
                --stringcur;    /* back onto the last byte compared */
        }
        /*
        ** No match - choose which side to search on.  Both cursors are
        ** stepped back to the last byte that was compared.
        */
#ifdef NO8BIT
        if ((*--bufcur & 0x7F) < (*stringcur & 0x7F))
            highstringno = stringno - 1;
        else if ((*bufcur & 0x7F) > (*stringcur & 0x7F))
            lowstringno = stringno + 1;
#else
        if (*--bufcur < *stringcur)
            highstringno = stringno - 1;
        else if (*bufcur > *stringcur)
            lowstringno = stringno + 1;
#endif
        else if (dupwanted < m_hashheader.dupnos[stringno])
            highstringno = stringno - 1;
        else
            lowstringno = stringno + 1;
    }
    m_laststringch = static_cast<unsigned int>(-1);
    return 0;           /* Not a string character */
}
 
/* MACROS CONVERTED TO FUNCTIONS
** These macros are similar to the ones above, but they take into account
** the possibility of string characters.  Note well that they take a POINTER,
** not a character.
**
** The "l_" versions set "len" to the length of the string character as a
** handy side effect.  (Note that the global "laststringch" is also set,
** and sometimes used, by these macros.)
**
** The "l1_" versions go one step further and guarantee that the "len"
** field is valid for *all* characters, being set to 1 even if the macro
** returns false.  This macro is a great example of how NOT to write
** readable C.
*/
/*
 * True when a string character starts at ptr.  The byte is cast to
 * unsigned char before being handed to isstringstart(), as l1_isstringch
 * does, so that bytes with the high bit set are not turned into
 * out-of-range values where plain char is signed.
 */
#define isstringch(ptr, canon)  (isstringstart ((unsigned char)(*(ptr))) \
                  &&  stringcharlen ((ptr), (canon)) > 0)
/*
int isstringch(char *ptr, int canon) {
    return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0);
}
*/
 
/*
 * Like isstringch, but also stores the result of stringcharlen() in len
 * whenever the first byte can start a string character; len is left
 * untouched otherwise.  The byte is cast to unsigned char before being
 * handed to isstringstart(), as l1_isstringch does, so that bytes with
 * the high bit set are handled where plain char is signed.
 */
#define l_isstringch(ptr, len, canon)   \
                (isstringstart ((unsigned char)(*(ptr))) \
                  &&  (len = stringcharlen ((ptr), (canon))) \
                    > 0)
/*
int l_isstringch(char *ptr, int len, int canon) {
    return (isstringstart (*(ptr)) &&  (len = stringcharlen ((ptr), (canon))) > 0);
}
*/
 
/*
 * Evaluates to 1 when a string character starts at ptr and to 0 otherwise.
 * len must be an assignable lvalue: it is first set to 1, then to the
 * length returned by stringcharlen() when that length is positive, and
 * reset to 1 when stringcharlen() reports no match.  len is therefore
 * always a usable step size after the macro has been evaluated.
 * stringcharlen() is only called when isstringstart() accepts the first
 * byte.
 */
#define l1_isstringch(ptr, len, canon)  \
                (len = 1, \
                  isstringstart ((unsigned char)(*(ptr))) \
                    &&  ((len = \
                      stringcharlen ((ptr), (canon))) \
                    > 0 \
                      ? 1 : (len = 1, 0)))
/*
int l1_isstringch(char *ptr, int len, int canon) {
    return (len = 1, isstringstart ((unsigned char)(*(ptr))) &&  
           ((len = stringcharlen ((ptr), (canon))) > 0 ? 1 : (len = 1, 0)));
}
*/
 
/*** END MACRO CONVERSION ***/
 
/*
 * Convert an external (char) string to internal ichar_t form.
 *
 * Each string character recognized by l1_isstringch() becomes
 * SET_SIZE + m_laststringch; every other byte is stored as its unsigned
 * char value.  The output is always zero-terminated.
 *
 * \param out        destination buffer
 * \param in         zero-terminated source string
 * \param outlen     size of the destination in bytes (converted to an
 *                   ichar_t count internally)
 * \param canonical  passed through to the string-character lookup
 *
 * \return nonzero if the conversion stopped because the output capacity
 *         was used up, zero if the end of the input was reached first.
 */
int
ISpellChecker::strtoichar (ichar_t *out, char *in, int outlen, int canonical)
{
    int step = 1;       /* Bytes taken by the character just converted */

    outlen /= sizeof (ichar_t);     /* Work in ichar_t units from here on */
    while (--outlen > 0  &&  *in != '\0')
    {
        if (l1_isstringch (in, step, canonical))
            *out = SET_SIZE + m_laststringch;
        else
            *out = static_cast<unsigned char>(*in);
        out++;
        in += step;
    }
    *out = 0;
    return outlen <= 0;
}
 
int
ISpellChecker::ichartostr ( char *out, ichar_t *in, int outlen, int canonical)
{
    int ch;     /* Next character to store */
    int i;      /* Index into duplicates list */
    char *  scharp;     /* Pointer into a string char */
 
    while (--outlen > 0  &&  (ch = *in++) != 0)
    {
        if (ch < SET_SIZE)
            *out++ = static_cast<char>(ch);
        else
        {
            ch -= SET_SIZE;
            if (!canonical)
            {
                for (i = m_hashheader.nstrchars;  --i >= 0;  )
                {
                    if (m_hashheader.dupnos[i] == m_defdupchar
                      &&  (static_cast<int>(m_hashheader.stringdups[i])) == ch)
                    {
                        ch = i;
                        break;
                    }
                }
            }
            scharp = m_hashheader.stringchars[static_cast<unsigned>(ch)];
            while ((*out++ = *scharp++) != '\0')
                ;
            out--;
        }
    }
    *out = '\0';
    return outlen <= 0;
}
 
/*
 * Convert an external string to ichar_t form in a function-static buffer
 * of STRTOSICHAR_SIZE bytes.  If strtoichar() reports that the input did
 * not fit, a WORD_TOO_LONG message is written to stderr and the result
 * built so far is still returned.
 *
 * \param in         zero-terminated source string
 * \param canonical  passed through to strtoichar()
 *
 * \return pointer to the static buffer; its contents are overwritten by
 *         the next call.
 */
ichar_t *
ISpellChecker::strtosichar ( char *in, int canonical)
{
    static ichar_t  out[STRTOSICHAR_SIZE / sizeof (ichar_t)];
    const int       overflowed = strtoichar (out, in, sizeof out, canonical);

    if (overflowed)
        fprintf (stderr, WORD_TOO_LONG (in));
    return out;
}
 
/*
 * Convert an ichar_t string to external form in a function-static buffer
 * of ICHARTOSSTR_SIZE bytes.  If ichartostr() reports that the input did
 * not fit, a WORD_TOO_LONG message is written to stderr and the result
 * built so far is still returned.
 *
 * \param in         zero-terminated source string
 * \param canonical  passed through to ichartostr()
 *
 * \return pointer to the static buffer; its contents are overwritten by
 *         the next call.
 */
char *
ISpellChecker::ichartosstr (ichar_t *in, int canonical)
{
    static char     out[ICHARTOSSTR_SIZE];
    const int       overflowed = ichartostr (out, in, sizeof out, canonical);

    if (overflowed)
        fprintf (stderr, WORD_TOO_LONG (out));
    return out;
}
 
/*
 * Produce the external spelling of a single internal character in a
 * function-static buffer.
 *
 * \param in  internal character value; values of SET_SIZE and above are
 *            looked up in m_hashheader.stringchars at index in - SET_SIZE
 *            (the index is not range-checked here)
 *
 * \return pointer to the static buffer; its contents are overwritten by
 *         the next call.
 */
char *
ISpellChecker::printichar (int in)
{
    static char     out[MAXSTRINGCHARLEN + 1];

    if (in >= SET_SIZE)
    {
        /* String character: copy its spelling from the header table. */
        strcpy (out, m_hashheader.stringchars[static_cast<unsigned>(in) - SET_SIZE]);
        return out;
    }

    /* Ordinary character: a one-byte string. */
    out[0] = static_cast<char>(in);
    out[1] = '\0';
    return out;
}
 
#ifndef ICHAR_IS_CHAR
/*
 * Copy a zero-terminated ichar_t string, terminator included.
 *
 * \param out  destination; must have room for the whole string
 * \param in   source string
 *
 * \return out
 */
ichar_t *
icharcpy (ichar_t *out, ichar_t *in)
{
    ichar_t *   dst = out;  /* Write cursor; out is kept for the return */

    for (;;)
    {
        const ichar_t   unit = *in++;

        *dst++ = unit;
        if (unit == 0)
            break;
    }
    return out;
}
 
/*
 * Count the characters of a zero-terminated ichar_t string.
 *
 * \param in  string to measure
 *
 * \return number of characters before the terminating zero
 */
int
icharlen (ichar_t * in)
{
    int count = 0;

    while (in[count] != 0)
        count++;
    return count;
}
 
/*
 * Compare two zero-terminated ichar_t strings.
 *
 * \param s1  first string
 * \param s2  second string
 *
 * \return the difference *s1 - *s2 at the first position where the
 *         strings differ or where s1 ends; zero when they are equal.
 */
int
icharcmp (ichar_t * s1, ichar_t * s2)
{
    /* Walk both strings while s1 has characters left and they agree. */
    while (*s1 != 0  &&  *s1 == *s2)
    {
        ++s1;
        ++s2;
    }
    return *s1 - *s2;
}
 
/*
 * Compare at most n characters of two zero-terminated ichar_t strings.
 *
 * \param s1  first string
 * \param s2  second string
 * \param n   maximum number of characters to compare
 *
 * \return the difference *s1 - *s2 at the first position, within the
 *         first n characters, where the strings differ or where s1 ends;
 *         zero when the first n characters agree (or n is not positive).
 */
int
icharncmp (ichar_t *s1, ichar_t *s2, int n)
{
    for (int left = n;  left > 0;  --left, ++s1, ++s2)
    {
        if (*s1 == 0  ||  *s1 != *s2)
            return *s1 - *s2;
    }
    return 0;
}
 
#endif
 
/*
 * Find the string-character type that applies to a name.
 *
 * When searchnames is nonzero the type names in m_chartypes are tried
 * first, looking for an exact match.  After that the suffix list of each
 * type is scanned for a suffix that name ends with.  A suffix list is
 * walked as consecutive zero-terminated strings up to an empty string.
 *
 * \param name         type name or file name to look up
 * \param searchnames  nonzero to also match name against the type names
 * \param deformatter  if not NULL and a type is found, receives 1 when
 *                     that type's deformatter is "tex" and 0 otherwise;
 *                     left untouched when nothing is found
 *
 * \return index of the matching type, or -1 if there is none
 */
int
ISpellChecker::findfiletype (const char *name, int searchnames, int *deformatter)
{
    const int   namelen = strlen (name);
    int         found = -1;     /* Index of the matching type, if any */

    /*
     * Note:  for now, the deformatter is set to 1 for tex, 0 for nroff.
     * Further, we assume that it's one or the other, so that a test
     * for tex is sufficient.  This needs to be generalized.
     */
    if (searchnames)
    {
        for (int t = 0;  found < 0  &&  t < m_hashheader.nstrchartype;  t++)
        {
            if (strcmp (name, m_chartypes[t].name) == 0)
                found = t;
        }
    }

    for (int t = 0;  found < 0  &&  t < m_hashheader.nstrchartype;  t++)
    {
        const char *    suffix = m_chartypes[t].suffixes;

        while (found < 0  &&  *suffix != '\0')
        {
            const int   suffixlen = strlen (suffix);

            if (namelen >= suffixlen
              &&  strcmp (name + (namelen - suffixlen), suffix) == 0)
                found = t;
            else
                suffix += suffixlen + 1;    /* Step over the terminator */
        }
    }

    if (found >= 0  &&  deformatter != NULL)
        *deformatter = (strcmp (m_chartypes[found].deformatter, "tex") == 0);
    return found;
}
 
/*
    HACK: macros replaced with function implementations 
    so we could do a side-effect-free check for unicode
    characters which aren't in hashheader
 
    TODO: this is just a workaround to keep us from crashing. 
    more sophisticated logic needed here. 
*/
/*
 * Range-checked lookup in m_hashheader.upperchars.
 *
 * \param c  internal character value
 *
 * \return the table entry for c, or 0 when c is not below
 *         SET_SIZE + MAXSTRINGCHARS and so has no table entry.
 */
char ISpellChecker::myupper(ichar_t c)
{
    if (c >= (SET_SIZE + MAXSTRINGCHARS))
        return 0;
    return m_hashheader.upperchars[c];
}
 
/*
 * Range-checked lookup in m_hashheader.lowerchars.
 *
 * \param c  internal character value
 *
 * \return the table entry for c, or 0 when c is not below
 *         SET_SIZE + MAXSTRINGCHARS and so has no table entry.
 */
char ISpellChecker::mylower(ichar_t c)
{
    if (c >= (SET_SIZE + MAXSTRINGCHARS))
        return 0;
    return m_hashheader.lowerchars[c];
}
 
/*
 * Whitespace test for internal characters.  Only values strictly between
 * 0 and 0x80 are passed to isspace(); everything else is reported as not
 * being whitespace.
 *
 * \param c  internal character value
 *
 * \return 1 if c is whitespace according to isspace(), 0 otherwise
 */
int myspace(ichar_t c)
{
    if (!(c > 0)  ||  !(c < 0x80))
        return 0;
    return isspace(static_cast<unsigned char>(c)) != 0;
}
 
/*
 * Range-checked lookup in m_hashheader.wordchars.
 *
 * \param c  internal character value
 *
 * \return the table entry for c, or 0 when c is not below
 *         SET_SIZE + MAXSTRINGCHARS and so has no table entry.
 */
char ISpellChecker::iswordch(ichar_t c)
{
    if (c >= (SET_SIZE + MAXSTRINGCHARS))
        return 0;
    return m_hashheader.wordchars[c];
}
 
/*
 * Range-checked lookup in m_hashheader.boundarychars.
 *
 * \param c  internal character value
 *
 * \return the table entry for c, or 0 when c is not below
 *         SET_SIZE + MAXSTRINGCHARS and so has no table entry.
 */
char ISpellChecker::isboundarych(ichar_t c)
{
    if (c >= (SET_SIZE + MAXSTRINGCHARS))
        return 0;
    return m_hashheader.boundarychars[c];
}
 
/*
 * Range-checked lookup in m_hashheader.stringstarts.  Unlike the other
 * table lookups in this group, the limit here is SET_SIZE alone.
 *
 * \param c  character value
 *
 * \return the table entry for c, or 0 when c is not below SET_SIZE.
 */
char ISpellChecker::isstringstart(ichar_t c)
{
    if (c >= (SET_SIZE))
        return 0;
    return m_hashheader.stringstarts[static_cast<unsigned char>(c)];
}
 
/*
 * Range-checked lookup in m_hashheader.lowerconv.
 *
 * \param c  internal character value
 *
 * \return the table entry for c, or c itself when c is not below
 *         SET_SIZE + MAXSTRINGCHARS and so has no table entry.
 */
ichar_t ISpellChecker::mytolower(ichar_t c)
{
    if (c >= (SET_SIZE + MAXSTRINGCHARS))
        return c;
    return m_hashheader.lowerconv[c];
}
 
/*
 * Range-checked lookup in m_hashheader.upperconv.
 *
 * \param c  internal character value
 *
 * \return the table entry for c, or c itself when c is not below
 *         SET_SIZE + MAXSTRINGCHARS and so has no table entry.
 */
ichar_t ISpellChecker::mytoupper (ichar_t c)
{
    if (c >= (SET_SIZE + MAXSTRINGCHARS))
        return c;
    return m_hashheader.upperconv[c];
}