41 #include "ispell_checker.h"
45 #include <tqfileinfo.h>
// Record type for one row of the language lookup table. The member list is
// not visible in this excerpt; code further down reads `lang`, `dict` and
// `enc` through `const IspellMap *` pointers (presumably the typedef name of
// this struct -- confirm against the full declaration).
49 typedef struct str_ispell_map
// Directories that are probed for ispell hash files. The loops in this file
// walk the array with `while ((tmp = ispell_dirs[i++]))`, so it is expected
// to end with a null entry (the terminator is not visible in this excerpt).
// SYSTEM_LIBDIR is a macro that is concatenated into the path literals.
56 static const char *ispell_dirs [] = {
60 "/usr/" SYSTEM_LIBDIR
"/ispell",
62 "/usr/local/" SYSTEM_LIBDIR
"/ispell",
63 "/usr/local/lib/ispell",
64 "/usr/local/share/ispell",
// Lookup table from a language/locale tag (e.g. "en" or "en_GB") to the
// ispell hash file to load and the name of the character encoding used with
// that dictionary. Rows are initialised positionally; judging from the values
// and from how the members are read below, the order is lang, dict, enc.
// Lookups compare with strcmp(), so tags must match exactly (case-sensitive).
70 static const IspellMap ispell_map [] = {
71 {
"ca" ,
"catala.hash" ,
"iso-8859-1" },
72 {
"ca_ES" ,
"catala.hash" ,
"iso-8859-1" },
73 {
"cs" ,
"czech.hash" ,
"iso-8859-2" },
74 {
"cs_CZ" ,
"czech.hash" ,
"iso-8859-2" },
75 {
"da" ,
"dansk.hash" ,
"iso-8859-1" },
76 {
"da_DK" ,
"dansk.hash" ,
"iso-8859-1" },
77 {
"de" ,
"deutsch.hash" ,
"iso-8859-1" },
78 {
"de_CH" ,
"swiss.hash" ,
"iso-8859-1" },
79 {
"de_AT" ,
"deutsch.hash" ,
"iso-8859-1" },
80 {
"de_DE" ,
"deutsch.hash" ,
"iso-8859-1" },
81 {
"el" ,
"ellhnika.hash" ,
"iso-8859-7" },
82 {
"el_GR" ,
"ellhnika.hash" ,
"iso-8859-7" },
83 {
"en" ,
"british.hash" ,
"iso-8859-1" },
84 {
"en_AU" ,
"british.hash" ,
"iso-8859-1" },
85 {
"en_BZ" ,
"british.hash" ,
"iso-8859-1" },
86 {
"en_CA" ,
"british.hash" ,
"iso-8859-1" },
87 {
"en_GB" ,
"british.hash" ,
"iso-8859-1" },
88 {
"en_IE" ,
"british.hash" ,
"iso-8859-1" },
89 {
"en_JM" ,
"british.hash" ,
"iso-8859-1" },
90 {
"en_NZ" ,
"british.hash" ,
"iso-8859-1" },
91 {
"en_TT" ,
"british.hash" ,
"iso-8859-1" },
92 {
"en_ZA" ,
"british.hash" ,
"iso-8859-1" },
93 {
"en_ZW" ,
"british.hash" ,
"iso-8859-1" },
94 {
"en_PH" ,
"american.hash" ,
"iso-8859-1" },
95 {
"en_US" ,
"american.hash" ,
"iso-8859-1" },
96 {
"eo" ,
"esperanto.hash" ,
"iso-8859-3" },
97 {
"es" ,
"espanol.hash" ,
"iso-8859-1" },
98 {
"es_AR" ,
"espanol.hash" ,
"iso-8859-1" },
99 {
"es_BO" ,
"espanol.hash" ,
"iso-8859-1" },
100 {
"es_CL" ,
"espanol.hash" ,
"iso-8859-1" },
101 {
"es_CO" ,
"espanol.hash" ,
"iso-8859-1" },
102 {
"es_CR" ,
"espanol.hash" ,
"iso-8859-1" },
103 {
"es_DO" ,
"espanol.hash" ,
"iso-8859-1" },
104 {
"es_EC" ,
"espanol.hash" ,
"iso-8859-1" },
105 {
"es_ES" ,
"espanol.hash" ,
"iso-8859-1" },
106 {
"es_GT" ,
"espanol.hash" ,
"iso-8859-1" },
107 {
"es_HN" ,
"espanol.hash" ,
"iso-8859-1" },
108 {
"es_MX" ,
"espanol.hash" ,
"iso-8859-1" },
109 {
"es_NI" ,
"espanol.hash" ,
"iso-8859-1" },
110 {
"es_PA" ,
"espanol.hash" ,
"iso-8859-1" },
111 {
"es_PE" ,
"espanol.hash" ,
"iso-8859-1" },
112 {
"es_PR" ,
"espanol.hash" ,
"iso-8859-1" },
113 {
"es_PY" ,
"espanol.hash" ,
"iso-8859-1" },
114 {
"es_SV" ,
"espanol.hash" ,
"iso-8859-1" },
115 {
"es_UY" ,
"espanol.hash" ,
"iso-8859-1" },
116 {
"es_VE" ,
"espanol.hash" ,
"iso-8859-1" },
117 {
"fi" ,
"finnish.hash" ,
"iso-8859-1" },
118 {
"fi_FI" ,
"finnish.hash" ,
"iso-8859-1" },
119 {
"fr" ,
"francais.hash" ,
"iso-8859-1" },
120 {
"fr_BE" ,
"francais.hash" ,
"iso-8859-1" },
121 {
"fr_CA" ,
"francais.hash" ,
"iso-8859-1" },
122 {
"fr_CH" ,
"francais.hash" ,
"iso-8859-1" },
123 {
"fr_FR" ,
"francais.hash" ,
"iso-8859-1" },
124 {
"fr_LU" ,
"francais.hash" ,
"iso-8859-1" },
125 {
"fr_MC" ,
"francais.hash" ,
"iso-8859-1" },
126 {
"hu" ,
"hungarian.hash" ,
"iso-8859-2" },
127 {
"hu_HU" ,
"hungarian.hash" ,
"iso-8859-2" },
128 {
"ga" ,
"irish.hash" ,
"iso-8859-1" },
129 {
"ga_IE" ,
"irish.hash" ,
"iso-8859-1" },
130 {
"gl" ,
"galician.hash" ,
"iso-8859-1" },
131 {
"gl_ES" ,
"galician.hash" ,
"iso-8859-1" },
132 {
"ia" ,
"interlingua.hash" ,
"iso-8859-1" },
133 {
"it" ,
"italian.hash" ,
"iso-8859-1" },
134 {
"it_IT" ,
"italian.hash" ,
"iso-8859-1" },
135 {
"it_CH" ,
"italian.hash" ,
"iso-8859-1" },
136 {
"la" ,
"mlatin.hash" ,
"iso-8859-1" },
137 {
"la_IT" ,
"mlatin.hash" ,
"iso-8859-1" },
138 {
"lt" ,
"lietuviu.hash" ,
"iso-8859-13" },
139 {
"lt_LT" ,
"lietuviu.hash" ,
"iso-8859-13" },
140 {
"nl" ,
"nederlands.hash" ,
"iso-8859-1" },
141 {
"nl_NL" ,
"nederlands.hash" ,
"iso-8859-1" },
142 {
"nl_BE" ,
"nederlands.hash" ,
"iso-8859-1" },
143 {
"nb" ,
"norsk.hash" ,
"iso-8859-1" },
144 {
"nb_NO" ,
"norsk.hash" ,
"iso-8859-1" },
145 {
"nn" ,
"nynorsk.hash" ,
"iso-8859-1" },
146 {
"nn_NO" ,
"nynorsk.hash" ,
"iso-8859-1" },
147 {
"no" ,
"norsk.hash" ,
"iso-8859-1" },
148 {
"no_NO" ,
"norsk.hash" ,
"iso-8859-1" },
149 {
"pl" ,
"polish.hash" ,
"iso-8859-2" },
150 {
"pl_PL" ,
"polish.hash" ,
"iso-8859-2" },
151 {
"pt" ,
"brazilian.hash" ,
"iso-8859-1" },
152 {
"pt_BR" ,
"brazilian.hash" ,
"iso-8859-1" },
153 {
"pt_PT" ,
"portugues.hash" ,
"iso-8859-1" },
154 {
"ru" ,
"russian.hash" ,
"koi8-r" },
155 {
"ru_MD" ,
"russian.hash" ,
"koi8-r" },
156 {
"ru_RU" ,
"russian.hash" ,
"koi8-r" },
157 {
"sc" ,
"sardinian.hash" ,
"iso-8859-1" },
158 {
"sc_IT" ,
"sardinian.hash" ,
"iso-8859-1" },
159 {
"sk" ,
"slovak.hash" ,
"iso-8859-2" },
160 {
"sk_SK" ,
"slovak.hash" ,
"iso-8859-2" },
161 {
"sl" ,
"slovensko.hash" ,
"iso-8859-2" },
162 {
"sl_SI" ,
"slovensko.hash" ,
"iso-8859-2" },
163 {
"sv" ,
"svenska.hash" ,
"iso-8859-1" },
164 {
"sv_SE" ,
"svenska.hash" ,
"iso-8859-1" },
165 {
"uk" ,
"ukrainian.hash" ,
"koi8-u" },
166 {
"uk_UA" ,
"ukrainian.hash" ,
"koi8-u" },
167 {
"yi" ,
"yiddish-yivo.hash" ,
"utf-8" }
// Number of rows in ispell_map, derived from the array size so it cannot
// drift out of sync with the table.
170 static const size_t size_ispell_map = (
sizeof(ispell_map) /
sizeof((ispell_map)[0]) );
// File-local cache keyed by language tag. It is filled by the directory scan
// further down (which inserts mapping->lang -> directory entry) and its keys
// are what ISpellChecker::allDics() returns.
171 static TQMap<TQString, TQString> ispell_dict_map;
// Picks the text codec from an explicitly supplied encoding name.
//
// inEncoding: encoding name, may be null or empty. Only when it is a
//             non-empty string is m_translate_in assigned, using the codec
//             that TQTextCodec::codecForName() returns for that name.
// (Return type and the remaining statements are not visible in this excerpt.)
175 ISpellChecker::try_autodetect_charset(
const char *
const inEncoding)
177 if (inEncoding && strlen(inEncoding))
179 m_translate_in = TQTextCodec::codecForName(inEncoding);
// Constructor. Starts the checker in the "not initialised" state
// (m_bSuccessfulInit is false) and zero-fills the m_sflagindex and
// m_pflagindex tables. Other member initialisers are not visible in this
// excerpt.
186 ISpellChecker::ISpellChecker()
189 m_bSuccessfulInit(false),
211 memset(m_sflagindex,0,
sizeof(m_sflagindex));
212 memset(m_pflagindex,0,
sizeof(m_pflagindex));
// Calls free(p) when p is non-null. The do { } while (0) wrapper makes the
// macro expand to a single statement, so it is safe inside unbraced if/else.
// Note that the argument is evaluated twice.
216 #define FREEP(p) do { if (p) free(p); } while (0)
// Destructor. Dictionary state (the prefix/suffix flag indexes cleared via
// clearindex()) is torn down inside the `if (m_bSuccessfulInit)` branch; the
// exact extent of that branch is not visible in this excerpt.
// m_hashstrings is released with FREEP, i.e. free() when non-null.
//
// NOTE(review): m_translate_in is assigned from TQTextCodec::codecForName()
// elsewhere in this file. Confirm that this class really owns the returned
// codec before deleting it -- Qt3-style codec registries normally keep
// ownership of the codecs they hand out.
219 ISpellChecker::~ISpellChecker()
221 if (m_bSuccessfulInit) {
224 clearindex (m_pflagindex);
225 clearindex (m_sflagindex);
229 FREEP(m_hashstrings);
233 delete m_translate_in;
// Checks one word against the loaded dictionary.
//
// utf8Word: the word to check, as a TQString.
//
// Visible steps:
//  1. Guard on m_bSuccessfulInit (no dictionary loaded yet).
//  2. Guard on the word itself: empty words and words whose length is
//     INPUTWORDLEN + MAXAFFIXLEN or more are filtered out -- the latter bound
//     matches the size of the iWord buffer. (`!utf8Word` and `isEmpty()` look
//     redundant with each other.)
//  3. The word is converted to the dictionary encoding with
//     m_translate_in->fromUnicode() and then to ispell's internal ichar_t
//     form with strtoichar().
//  4. The word is tested with good() and compoundgood(); a result of 1 from
//     either satisfies the condition.
// The statements executed by each guard/branch (and the return type) are not
// visible in this excerpt.
238 ISpellChecker::checkWord(
const TQString& utf8Word )
240 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
241 if (!m_bSuccessfulInit)
244 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
253 int len_out = utf8Word.length();
255 out = m_translate_in->fromUnicode( utf8Word, len_out );
258 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
260 if (good(iWord, 0, 0, 1, 0) == 1 ||
261 compoundgood(iWord, 1) == 1)
// Produces spelling suggestions for a word.
//
// utf8Word: the (misspelled) word, as a TQString.
// Returns a TQStringList; an empty list is returned when no dictionary is
// loaded (m_bSuccessfulInit is false) or when the word is empty or has
// INPUTWORDLEN + MAXAFFIXLEN characters or more (the size of iWord). The
// `length() == 0` test repeats `isEmpty()`.
//
// The word is converted with m_translate_in->fromUnicode() and strtoichar();
// when strtoichar() returns 0, makepossibilities() is called. The loop then
// walks m_possibilities[0 .. m_pcount) and appends each entry, converted to a
// TQString either with TQString::fromUtf8() or with
// m_translate_in->toUnicode(); the condition choosing between the two is not
// visible in this excerpt.
//
// NOTE(review): inside the loop a variable named utf8Word is assigned while
// the parameter is a const reference -- presumably a local declared on a line
// not shown here shadows the parameter; verify against the full source.
271 ISpellChecker::suggestWord(
const TQString& utf8Word)
273 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
276 if (!m_bSuccessfulInit)
277 return TQStringList();
279 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
280 utf8Word.length() == 0)
281 return TQStringList();
285 return TQStringList();
290 int len_out = utf8Word.length();
291 out = m_translate_in->fromUnicode( utf8Word, len_out );
294 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
295 makepossibilities(iWord);
297 return TQStringList();
299 TQStringList sugg_arr;
300 for (c = 0; c < m_pcount; c++)
307 utf8Word = TQString::fromUtf8( m_possibilities[c] );
312 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
315 sugg_arr.append( utf8Word );
// Builds the list of candidate hash-file paths for a dictionary.
//
// names: output vector; one path is appended per entry of ispell_dirs.
// dict:  dictionary file name. Each candidate starts as "<dir>/"; the line
//        that appends `dict` to it is not visible in this excerpt
//        (presumably the result is "<dir>/<dict>" -- confirm).
// The loop stops at the first null entry of ispell_dirs.
322 s_buildHashNames (std::vector<std::string> & names,
const char * dict)
324 const char * tmp = 0;
329 while ( (tmp = ispell_dirs[i++]) ) {
330 TQCString maybeFile = TQCString( tmp ) +
'/';
332 names.push_back( maybeFile.data() );
// Body of the directory scan that fills ispell_dict_map (the function header
// is not visible in this excerpt).
//
// For every directory in ispell_dirs (up to the first null entry) the
// "*.hash" entries of `dir` are listed. Each entry's file name is compared
// with strcmp() against the `dict` member of every ispell_map row; on a match
// the row's language tag is inserted into ispell_dict_map with the directory
// entry as value. One hash file can therefore register several language tags.
339 const char * tmp = 0;
342 while ( (tmp = ispell_dirs[i++]) ) {
344 TQStringList lst = dir.entryList(
"*.hash" );
345 for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
346 TQFileInfo info( *it );
347 for (
size_t i = 0; i < size_ispell_map; i++)
349 const IspellMap * mapping = (
const IspellMap *)(&(ispell_map[i]));
350 if (!strcmp (info.fileName().latin1(), mapping->dict))
352 ispell_dict_map.insert( mapping->lang, *it );
// Returns the language tags recorded in ispell_dict_map (its keys).
// When the map is still empty a statement not visible in this excerpt runs
// first -- presumably the directory scan that populates the map; confirm.
359 TQValueList<TQString>
360 ISpellChecker::allDics()
362 if ( ispell_dict_map.empty() )
365 return ispell_dict_map.keys();
// Tries to load a dictionary hash file by name.
//
// szdict: hash file name (e.g. "british.hash").
// Candidate paths are produced by s_buildHashNames() and tried in order with
// linit(); the first path for which linit() returns a value >= 0 is returned.
// TQString::null is returned when no candidate loads.
// The const_cast is there because linit() is called with a non-const char*.
369 ISpellChecker::loadDictionary (
const char * szdict)
371 std::vector<std::string> dict_names;
373 s_buildHashNames (dict_names, szdict);
375 for (
size_t i = 0; i < dict_names.size(); i++)
377 if (linit(
const_cast<char*
>(dict_names[i].c_str())) >= 0)
378 return dict_names[i].c_str();
381 return TQString::null;
// Loads the dictionary registered for a language tag.
//
// szLang: language/locale tag; compared with strcmp() against the `lang`
//         member of each ispell_map row, so it must match exactly.
//
// Visible steps:
//  1. Look up szLang in ispell_map to obtain the hash file name and encoding.
//  2. Guard against a missing or empty file name (tag not in the table).
//  3. alloc_ispell_struct(), then loadDictionary(szFile).
//  4. Guard against an empty hashname (no file could be loaded).
//  5. setDictionaryEncoding(hashname, encoding).
// The statements taken by the guards and the return type are not visible in
// this excerpt; requestDictionary() tests the result with `!`.
391 ISpellChecker::loadDictionaryForLanguage (
const char * szLang )
395 const char * encoding = NULL;
396 const char * szFile = NULL;
398 for (
size_t i = 0; i < size_ispell_map; i++)
400 const IspellMap * mapping = (
const IspellMap *)(&(ispell_map[i]));
401 if (!strcmp (szLang, mapping->lang))
403 szFile = mapping->dict;
404 encoding = mapping->enc;
409 if (!szFile || !strlen(szFile))
412 alloc_ispell_struct();
414 hashname = loadDictionary(szFile);
415 if (hashname.isEmpty())
419 setDictionaryEncoding (hashname, encoding);
// Establishes the codec (m_translate_in) and the ispell string type
// (prefstringchar) for a freshly loaded dictionary.
//
// hashname: name of the loaded hash file; not referenced in the lines visible
//           in this excerpt.
// encoding: encoding name from ispell_map, forwarded to
//           try_autodetect_charset(); may be null.
//
// Two phases are visible:
//  * First, prefstringchar is looked up with findfiletype("utf8", ...) and,
//    if that yields a negative value, with "latin1" .. "latin15".
//  * Then the codec itself is probed: "utf8" first, then "latin1" ..
//    "latin15", and finally a hard-coded "latin1" codec.
// The conditions and early exits separating these phases are not visible in
// this excerpt.
425 ISpellChecker::setDictionaryEncoding(
const TQString& hashname,
const char * encoding )
428 try_autodetect_charset(encoding);
433 prefstringchar = findfiletype(
"utf8", 1, deftflag < 0 ? &deftflag
434 :
static_cast<int *
>(NULL));
436 if (prefstringchar < 0)
438 std::string teststring;
439 for(
int n1 = 1; n1 <= 15; n1++)
// Build "latin<n1>" the same way the second probing loop below does.
// The previous `"latin" + n1` was pointer arithmetic on the literal: it
// produced "atin", "tin", ... and pointed past the literal for larger n1,
// so no "latinN" name was ever actually tested here.
441 teststring =
TQString("latin%1").arg(n1).latin1();
442 prefstringchar = findfiletype(teststring.c_str(), 1,
443 deftflag < 0 ? &deftflag :
static_cast<int *
>(NULL));
444 if (prefstringchar >= 0)
// Probe for a UTF-8 string type first.
453 prefstringchar = findfiletype(
"utf8", 1, deftflag < 0 ? &deftflag :
static_cast<int *
>(NULL));
454 if (prefstringchar >= 0)
456 m_translate_in = TQTextCodec::codecForName(
"utf8");
// Then probe the string types named "latin1" .. "latin15".
466 for(
int n1 = 1; n1 <= 15; n1++)
468 TQString teststring = TQString(
"latin%1").arg(n1);
469 prefstringchar = findfiletype(teststring.latin1(), 1,
470 deftflag < 0 ? &deftflag :
static_cast<int *
>(NULL));
471 if (prefstringchar >= 0)
474 m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
// Last resort: latin1.
483 m_translate_in = TQTextCodec::codecForName(
"latin1");
// Loads the dictionary for a language tag, with a fallback from a full locale
// to its language part.
//
// szLang: language/locale tag such as "en_GB".
//
// loadDictionaryForLanguage(szLang) is tried first. If that fails and the tag
// contains an underscore, everything from the last '_' onwards is cut off
// ("en_GB" -> "en") and the lookup is retried with the shortened tag.
// `(size_t)-1` is the "not found" value returned by std::string::rfind.
// m_bSuccessfulInit is then set to true and prefstringchar is copied to
// m_defdupchar; the statement taken when prefstringchar is negative, the
// failure exits and the return type are not visible in this excerpt.
488 ISpellChecker::requestDictionary(
const char *szLang)
490 if (!loadDictionaryForLanguage (szLang))
493 std::string shortened_dict (szLang);
496 if ((uscore_pos = shortened_dict.rfind (
'_')) != ((
size_t)-1)) {
497 shortened_dict = shortened_dict.substr(0, uscore_pos);
498 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
504 m_bSuccessfulInit =
true;
506 if (prefstringchar < 0)
509 m_defdupchar = prefstringchar;