• Skip to content
  • Skip to link menu
Trinity API Reference
  • Trinity API Reference
  • tdespell2
 

tdespell2

  • tdespell2
  • plugins
  • ispell
ispell_checker.cpp
1/* tdespell2 - adopted from Enchant
2 * Copyright (C) 2003 Dom Lachowicz
3 * Copyright (C) 2004 Zack Rusin <zack@kde.org>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 * In addition, as a special exception, Dom Lachowicz
21 * gives permission to link the code of this program with
22 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
23 * spell checker backend) and distribute linked combinations including
24 * the two. You must obey the GNU Lesser General Public License in all
25 * respects for all of the code used other than said providers. If you modify
26 * this file, you may extend this exception to your version of the
27 * file, but you are not obligated to do so. If you do not wish to
28 * do so, delete this exception statement from your version.
29 */
30
31#include <config.h>
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36
37#include <string>
38#include <vector>
39
40#include "sp_spell.h"
41#include "ispell_checker.h"
42
43#include <tqmap.h>
44#include <tqdir.h>
45#include <tqfileinfo.h>
46
47/***************************************************************************/
48
/**
 * One row of the language lookup table: associates a locale tag with the
 * ispell hash file used for it and the encoding name listed for that file.
 */
typedef struct str_ispell_map
{
  const char * lang;  /* locale tag, e.g. "en_US" */
  const char * dict;  /* hash file name, e.g. "american.hash" */
  const char * enc;   /* encoding name, e.g. "iso-8859-1" */
} IspellMap;
55
56static const char *ispell_dirs [] = {
57#ifdef ISPELL_LIBDIR
58 ISPELL_LIBDIR,
59#else
60 "/usr/" SYSTEM_LIBDIR "/ispell",
61 "/usr/lib/ispell",
62 "/usr/local/" SYSTEM_LIBDIR "/ispell",
63 "/usr/local/lib/ispell",
64 "/usr/local/share/ispell",
65 "/usr/share/ispell",
66 "/usr/pkg/lib",
67#endif
68 0
69};
70static const IspellMap ispell_map [] = {
71 {"ca" ,"catala.hash" ,"iso-8859-1" },
72 {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
73 {"cs" ,"czech.hash" ,"iso-8859-2" },
74 {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
75 {"da" ,"dansk.hash" ,"iso-8859-1" },
76 {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
77 {"de" ,"deutsch.hash" ,"iso-8859-1" },
78 {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
79 {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
80 {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
81 {"el" ,"ellhnika.hash" ,"iso-8859-7" },
82 {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
83 {"en" ,"british.hash" ,"iso-8859-1" },
84 {"en_AU" ,"british.hash" ,"iso-8859-1" },
85 {"en_BZ" ,"british.hash" ,"iso-8859-1" },
86 {"en_CA" ,"british.hash" ,"iso-8859-1" },
87 {"en_GB" ,"british.hash" ,"iso-8859-1" },
88 {"en_IE" ,"british.hash" ,"iso-8859-1" },
89 {"en_JM" ,"british.hash" ,"iso-8859-1" },
90 {"en_NZ" ,"british.hash" ,"iso-8859-1" },
91 {"en_TT" ,"british.hash" ,"iso-8859-1" },
92 {"en_ZA" ,"british.hash" ,"iso-8859-1" },
93 {"en_ZW" ,"british.hash" ,"iso-8859-1" },
94 {"en_PH" ,"american.hash" ,"iso-8859-1" },
95 {"en_US" ,"american.hash" ,"iso-8859-1" },
96 {"eo" ,"esperanto.hash" ,"iso-8859-3" },
97 {"es" ,"espanol.hash" ,"iso-8859-1" },
98 {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
99 {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
100 {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
101 {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
102 {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
103 {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
104 {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
105 {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
106 {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
107 {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
108 {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
109 {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
110 {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
111 {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
112 {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
113 {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
114 {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
115 {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
116 {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
117 {"fi" ,"finnish.hash" ,"iso-8859-1" },
118 {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
119 {"fr" ,"francais.hash" ,"iso-8859-1" },
120 {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
121 {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
122 {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
123 {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
124 {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
125 {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
126 {"hu" ,"hungarian.hash" ,"iso-8859-2" },
127 {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
128 {"ga" ,"irish.hash" ,"iso-8859-1" },
129 {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
130 {"gl" ,"galician.hash" ,"iso-8859-1" },
131 {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
132 {"ia" ,"interlingua.hash" ,"iso-8859-1" },
133 {"it" ,"italian.hash" ,"iso-8859-1" },
134 {"it_IT" ,"italian.hash" ,"iso-8859-1" },
135 {"it_CH" ,"italian.hash" ,"iso-8859-1" },
136 {"la" ,"mlatin.hash" ,"iso-8859-1" },
137 {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
138 {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
139 {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
140 {"nl" ,"nederlands.hash" ,"iso-8859-1" },
141 {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
142 {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
143 {"nb" ,"norsk.hash" ,"iso-8859-1" },
144 {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
145 {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
146 {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
147 {"no" ,"norsk.hash" ,"iso-8859-1" },
148 {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
149 {"pl" ,"polish.hash" ,"iso-8859-2" },
150 {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
151 {"pt" ,"brazilian.hash" ,"iso-8859-1" },
152 {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
153 {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
154 {"ru" ,"russian.hash" ,"koi8-r" },
155 {"ru_MD" ,"russian.hash" ,"koi8-r" },
156 {"ru_RU" ,"russian.hash" ,"koi8-r" },
157 {"sc" ,"sardinian.hash" ,"iso-8859-1" },
158 {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
159 {"sk" ,"slovak.hash" ,"iso-8859-2" },
160 {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
161 {"sl" ,"slovensko.hash" ,"iso-8859-2" },
162 {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
163 {"sv" ,"svenska.hash" ,"iso-8859-1" },
164 {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
165 {"uk" ,"ukrainian.hash" ,"koi8-u" },
166 {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
167 {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
168};
169
170static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
171static TQMap<TQString, TQString> ispell_dict_map;
172
173
174void
175ISpellChecker::try_autodetect_charset(const char * const inEncoding)
176{
177 if (inEncoding && strlen(inEncoding))
178 {
179 m_translate_in = TQTextCodec::codecForName(inEncoding);
180 }
181}
182
183/***************************************************************************/
184/***************************************************************************/
185
186ISpellChecker::ISpellChecker()
187 : deftflag(-1),
188 prefstringchar(-1),
189 m_bSuccessfulInit(false),
190 m_BC(NULL),
191 m_cd(NULL),
192 m_cl(NULL),
193 m_cm(NULL),
194 m_ho(NULL),
195 m_nd(NULL),
196 m_so(NULL),
197 m_se(NULL),
198 m_ti(NULL),
199 m_te(NULL),
200 m_hashstrings(NULL),
201 m_hashtbl(NULL),
202 m_pflaglist(NULL),
203 m_sflaglist(NULL),
204 m_chartypes(NULL),
205 m_infile(NULL),
206 m_outfile(NULL),
207 m_askfilename(NULL),
208 m_Trynum(0),
209 m_translate_in(0)
210{
211 memset(m_sflagindex,0,sizeof(m_sflagindex));
212 memset(m_pflagindex,0,sizeof(m_pflagindex));
213}
214
/* Release a malloc'ed pointer, skipping the call when the pointer is null. */
#ifndef FREEP
#define FREEP(p) do { if ((p) != NULL) free(p); } while (0)
#endif
218
219ISpellChecker::~ISpellChecker()
220{
221 if (m_bSuccessfulInit) {
222 // only cleanup our mess if we were successfully initialized
223
224 clearindex (m_pflagindex);
225 clearindex (m_sflagindex);
226 }
227
228 FREEP(m_hashtbl);
229 FREEP(m_hashstrings);
230 FREEP(m_sflaglist);
231 FREEP(m_chartypes);
232
233 delete m_translate_in;
234 m_translate_in = 0;
235}
236
237bool
238ISpellChecker::checkWord( const TQString& utf8Word )
239{
240 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
241 if (!m_bSuccessfulInit)
242 return false;
243
244 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
245 return false;
246
247 bool retVal = false;
248 TQCString out;
249 if (!m_translate_in)
250 return false;
251 else {
252 /* convert to 8bit string and null terminate */
253 int len_out = utf8Word.length();
254
255 out = m_translate_in->fromUnicode( utf8Word, len_out );
256 }
257
258 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
259 {
260 if (good(iWord, 0, 0, 1, 0) == 1 ||
261 compoundgood(iWord, 1) == 1)
262 {
263 retVal = true;
264 }
265 }
266
267 return retVal;
268}
269
270TQStringList
271ISpellChecker::suggestWord(const TQString& utf8Word)
272{
273 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
274 int c;
275
276 if (!m_bSuccessfulInit)
277 return TQStringList();
278
279 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
280 utf8Word.length() == 0)
281 return TQStringList();
282
283 TQCString out;
284 if (!m_translate_in)
285 return TQStringList();
286 else
287 {
288 /* convert to 8bit string and null terminate */
289
290 int len_out = utf8Word.length();
291 out = m_translate_in->fromUnicode( utf8Word, len_out );
292 }
293
294 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
295 makepossibilities(iWord);
296 else
297 return TQStringList();
298
299 TQStringList sugg_arr;
300 for (c = 0; c < m_pcount; c++)
301 {
302 TQString utf8Word;
303
304 if (!m_translate_in)
305 {
306 /* copy to 8bit string and null terminate */
307 utf8Word = TQString::fromUtf8( m_possibilities[c] );
308 }
309 else
310 {
311 /* convert to 32bit string and null terminate */
312 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
313 }
314
315 sugg_arr.append( utf8Word );
316 }
317
318 return sugg_arr;
319}
320
321static void
322s_buildHashNames (std::vector<std::string> & names, const char * dict)
323{
324 const char * tmp = 0;
325 int i = 0;
326
327 names.clear ();
328
329 while ( (tmp = ispell_dirs[i++]) ) {
330 TQCString maybeFile = TQCString( tmp ) + '/';
331 maybeFile += dict;
332 names.push_back( maybeFile.data() );
333 }
334}
335
336static void
337s_allDics()
338{
339 const char * tmp = 0;
340 int i = 0;
341
342 while ( (tmp = ispell_dirs[i++]) ) {
343 TQDir dir( tmp );
344 TQStringList lst = dir.entryList( "*.hash" );
345 for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
346 TQFileInfo info( *it );
347 for (size_t i = 0; i < size_ispell_map; i++)
348 {
349 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
350 if (!strcmp (info.fileName().latin1(), mapping->dict))
351 {
352 ispell_dict_map.insert( mapping->lang, *it );
353 }
354 }
355 }
356 }
357}
358
359TQValueList<TQString>
360ISpellChecker::allDics()
361{
362 if ( ispell_dict_map.empty() )
363 s_allDics();
364
365 return ispell_dict_map.keys();
366}
367
368TQString
369ISpellChecker::loadDictionary (const char * szdict)
370{
371 std::vector<std::string> dict_names;
372
373 s_buildHashNames (dict_names, szdict);
374
375 for (size_t i = 0; i < dict_names.size(); i++)
376 {
377 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
378 return dict_names[i].c_str();
379 }
380
381 return TQString::null;
382}
383
390bool
391ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
392{
393 TQString hashname;
394
395 const char * encoding = NULL;
396 const char * szFile = NULL;
397
398 for (size_t i = 0; i < size_ispell_map; i++)
399 {
400 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
401 if (!strcmp (szLang, mapping->lang))
402 {
403 szFile = mapping->dict;
404 encoding = mapping->enc;
405 break;
406 }
407 }
408
409 if (!szFile || !strlen(szFile))
410 return false;
411
412 alloc_ispell_struct();
413
414 hashname = loadDictionary(szFile);
415 if (hashname.isEmpty())
416 return false;
417
418 // one of the two above calls succeeded
419 setDictionaryEncoding (hashname, encoding);
420
421 return true;
422}
423
424void
425ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
426{
427 /* Get Hash encoding from XML file. This should always work! */
428 try_autodetect_charset(encoding);
429
430 if (m_translate_in)
431 {
432 /* We still have to setup prefstringchar*/
433 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
434 : static_cast<int *>(NULL));
435
436 if (prefstringchar < 0)
437 {
438 std::string teststring;
439 for(int n1 = 1; n1 <= 15; n1++)
440 {
441 teststring = "latin" + n1;
442 prefstringchar = findfiletype(teststring.c_str(), 1,
443 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
444 if (prefstringchar >= 0)
445 break;
446 }
447 }
448
449 return; /* success */
450 }
451
452 /* Test for UTF-8 first */
453 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
454 if (prefstringchar >= 0)
455 {
456 m_translate_in = TQTextCodec::codecForName("utf8");
457 }
458
459 if (m_translate_in)
460 return; /* success */
461
462 /* Test for "latinN" */
463 if (!m_translate_in)
464 {
465 /* Look for "altstringtype" names from latin1 to latin15 */
466 for(int n1 = 1; n1 <= 15; n1++)
467 {
468 TQString teststring = TQString("latin%1").arg(n1);
469 prefstringchar = findfiletype(teststring.latin1(), 1,
470 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
471 if (prefstringchar >= 0)
472 {
473 //FIXME: latin1 might be wrong
474 m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
475 break;
476 }
477 }
478 }
479
480 /* If nothing found, use latin1 */
481 if (!m_translate_in)
482 {
483 m_translate_in = TQTextCodec::codecForName("latin1");
484 }
485}
486
487bool
488ISpellChecker::requestDictionary(const char *szLang)
489{
490 if (!loadDictionaryForLanguage (szLang))
491 {
492 // handle a shortened version of the language tag: en_US => en
493 std::string shortened_dict (szLang);
494 size_t uscore_pos;
495
496 if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
497 shortened_dict = shortened_dict.substr(0, uscore_pos);
498 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
499 return false;
500 } else
501 return false;
502 }
503
504 m_bSuccessfulInit = true;
505
506 if (prefstringchar < 0)
507 m_defdupchar = 0;
508 else
509 m_defdupchar = prefstringchar;
510
511 return true;
512}

tdespell2

Skip menu "tdespell2"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

tdespell2

Skip menu "tdespell2"
  • arts
  • dcop
  • dnssd
  • interfaces
  •   kspeech
  •     interface
  •     library
  •   tdetexteditor
  • kate
  • kded
  • kdoctools
  • kimgio
  • kjs
  • libtdemid
  • libtdescreensaver
  • tdeabc
  • tdecmshell
  • tdecore
  • tdefx
  • tdehtml
  • tdeinit
  • tdeio
  •   bookmarks
  •   httpfilter
  •   kpasswdserver
  •   kssl
  •   tdefile
  •   tdeio
  •   tdeioexec
  • tdeioslave
  •   http
  • tdemdi
  •   tdemdi
  • tdenewstuff
  • tdeparts
  • tdeprint
  • tderandr
  • tderesources
  • tdespell2
  • tdesu
  • tdeui
  • tdeunittest
  • tdeutils
  • tdewallet
Generated for tdespell2 by doxygen 1.9.4
This website is maintained by Timothy Pearson.