19 #include "kcharsets.h"
21 #include "kqiodevicegzip_p.h"
22 #include "kentities.c"
24 #include <tdeapplication.h>
25 #include <tdeglobal.h>
27 #include <tdeconfig.h>
29 #include <tqfontinfo.h>
30 #include <tqstrlist.h>
31 #include <tqfontdatabase.h>
34 #include <tqtextcodec.h>
36 #include <tqcstring.h>
42 static const char *
const language_names[] = {
66 static const char*
const charsets_for_encoding[] = {
135 static struct LanguageForEncoding
139 }
const language_for_encoding[] = {
140 {
"iso 8859-1", 13 },
141 {
"iso 8859-15", 13 },
142 {
"iso 8859-14", 13 },
148 {
"iso 8859-13", 2 },
149 {
"iso 8859-16", 18 },
174 {
"iso 8859-8-i", 8 },
176 {
"iso 8859-9", 12 },
178 {
"iso 8859-11", 11 },
186 {
"iso-10646-ucs-2", 15 },
191 static struct Builtin
195 }
const builtin[] = {
196 {
"iso-ir-111",
"koi8-r" },
197 {
"koi8-ru",
"koi8-u" },
198 {
"koi unified",
"koi8-r" },
200 {
"us-ascii",
"iso 8859-1" },
201 {
"usascii",
"iso 8859-1" },
202 {
"ascii",
"iso 8859-1" },
203 {
"x-utf-8",
"utf-8" },
204 {
"x-utf-7",
"utf-7" },
205 {
"unicode-1-1-utf-7",
"utf-7" },
206 {
"utf-16",
"iso-10646-ucs-2" },
207 {
"utf16",
"iso-10646-ucs-2" },
208 {
"ucs2",
"iso-10646-ucs-2" },
209 {
"iso10646-1",
"iso-10646-ucs-2" },
210 {
"gb18030.2000-1",
"gb18030" },
211 {
"gb18030.2000-0",
"gb18030" },
213 {
"gb2312.1980-0",
"gbk" },
214 {
"gb_2312-80",
"gbk" },
215 {
"x-euc-kr",
"euckr" },
216 {
"jisx0201.1976-0",
"eucjp" },
217 {
"jisx0208.1983-0",
"eucjp" },
218 {
"jisx0208.1990-0",
"eucjp" },
219 {
"jisx0208.1997-0",
"eucjp" },
220 {
"jisx0212.1990-0",
"eucjp" },
221 {
"jisx0213.2000-1",
"eucjp" },
222 {
"jisx0213.2000-2",
"eucjp" },
223 {
"windows850",
"ibm850" },
224 {
"windows866",
"ibm866" },
225 {
"windows1251",
"cp 1251" },
226 {
"windows1252",
"cp 1252" },
227 {
"windows1253",
"cp 1253" },
228 {
"windows1254",
"cp 1254" },
229 {
"windows1255",
"cp 1255" },
230 {
"windows1256",
"cp 1256" },
231 {
"windows1257",
"cp 1257" },
232 {
"windows1258",
"cp 1258" },
233 {
"windows-850",
"ibm850" },
234 {
"windows-866",
"ibm866" },
235 {
"x-windows-850",
"ibm850" },
236 {
"x-windows-866",
"ibm866" },
237 {
"x-windows-1250",
"cp 1250" },
238 {
"x-windows-1251",
"cp 1251" },
239 {
"x-windows-1252",
"cp 1252" },
240 {
"x-windows-1253",
"cp 1253" },
241 {
"x-windows-1254",
"cp 1254" },
242 {
"x-windows-1255",
"cp 1255" },
243 {
"x-windows-1256",
"cp 1256" },
244 {
"x-windows-1257",
"cp 1257" },
245 {
"x-windows-1258",
"cp 1258" },
246 {
"cp819",
"iso 8859-1" },
247 {
"cp850",
"ibm850" },
248 {
"cp866",
"ibm866" },
249 {
"cp-819",
"iso 8859-1" },
250 {
"cp-850",
"ibm850" },
251 {
"cp-866",
"ibm866" },
252 {
"cp-1250",
"cp 1250" },
253 {
"cp-1251",
"cp 1251" },
254 {
"cp-1252",
"cp 1252" },
255 {
"cp-1253",
"cp 1253" },
256 {
"cp-1254",
"cp 1254" },
257 {
"cp-1255",
"cp 1255" },
258 {
"cp-1256",
"cp 1256" },
259 {
"cp-1257",
"cp 1257" },
260 {
"cp-1258",
"cp 1258" },
261 {
"cp-10000",
"apple roman" },
262 {
"x-cp-850",
"ibm850" },
263 {
"x-cp-866",
"ibm866" },
264 {
"x-cp-1250",
"cp 1250" },
265 {
"x-cp-1251",
"cp 1251" },
266 {
"x-cp-1252",
"cp 1252" },
267 {
"x-cp-1253",
"cp 1253" },
268 {
"x-cp-1254",
"cp 1254" },
269 {
"x-cp-1255",
"cp 1255" },
270 {
"x-cp-1256",
"cp 1256" },
271 {
"x-cp-1257",
"cp 1257" },
272 {
"x-cp-1258",
"cp 1258" },
273 {
"x-cp-10000",
"apple roman" },
274 {
"ibm819",
"iso 8859-1" },
275 {
"thai-tis620",
"iso 8859-11" },
276 {
"windows-874",
"cp 874" },
277 {
"windows874",
"cp 874" },
278 {
"x-windows-874",
"cp 874" },
279 {
"x-cp-874",
"cp 874" },
280 {
"ibm 874",
"cp 874" },
281 {
"ibm874",
"cp 874" },
282 {
"x-ibm874",
"cp 874" },
283 {
"ksc5601.1987-0",
"euckr" },
284 {
"x-winsami2",
"winsami2" },
285 {
"x-mac-roman",
"apple roman" },
286 {
"macintosh",
"apple roman" },
287 {
"mac",
"apple roman" },
288 {
"csiso2022jp",
"jis7" },
289 {
"big5-eten",
"big5-hkscs" },
290 {
"cp950",
"big5-hkscs" },
295 static struct Aliases
299 }
const aliases[] = {
300 {
"cp852",
"ibm852" },
301 {
"cp-852",
"ibm852" },
302 {
"x-cp-852",
"ibm852" },
303 {
"windows852",
"ibm852" },
304 {
"windows-852",
"ibm852" },
305 {
"x-windows-852",
"ibm852" },
311 static struct ConversionHints
315 }
const conversion_hints[] = {
316 {
"cp1250",
"iso-8859-2" },
317 {
"koi8-r",
"iso-8859-5" },
318 {
"koi8-u",
"koi8-r" },
320 {
"pt 154",
"cp 1251" },
321 {
"paratype-154",
"cp 1251" },
322 {
"pt-154",
"cp 1251" },
328 template<
typename T,
typename Data >
329 static Data kcharsets_array_search(
const T* start,
const char* entry )
331 for(
const T* pos = start;
334 if( qstrcmp( pos->index, entry ) == 0 )
340 class KCharsetsPrivate
344 : codecForNameDict(43, false)
354 TQAsciiDict<TQTextCodec> codecForNameDict;
362 d =
new KCharsetsPrivate(
this);
372 TQChar res = TQChar::null;
375 if(str[pos] == (TQChar)
'&') pos++;
378 if (str[pos] == (TQChar)
'#' && str.length()-pos > 1) {
381 if (str[pos] == (TQChar)
'x' || str[pos] == (TQChar)
'X') {
384 TQString tmp(str.unicode()+pos, str.length()-pos);
385 res = tmp.toInt(&ok, 16);
388 TQString tmp(str.unicode()+pos, str.length()-pos);
389 res = tmp.toInt(&ok, 10);
394 const entity *e = kde_findEntity(str.ascii(), str.length());
403 return TQChar(e->code);
413 TQString tmp = str.left(len);
415 if( res != (TQChar)TQChar::null )
return res;
// Format the character's code point as a hexadecimal numeric entity,
// "&#0x<hex>;". The literal previously began with a U+FFFD replacement
// character in place of the "&#0" prefix.
// NOTE(review): the entity parser in this file appears to treat only "#x" /
// "#X" as hexadecimal, so the "#0x" form may not round-trip through it -
// confirm whether "&#x%x;" is the intended format before changing it.
425 ent.sprintf(
"&#0x%x;", ch.unicode());
431 TQString text = input;
432 const TQChar *p = text.unicode();
433 const TQChar *end = p + text.length();
434 const TQChar *ampersand = 0;
435 bool scanForSemicolon =
false;
437 for ( ; p < end; ++p ) {
438 const TQChar ch = *p;
440 if ( ch == (TQChar)
'&' ) {
442 scanForSemicolon =
true;
446 if ( ch != (TQChar)
';' || scanForSemicolon ==
false )
451 scanForSemicolon =
false;
453 const TQChar *entityBegin = ampersand + 1;
455 const uint entityLength = p - entityBegin;
456 if ( entityLength == 0 )
459 const TQChar entityValue =
KCharsets::fromEntity( TQConstString( entityBegin, entityLength ).
string() );
460 if ( entityValue.isNull() )
463 const uint ampersandPos = ampersand - text.unicode();
465 text[ (int)ampersandPos ] = entityValue;
466 text.remove( ampersandPos + 1, entityLength + 1 );
467 p = text.unicode() + ampersandPos;
468 end = text.unicode() + text.length();
477 TQStringList available;
478 for (
const char*
const* pos = charsets_for_encoding; *pos; ++pos ) {
480 available.append( TQString::fromLatin1( *pos ));
487 int lang = kcharsets_array_search< LanguageForEncoding, int >
488 ( language_for_encoding, encoding.latin1());
489 return i18n( language_names[lang] );
494 const int left = descriptiveName.findRev(
'(' );
497 return descriptiveName.stripWhiteSpace();
499 TQString name(descriptiveName.mid(left+1));
501 const int right = name.findRev(
')' );
506 return name.left(right).stripWhiteSpace();
511 for (
const LanguageForEncoding* pos = language_for_encoding; pos->index; ++pos ) {
512 if ( encodingName == TQString::fromLatin1( pos->index )) {
513 const TQString description = i18n( language_names[ pos->data ] );
514 return i18n(
"Descriptive Encoding Name",
"%1 ( %2 )"). arg ( description ). arg( encodingName );
517 return TQString::null;
523 TQStringList encodings;
524 for (
const LanguageForEncoding* pos = language_for_encoding; pos->index; ++pos ) {
525 const TQString name = TQString::fromLatin1( pos->index );
526 const TQString description = i18n( language_names[ pos->data ] );
527 encodings.append( i18n(
"Descriptive Encoding Name",
"%1 ( %2 )"). arg ( description ). arg( name ) );
543 TQTextCodec* codec = 0;
545 if((codec = d->codecForNameDict[n.isEmpty() ?
"->locale<-" : n.latin1()]))
550 d->codecForNameDict.replace(
"->locale<-", codec);
554 TQCString name = n.lower().latin1();
555 TQCString key = name;
556 if (name.right(8) ==
"_charset")
557 name.truncate(name.length()-8);
559 if (name.isEmpty()) {
561 return TQTextCodec::codecForName(
"iso8859-1");
564 codec = TQTextCodec::codecForName(name);
567 d->codecForNameDict.replace(key, codec);
573 TQCString cname = kcharsets_array_search< Builtin, const char* >( builtin, name.data());
576 codec = TQTextCodec::codecForName(cname);
580 d->codecForNameDict.replace(key, codec);
592 cname = kcharsets_array_search< Aliases, const char* >( aliases, name.data());
596 cname = cname.upper();
598 const TQString basicName = TQString::fromLatin1(cname);
599 kdDebug() << k_funcinfo <<
endl <<
" Trying to find " << cname <<
" in " << dir <<
endl;
601 TQString charMapFileName;
602 bool gzipped =
false;
604 if (!qdir.exists()) {
607 else if (qdir.exists(basicName,
false)) {
608 charMapFileName = basicName;
610 else if (qdir.exists(basicName+
".gz",
false)) {
611 charMapFileName = basicName +
".gz";
// Match code-page style names such as "CP1250", "IBM-850" or "X-CP 866".
// Capture group 4 is the numeric code-page id read back via regexp.cap(4).
// The digits must be a character class: the former "(0-9)+" matched only the
// literal text "0-9", so no real code-page name could ever match.
618 TQRegExp regexp(
"^(X-)?(CP|IBM)(-| )?([0-9]+)");
619 if ( regexp.search(basicName) != -1) {
620 const TQString num = regexp.cap(4);
624 else if (qdir.exists(
"IBM"+num)) {
625 charMapFileName =
"IBM"+num;
627 else if (qdir.exists(
"IBM"+num+
".gz")) {
628 charMapFileName =
"IBM"+num+
".gz";
631 else if (qdir.exists(
"CP"+num)) {
632 charMapFileName =
"CP"+num;
634 else if (qdir.exists(
"CP"+num+
".gz")) {
635 charMapFileName =
"CP"+num+
".gz";
641 if (gzipped && !charMapFileName.isEmpty()) {
642 KQIODeviceGZip gzip(dir +
"/" + charMapFileName);
643 if (gzip.open(IO_ReadOnly)) {
644 kdDebug() <<
"Loading gzipped charset..." <<
endl;
645 codec = TQTextCodec::loadCharmap(&gzip);
649 kdWarning() <<
"Could not open gzipped charset!" <<
endl;
651 else if (!charMapFileName.isEmpty()) {
652 codec = TQTextCodec::loadCharmapFile(dir +
"/" + charMapFileName);
656 d->codecForNameDict.replace(key, codec);
662 cname = kcharsets_array_search< ConversionHints, const char* >( conversion_hints, (
const char*)name.data() );
665 codec = TQTextCodec::codecForName(cname);
668 d->codecForNameDict.replace(key, codec);
674 return TQTextCodec::codecForName(
"iso8859-1");
Charset font and encoder/decoder handling.
TQString encodingForName(const TQString &descriptiveName)
Returns the encoding for a string obtained with descriptiveEncodingNames().
static TQString toEntity(const TQChar &ch)
Converts a TQChar to an entity.
virtual ~KCharsets()
Destructor.
static TQString resolveEntities(const TQString &text)
Scans the given string for entities (like &amp;) and resolves them using fromEntity.
TQString languageForEncoding(const TQString &encoding)
Returns the language the encoding is used for.
KCharsets()
Protected constructor.
TQStringList availableEncodingNames()
Lists all available encodings as names.
TQTextCodec * codecForName(const TQString &name) const
Provided for compatibility.
static TQChar fromEntity(const TQString &str)
Converts an entity to a character.
TQString descriptiveNameForEncoding(const TQString &encodingName)
Returns the descriptive encoding name for an encoding name.
TQStringList descriptiveEncodingNames()
Lists the available encoding names together with a more descriptive language.
TQString readPathEntry(const TQString &pKey, const TQString &aDefault=TQString::null) const
Reads a path.
Helper class to facilitate working with TDEConfig / KSimpleConfig groups.
static TDEConfig * config()
Returns the general config object.
static TDELocale * locale()
Returns the global locale object.
TQTextCodec * codecForEncoding() const
Returns the user's preferred encoding.
#define I18N_NOOP(x)
I18N_NOOP marks a string to be translated without translating it.
kndbgstream & endl(kndbgstream &s)
Does nothing.