26 #include "qutf7codec.h"
28 #ifndef TQT_NO_TEXTCODEC
// Const accessor returning an int identifier for this codec.
// NOTE(review): the returned value is not visible in this extract;
// presumably the IANA MIBenum for UTF-7 - confirm against the full file.
30 int TQUtf7Codec::mibEnum()
const {
// Const accessor returning an int identifier for the strict codec variant.
// NOTE(review): the returned value is not visible in this extract.
34 int TQStrictUtf7Codec::mibEnum()
const {
// Const accessor returning the codec name as a C string.
// NOTE(review): the returned literal is not visible in this extract.
38 const char* TQUtf7Codec::name()
const {
// Name of the strict codec variant: the fixed string X-QT-UTF-7-STRICT.
42 const char* TQStrictUtf7Codec::name()
const {
43 return "X-QT-UTF-7-STRICT";
// Const accessor returning a C string; by its name, the MIME charset label.
// NOTE(review): the returned literal is not visible in this extract.
46 const char* TQUtf7Codec::mimeName()
const {
// Single-character overload of canEncode. The TQChar parameter is unnamed,
// so the result cannot depend on the character passed in.
// NOTE(review): the returned constant is not visible in this extract.
50 bool TQUtf7Codec::canEncode( TQChar )
const {
// String overload of canEncode. The TQString parameter is unnamed,
// so the result cannot depend on the string passed in.
// NOTE(review): the returned constant is not visible in this extract.
54 bool TQUtf7Codec::canEncode(
const TQString & )
const {
// 128-bit membership bitmap over 7-bit character codes, laid out the way
// isOfSet reads it: byte index = code / 8, most significant bit first.
// Members: the base64 alphabet, i.e. A-Z, a-z, 0-9, plus sign and slash.
58 static uchar base64Set[] = {
59 0x00, 0x00, 0x00, 0x00, // codes 0x00-0x1F: none
60 0x00, 0x11, 0xFF, 0xC0, // codes 0x20-0x3F: plus, slash, digits 0-9
61 0x7F, 0xFF, 0xFF, 0xE0, // codes 0x40-0x5F: A-Z
62 0x7F, 0xFF, 0xFF, 0xE0 // codes 0x60-0x7F: a-z
// 128-bit membership bitmap (same layout as the other sets in this file).
// Members: A E I M Q U Y c g k o s w 0 4 8 - exactly the base64 characters
// whose 6-bit value is a multiple of four, i.e. whose two low bits are zero.
65 static uchar base64SetWithLastTwoBitsZero[] = {
66 0x00, 0x00, 0x00, 0x00, // codes 0x00-0x1F: none
67 0x00, 0x00, 0x88, 0x80, // codes 0x20-0x3F: digits 0, 4, 8
68 0x44, 0x44, 0x44, 0x40, // codes 0x40-0x5F: A E I M Q U Y
69 0x11, 0x11, 0x11, 0x00 // codes 0x60-0x7F: c g k o s w
// 128-bit membership bitmap (byte index = code / 8, MSB first).
// Members: A-Z, a-z, 0-9 and the punctuation apostrophe, parentheses,
// comma, hyphen, period, slash, colon and question mark.
// NOTE(review): this matches the direct character set of RFC 2152 - confirm.
72 static uchar directSet[] = {
73 0x00, 0x00, 0x00, 0x00, // codes 0x00-0x1F: none
74 0x01, 0xCF, 0xFF, 0xE1, // codes 0x20-0x3F: punctuation listed above, digits
75 0x7F, 0xFF, 0xFF, 0xE0, // codes 0x40-0x5F: A-Z
76 0x7F, 0xFF, 0xFF, 0xE0 // codes 0x60-0x7F: a-z
// 128-bit membership bitmap (byte index = code / 8, MSB first).
// Members (by code): 0x21-0x26, 0x2A, 0x3B-0x3E, 0x40, 0x5B, 0x5D-0x5F,
// 0x60, 0x7B-0x7D. Backslash (0x5C) and tilde (0x7E) are not included.
// NOTE(review): this matches the optional direct characters of RFC 2152 - confirm.
79 static uchar optDirectSet[] = {
80 0x00, 0x00, 0x00, 0x00, // codes 0x00-0x1F: none
81 0x7E, 0x20, 0x00, 0x1E, // codes 0x20-0x3F
82 0x80, 0x00, 0x00, 0x17, // codes 0x40-0x5F
83 0x80, 0x00, 0x00, 0x1C // codes 0x60-0x7F
// Tests whether character code ch is marked in the bitmap set.
// The byte is selected by ch / 8 and the bit by ch % 8, counted from the
// most significant bit (mask 0x80 shifted right).
// No bounds check is done here: the bitmaps in this file are 16 bytes long,
// so callers must only pass codes below 128.
86 static inline bool isOfSet(uchar ch, uchar* set) {
87 return set[ ch/8 ] & (0x80 >> ( ch%8 ));
// Scans len bytes starting at chars and returns an int score.
// NOTE(review): many lines of this function are missing from this extract
// (declarations of i and stepNo, branch heads, all return statements), so
// the scoring itself cannot be documented from here. The comments below
// describe only what the visible statements do.
90 int TQUtf7Codec::heuristicContentMatch(
const char* chars,
int len)
const
// State: whether the previous character was the plus sign, and whether all
// bits seen since the last 16-bit boundary of a base64 run were zero.
95 bool rightAfterEscape = FALSE;
96 bool onlyNullBitsSinceLastBoundary = TRUE;
97 for ( i = 0; i < len ; i++ ) {
// Bytes with the high bit set are tested first; the action taken is not
// visible in this extract.
98 if ((
unsigned char)chars[i] >= 128)
// Character belongs to the base64 alphabet.
101 if ( isOfSet(chars[i],base64Set) ) {
104 onlyNullBitsSinceLastBoundary = TRUE;
// Flag becomes: the two low bits of this character's 6-bit value are zero.
107 onlyNullBitsSinceLastBoundary
108 = isOfSet(chars[i],base64SetWithLastTwoBitsZero);
// Flag becomes: character is one of A, Q, g, w (6-bit values 0, 16, 32, 48),
// i.e. the four low bits of its value are zero.
111 onlyNullBitsSinceLastBoundary
112 = ( chars[i] ==
'A' || chars[i] ==
'Q' ||
113 chars[i] ==
'g' || chars[i] ==
'w' );
// Flag stays set only if this character is A (all six bits zero).
116 onlyNullBitsSinceLastBoundary
117 = onlyNullBitsSinceLastBoundary && (chars[i] ==
'A');
// Position inside a group of eight base64 characters (8 x 6 = 48 bits).
119 stepNo = (stepNo + 1) % 8;
120 rightAfterEscape = FALSE;
// A non-base64 character directly after the plus sign other than the
// minus sign is tested here; the consequence is not visible.
122 if (rightAfterEscape && chars[i] !=
'-')
// Leftover bits at the end of a base64 run were not all zero; the
// consequence is not visible.
124 if (!onlyNullBitsSinceLastBoundary)
// The plus sign starts an escape.
130 if (chars[i] ==
'+') {
132 rightAfterEscape = TRUE;
// Decoder derived from TQTextDecoder that turns UTF-7 bytes into a TQString.
// It keeps parser state in members (uc, stepNo, shifted, rightAfterEscape),
// all of which the constructor zero-initialises.
// NOTE(review): large parts of the class are missing from this extract; the
// comments below cover only the visible statements.
139 class TQUtf7Decoder :
public TQTextDecoder {
// True while the previous input character was the escaping plus sign.
157 bool rightAfterEscape;
159 TQUtf7Decoder() : uc(0), stepNo(0), shifted(FALSE), rightAfterEscape(FALSE)
// Resets parser state; only the rightAfterEscape reset is visible here.
164 inline void resetParser()
169 rightAfterEscape = FALSE;
// Decodes len bytes from chars and returns the decoded text.
173 TQString toUnicode(
const char* chars,
int len)
175 TQString result =
"";
176 for (
int i=0; i<len; i++) {
// 8-bit input is reported with a warning and a replacement character is
// appended to the result.
183 tqWarning(
"TQUtf7Decoder: 8bit char found in input. "
184 "Parser has been re-initialized!");
186 result += TQChar::replacement;
// Plus sign immediately followed by minus sign yields a literal plus sign.
196 if ( rightAfterEscape && ch ==
'-' ) {
200 result += TQChar(
'+');
// Map a base64 character to its 6-bit value: lower-case letters start at
// 26 and digits at 52. The assignments for upper-case letters, plus sign
// and slash are not visible in this extract.
209 if ( ch >=
'A' && ch <=
'Z' ) {
211 }
else if ( ch >=
'a' && ch <=
'z' ) {
212 bits = ch -
'a' + 26;
213 }
else if ( ch >=
'0' && ch <=
'9' ) {
214 bits = ch -
'0' + 52;
215 }
else if ( ch ==
'+' ) {
217 }
else if ( ch ==
'/' ) {
// A non-base64 character right after the escaping plus sign is reported.
226 if ( rightAfterEscape ) {
231 tqWarning(
"TQUtf7Decoder: ill-formed input: "
232 "non-base64 char after escaping \"+\"!");
// Leftover bits at the end of a shifted sequence must be zero; otherwise
// a warning is issued.
235 if (stepNo >= 1 && uc) {
236 tqWarning(
"TQUtf7Decoder: ill-formed sequence: "
237 "non-zero bits in shifted-sequence tail!");
// Eight 6-bit groups are assembled into three 16-bit units:
//   steps 0-2 build unit one (6 + 6 + 4 bits), step 2 emits it and carries
//   its 2 low bits into unit two;
//   steps 3-5 complete unit two (2 + 6 + 6 + 2 bits), step 5 emits it and
//   carries its 4 low bits into unit three;
//   steps 6-7 complete unit three (4 + 6 + 6 bits), step 7 emits it.
256 case 0: uc = bits << 10;
break;
258 case 1: uc |= bits << 4;
break;
261 case 2: uc |= bits >> 2; result += TQChar(uc);
263 uc = bits << 14;
break;
264 case 3: uc |= bits << 8;
break;
265 case 4: uc |= bits << 2;
break;
268 case 5: uc |= bits >> 4; result += TQChar(uc);
270 uc = bits << 12;
break;
271 case 6: uc |= bits << 6;
break;
275 case 7: uc |= bits; result += TQChar(uc);
282 rightAfterEscape = FALSE;
// NOTE(review): presumably reached when the plus sign is read outside a
// shifted sequence - the enclosing condition is not visible.
302 rightAfterEscape = TRUE;
// Character is copied to the output unchanged.
305 result += TQChar(ch);
// Returns a newly heap-allocated TQUtf7Decoder (plain new).
// NOTE(review): who deletes the decoder is not visible in this file.
315 TQTextDecoder* TQUtf7Codec::makeDecoder()
const
317 return new TQUtf7Decoder;
// Encoder derived from TQTextEncoder that turns Unicode text into UTF-7.
// State kept between calls: outbits (bits not yet written), stepNo
// (position in a cycle of three 16-bit units), shifted, and
// mayContinueShiftedSequence.
// NOTE(review): large parts of the class are missing from this extract; the
// comments below cover only the visible statements.
321 class TQUtf7Encoder :
public TQTextEncoder {
// Per-instance 128-bit bitmap of characters written without base64 encoding.
322 uchar dontNeedEncodingSet[16];
326 bool mayContinueShiftedSequence : 1;
// Builds dontNeedEncodingSet from directSet, then ORs in optDirectSet and
// the bits for space, newline, carriage return and tab.
// NOTE(review): the conditions guarding the OR steps are not visible;
// presumably they depend on encOpt and encLwsp - confirm.
328 TQUtf7Encoder(
bool encOpt,
bool encLwsp)
329 : outbits(0), stepNo(0),
330 shifted(FALSE), mayContinueShiftedSequence(FALSE)
332 for (
int i = 0; i < 16 ; i++) {
333 dontNeedEncodingSet[i] = directSet[i];
335 dontNeedEncodingSet[i] |= optDirectSet[i];
338 dontNeedEncodingSet[
' '/8] |= 0x80 >> (
' '%8);
339 dontNeedEncodingSet[
'\n'/8] |= 0x80 >> (
'\n'%8);
340 dontNeedEncodingSet[
'\r'/8] |= 0x80 >> (
'\r'%8);
341 dontNeedEncodingSet[
'\t'/8] |= 0x80 >> (
'\t'%8);
// Maps a 6-bit value to its base64 character. Three of the ranges are
// visible: offset from A, offset from a (values from 26), offset from 0
// (values from 52). The range tests and remaining cases are not visible.
347 char toBase64( ushort u ) {
349 return (
char)u +
'A';
351 return (
char)u - 26 +
'a';
353 return (
char)u - 52 +
'0';
// Appends the base64 characters for the 16-bit unit u at iterator t.
// Three units (48 bits) form a cycle of eight characters:
//   step 0: writes bits 15-10 and 9-4, keeps bits 3-0 in outbits;
//   step 1: writes outbits with bits 15-14, then bits 13-8 and 7-2,
//           keeps bits 1-0 in outbits;
//   step 2: writes outbits with bits 15-12, then bits 11-6 and 5-0.
360 void addToShiftedSequence(TQCString::Iterator & t, ushort u) {
364 *t++ = toBase64( u >> 10 );
365 *t++ = toBase64( (u & 0x03FF ) >> 4 );
367 outbits = (u & 0x000F) << 2;
373 if (!mayContinueShiftedSequence) {
375 *t++ = toBase64( outbits | ( u >> 14 ) );
377 *t++ = toBase64( (u & 0x3F00 ) >> 8 );
378 *t++ = toBase64( (u & 0x00FC ) >> 2 );
380 outbits = (u & 0x0003) << 4;
386 if (!mayContinueShiftedSequence) {
388 *t++ = toBase64( outbits | ( u >> 12 ) );
390 *t++ = toBase64( (u & 0x0FFF) >> 6 );
391 *t++ = toBase64( u & 0x003F );
// Advance within the three-unit cycle.
396 stepNo = (stepNo + 1) % 3;
// Finishes a shifted sequence; the visible statement writes the pending
// outbits as one base64 character (its guarding condition is not visible).
399 void endShiftedSequence(TQCString::Iterator & t) {
403 *t++ = toBase64( outbits );
// True when u can be appended to the previous shifted sequence: always at
// step 0, at step 1 only if the top four bits of u are zero, at step 2
// only if the top two bits of u are zero.
415 bool continueOK( ushort u ) {
416 return stepNo == 0 ||
417 ( stepNo == 1 && (u & 0xF000) == 0 ) ||
418 ( stepNo == 2 && (u & 0xC000) == 0 );
// Handles a character that is written directly. Visible steps: end the
// shifted sequence, and test whether ch is a base64 character or the minus
// sign (the action taken on that test is not visible).
421 void processDoesntNeedEncoding(TQCString::Iterator & t, ushort ch) {
424 endShiftedSequence(t);
426 if (isOfSet((
char)ch,base64Set) || ch ==
'-' ) {
429 }
else if (mayContinueShiftedSequence) {
432 mayContinueShiftedSequence = FALSE;
433 if (isOfSet(ch,base64Set) || ch ==
'-' ) {
// Encodes uc into a TQCString. On entry len_in_out is the number of input
// characters; on exit it is set to the number of bytes written.
443 TQCString fromUnicode(
const TQString & uc,
int & len_in_out)
// Output buffer size: three bytes per input character plus five.
450 int maxreslen = 3 * len_in_out + 5;
451 TQCString result( maxreslen );
// Diagnostic dump of the encoder state.
// NOTE(review): presumably compiled only in a debug configuration - the
// surrounding preprocessor lines are not visible.
455 cout <<
"\nlen_in_out: " << len_in_out
456 <<
"; shifted: " << (shifted ?
"true" :
"false")
457 <<
";\n" <<
"mayContinue: "
458 << (mayContinueShiftedSequence ?
"true" :
"false")
459 <<
"; stepNo: " << stepNo <<
";\n"
460 <<
"outbits: " << outbits << endl;
465 const TQChar * s = uc.unicode();
466 TQCString::Iterator t = result.data();
470 if ( mayContinueShiftedSequence )
474 for (
int i = 0 ; i < len_in_out ;
476 ushort ch = s[i].unicode();
// The cast keeps only the low 8 bits of ch and isOfSet indexes a 16-byte
// bitmap, so ch must already be known to be below 128 here.
// NOTE(review): that guard is not visible in this extract - confirm.
486 if ( isOfSet((uchar)ch,dontNeedEncodingSet) ) {
487 processDoesntNeedEncoding(t,ch);
489 }
else if ( ch ==
'+' ) {
// Inside or right after a shifted sequence the plus sign is itself
// base64-encoded.
491 if (shifted || mayContinueShiftedSequence) {
496 addToShiftedSequence(t,ch);
497 mayContinueShiftedSequence = FALSE;
// Character needs encoding: test whether a new shifted sequence has to be
// opened (the action is not visible), then append the unit.
510 if (!shifted && (!mayContinueShiftedSequence || !continueOK(ch) ) ) {
514 addToShiftedSequence(t,ch);
516 mayContinueShiftedSequence = FALSE;
520 endShiftedSequence(t);
521 mayContinueShiftedSequence = TRUE;
// Report the number of bytes produced back to the caller.
527 len_in_out = t - result.data();
530 cout <<
"len_in_out: " << len_in_out <<
"; "
531 <<
"mayContinue: " << (mayContinueShiftedSequence ?
"true" :
"false")
532 <<
"; stepNo: " << stepNo << endl;
// The bytes written must fit in the buffer with one byte to spare.
535 Q_ASSERT(len_in_out <= maxreslen-1);
// Returns a newly heap-allocated TQUtf7Encoder constructed with
// encOpt = false and encLwsp = false.
// NOTE(review): who deletes the encoder is not visible in this file.
542 TQTextEncoder* TQUtf7Codec::makeEncoder()
const {
543 return new TQUtf7Encoder(
false,
false );
// Returns a newly heap-allocated TQUtf7Encoder constructed with
// encOpt = true and encLwsp = false; the first argument is the only
// difference from TQUtf7Codec::makeEncoder.
// NOTE(review): who deletes the encoder is not visible in this file.
546 TQTextEncoder* TQStrictUtf7Codec::makeEncoder()
const {
547 return new TQUtf7Encoder(
true,
false );