libkmime

kmime_header_parsing.cpp
1/*
2 kmime_header_parsing.cpp
3
4 This file is part of KMime, the KDE internet mail/usenet news message library.
5 Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
6
7 KMime is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License, version 2, as
9 published by the Free Software Foundation.
10
11 KMime is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
20 In addition, as a special exception, the copyright holders give
21 permission to link the code of this library with any edition of
22 the TQt library by Trolltech AS, Norway (or with modified versions
23 of TQt that use the same license as TQt), and distribute linked
24 combinations including the two. You must obey the GNU General
25 Public License in all respects for all of the code used other than
26 TQt. If you modify this file, you may extend this exception to
27 your version of the file, but you are not obligated to do so. If
28 you do not wish to do so, delete this exception statement from
29 your version.
30*/
31
32#include <config.h>
33#include "kmime_header_parsing.h"
34
35#include "kmime_codecs.h"
36#include "kmime_util.h"
37#include "kmime_warning.h"
38
39#include <tdeglobal.h>
40#include <kcharsets.h>
41
42#include <tqtextcodec.h>
43#include <tqmap.h>
44#include <tqcstring.h>
45#include <tqstringlist.h>
46
47#include <ctype.h> // for isdigit
48#include <cassert>
49
50using namespace KMime;
51using namespace KMime::Types;
52
53namespace KMime {
54
55namespace Types {
56
57 TQString AddrSpec::asString() const {
58 bool needsQuotes = false;
59 TQString result;
60 result.reserve( localPart.length() + domain.length() + 1 );
61 for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) {
62 const char ch = localPart[i].latin1();
63 if ( ch == '.' || isAText( ch ) )
64 result += ch;
65 else {
66 needsQuotes = true;
67 if ( ch == '\\' || ch == '"' )
68 result += '\\';
69 result += ch;
70 }
71 }
72 if ( needsQuotes )
73 return '"' + result + "\"@" + domain;
74 else
75 return result + '@' + domain;
76 }
77
78}
79
80namespace HeaderParsing {
81
82// parse the encoded-word (scursor points to after the initial '=')
83bool parseEncodedWord( const char* & scursor, const char * const send,
84 TQString & result, TQCString & language ) {
85
86 // make sure the caller already did a bit of the work.
87 assert( *(scursor-1) == '=' );
88
89 //
90 // STEP 1:
91 // scan for the charset/language portion of the encoded-word
92 //
93
94 char ch = *scursor++;
95
96 if ( ch != '?' ) {
97 kdDebug() << "first" << endl;
98 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
99 return false;
100 }
101
102 // remember start of charset (ie. just after the initial "=?") and
103 // language (just after the first '*') fields:
104 const char * charsetStart = scursor;
105 const char * languageStart = 0;
106
107 // find delimiting '?' (and the '*' separating charset and language
108 // tags, if any):
109 for ( ; scursor != send ; scursor++ )
110 if ( *scursor == '?')
111 break;
112 else if ( *scursor == '*' && !languageStart )
113 languageStart = scursor + 1;
114
115 // not found? can't be an encoded-word!
116 if ( scursor == send || *scursor != '?' ) {
117 kdDebug() << "second" << endl;
118 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
119 return false;
120 }
121
122 // extract the language information, if any (if languageStart is 0,
123 // language will be null, too):
124 TQCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/);
125 // extract charset information (keep in mind: the size given to the
126 // ctor is one off due to the \0 terminator):
127 TQCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart );
128
129 //
130 // STEP 2:
131 // scan for the encoding portion of the encoded-word
132 //
133
134
135 // remember start of encoding (just _after_ the second '?'):
136 scursor++;
137 const char * encodingStart = scursor;
138
139 // find next '?' (ending the encoding tag):
140 for ( ; scursor != send ; scursor++ )
141 if ( *scursor == '?' ) break;
142
143 // not found? Can't be an encoded-word!
144 if ( scursor == send || *scursor != '?' ) {
145 kdDebug() << "third" << endl;
146 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
147 return false;
148 }
149
150 // extract the encoding information:
151 TQCString maybeEncoding( encodingStart, scursor - encodingStart + 1 );
152
153
154 kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
155 << "\"; language == \"" << maybeLanguage
156 << "\"; encoding == \"" << maybeEncoding << "\"" << endl;
157
158 //
159 // STEP 3:
160 // scan for encoded-text portion of encoded-word
161 //
162
163
164 // remember start of encoded-text (just after the third '?'):
165 scursor++;
166 const char * encodedTextStart = scursor;
167
168 // find next '?' (ending the encoded-text):
169 for ( ; scursor != send ; scursor++ )
170 if ( *scursor == '?' ) break;
171
172 // not found? Can't be an encoded-word!
173 // ### maybe evaluate it nonetheless if the rest is OK?
174 if ( scursor == send || *scursor != '?' ) {
175 kdDebug() << "fourth" << endl;
176 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
177 return false;
178 }
179 scursor++;
180 // check for trailing '=':
181 if ( scursor == send || *scursor != '=' ) {
182 kdDebug() << "fifth" << endl;
183 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
184 return false;
185 }
186 scursor++;
187
188 // set end sentinel for encoded-text:
189 const char * const encodedTextEnd = scursor - 2;
190
191 //
192 // STEP 4:
193 // setup decoders for the transfer encoding and the charset
194 //
195
196
197 // try if there's a codec for the encoding found:
198 Codec * codec = Codec::codecForName( maybeEncoding );
199 if ( !codec ) {
200 KMIME_WARN_UNKNOWN(Encoding,maybeEncoding);
201 return false;
202 }
203
204 // get an instance of a corresponding decoder:
205 Decoder * dec = codec->makeDecoder();
206 assert( dec );
207
208 // try if there's a (text)codec for the charset found:
209 bool matchOK = false;
210 TQTextCodec
211 *textCodec = TDEGlobal::charsets()->codecForName( maybeCharset, matchOK );
212
213 if ( !matchOK || !textCodec ) {
214 KMIME_WARN_UNKNOWN(Charset,maybeCharset);
215 delete dec;
216 return false;
217 };
218
219 kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl;
220
221 // allocate a temporary buffer to store the 8bit text:
222 int encodedTextLength = encodedTextEnd - encodedTextStart;
223 TQByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) );
224 TQByteArray::Iterator bit = buffer.begin();
225 TQByteArray::ConstIterator bend = buffer.end();
226
227 //
228 // STEP 5:
229 // do the actual decoding
230 //
231
232 if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) )
233 KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( "
234 << encodedTextLength << " )\nresult may be truncated" << endl;
235
236 result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
237
238 kdDebug() << "result now: \"" << result << "\"" << endl;
239 // cleanup:
240 delete dec;
241 language = maybeLanguage;
242
243 return true;
244}
245
// Advances scursor over any run of SP, HTAB, CR and LF characters.
// Stops at the first other character or at send, whichever comes first.
static inline void eatWhiteSpace( const char* & scursor, const char * const send ) {
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      continue;   // still whitespace: move on to the next char
    default:
      return;     // first non-whitespace char: leave the cursor on it
    }
  }
}
252
253bool parseAtom( const char * & scursor, const char * const send,
254 TQString & result, bool allow8Bit )
255{
256 TQPair<const char*,int> maybeResult;
257
258 if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
259 result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
260 return true;
261 }
262
263 return false;
264}
265
266bool parseAtom( const char * & scursor, const char * const send,
267 TQPair<const char*,int> & result, bool allow8Bit ) {
268 bool success = false;
269 const char * start = scursor;
270
271 while ( scursor != send ) {
272 signed char ch = *scursor++;
273 if ( ch > 0 && isAText(ch) ) {
274 // AText: OK
275 success = true;
276 } else if ( allow8Bit && ch < 0 ) {
277 // 8bit char: not OK, but be tolerant.
278 KMIME_WARN_8BIT(ch);
279 success = true;
280 } else {
281 // CTL or special - marking the end of the atom:
282 // re-set sursor to point to the offending
283 // char and return:
284 scursor--;
285 break;
286 }
287 }
288 result.first = start;
289 result.second = scursor - start;
290 return success;
291}
292
293bool parseToken( const char * & scursor, const char * const send,
294 TQString & result, bool allow8Bit )
295{
296 TQPair<const char*,int> maybeResult;
297
298 if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
299 result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
300 return true;
301 }
302
303 return false;
304}
305
306bool parseToken( const char * & scursor, const char * const send,
307 TQPair<const char*,int> & result, bool allow8Bit )
308{
309 bool success = false;
310 const char * start = scursor;
311
312 while ( scursor != send ) {
313 signed char ch = *scursor++;
314 if ( ch > 0 && isTText(ch) ) {
315 // TText: OK
316 success = true;
317 } else if ( allow8Bit && ch < 0 ) {
318 // 8bit char: not OK, but be tolerant.
319 KMIME_WARN_8BIT(ch);
320 success = true;
321 } else {
322 // CTL or tspecial - marking the end of the atom:
323 // re-set sursor to point to the offending
324 // char and return:
325 scursor--;
326 break;
327 }
328 }
329 result.first = start;
330 result.second = scursor - start;
331 return success;
332}
333
// Reads the next input char into the local variable `ch` and advances
// `scursor`. If the input is exhausted (`scursor == send`) it warns about a
// premature end and makes the enclosing function return false.
// Relies on `ch`, `scursor` and `send` being in scope at the point of use.
#define READ_ch_OR_FAIL if ( scursor != send ) { \
    ch = *scursor++; \
  } else { \
    KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \
    return false; \
  }
340
341// known issues:
342//
343// - doesn't handle quoted CRLF
344
345bool parseGenericQuotedString( const char* & scursor, const char * const send,
346 TQString & result, bool isCRLF,
347 const char openChar, const char closeChar )
348{
349 char ch;
350 // We are in a quoted-string or domain-literal or comment and the
351 // cursor points to the first char after the openChar.
352 // We will apply unfolding and quoted-pair removal.
353 // We return when we either encounter the end or unescaped openChar
354 // or closeChar.
355
356 assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
357
358 while ( scursor != send ) {
359 ch = *scursor++;
360
361 if ( ch == closeChar || ch == openChar ) {
362 // end of quoted-string or another opening char:
363 // let caller decide what to do.
364 return true;
365 }
366
367 switch( ch ) {
368 case '\\': // quoted-pair
369 // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
370 READ_ch_OR_FAIL;
371 KMIME_WARN_IF_8BIT(ch);
372 result += TQChar(ch);
373 break;
374 case '\r':
375 // ###
376 // The case of lonely '\r' is easy to solve, as they're
377 // not part of Unix Line-ending conventions.
378 // But I see a problem if we are given Unix-native
379 // line-ending-mails, where we cannot determine anymore
380 // whether a given '\n' was part of a CRLF or was occurring
381 // on it's own.
382 READ_ch_OR_FAIL;
383 if ( ch != '\n' ) {
384 // CR on it's own...
385 KMIME_WARN_LONE(CR);
386 result += TQChar('\r');
387 scursor--; // points to after the '\r' again
388 } else {
389 // CRLF encountered.
390 // lookahead: check for folding
391 READ_ch_OR_FAIL;
392 if ( ch == ' ' || ch == '\t' ) {
393 // correct folding;
394 // position cursor behind the CRLF WSP (unfolding)
395 // and add the WSP to the result
396 result += TQChar(ch);
397 } else {
398 // this is the "shouldn't happen"-case. There is a CRLF
399 // inside a quoted-string without it being part of FWS.
400 // We take it verbatim.
401 KMIME_WARN_NON_FOLDING(CRLF);
402 result += "\r\n";
403 // the cursor is decremented again, so's we need not
404 // duplicate the whole switch here. "ch" could've been
405 // everything (incl. openChar or closeChar).
406 scursor--;
407 }
408 }
409 break;
410 case '\n':
411 // Note: CRLF has been handled above already!
412 // ### LF needs special treatment, depending on whether isCRLF
413 // is true (we can be sure a lonely '\n' was meant this way) or
414 // false ('\n' alone could have meant LF or CRLF in the original
415 // message. This parser assumes CRLF iff the LF is followed by
416 // either WSP (folding) or NULL (premature end of quoted-string;
417 // Should be fixed, since NULL is allowed as per rfc822).
418 READ_ch_OR_FAIL;
419 if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
420 // folding
421 // correct folding
422 result += TQChar(ch);
423 } else {
424 // non-folding
425 KMIME_WARN_LONE(LF);
426 result += TQChar('\n');
427 // pos is decremented, so's we need not duplicate the whole
428 // switch here. ch could've been everything (incl. <">, "\").
429 scursor--;
430 }
431 break;
432 default:
433 KMIME_WARN_IF_8BIT(ch);
434 result += TQChar(ch);
435 }
436 }
437
438 return false;
439}
440
441// known issues:
442//
443// - doesn't handle encoded-word inside comments.
444
445bool parseComment( const char* & scursor, const char * const send,
446 TQString & result, bool isCRLF, bool reallySave )
447{
448 int commentNestingDepth = 1;
449 const char * afterLastClosingParenPos = 0;
450 TQString maybeCmnt;
451 const char * oldscursor = scursor;
452
453 assert( *(scursor-1) == '(' );
454
455 while ( commentNestingDepth ) {
456 TQString cmntPart;
457 if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
458 assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
459 // see the kdoc for above function for the possible conditions
460 // we have to check:
461 switch ( *(scursor-1) ) {
462 case ')':
463 if ( reallySave ) {
464 // add the chunk that's now surely inside the comment.
465 result += maybeCmnt;
466 result += cmntPart;
467 if ( commentNestingDepth > 1 ) // don't add the outermost ')'...
468 result += TQChar(')');
469 maybeCmnt = TQString();
470 }
471 afterLastClosingParenPos = scursor;
472 --commentNestingDepth;
473 break;
474 case '(':
475 if ( reallySave ) {
476 // don't add to "result" yet, because we might find that we
477 // are already outside the (broken) comment...
478 maybeCmnt += cmntPart;
479 maybeCmnt += TQChar('(');
480 }
481 ++commentNestingDepth;
482 break;
483 default: assert( 0 );
484 } // switch
485 } else {
486 // !parseGenericQuotedString, ie. premature end
487 if ( afterLastClosingParenPos )
488 scursor = afterLastClosingParenPos;
489 else
490 scursor = oldscursor;
491 return false;
492 }
493 } // while
494
495 return true;
496}
497
498
499// known issues: none.
500
501bool parsePhrase( const char* & scursor, const char * const send,
502 TQString & result, bool isCRLF )
503{
504 enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None;
505 TQString tmp;
506 TQCString lang;
507 const char * successfullyParsed = 0;
508 // only used by the encoded-word branch
509 const char * oldscursor;
510 // used to suppress whitespace between adjacent encoded-words
511 // (rfc2047, 6.2):
512 bool lastWasEncodedWord = false;
513
514 while ( scursor != send ) {
515 char ch = *scursor++;
516 switch ( ch ) {
517 case '.': // broken, but allow for intorop's sake
518 if ( found == None ) {
519 --scursor;
520 return false;
521 } else {
522 if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) )
523 result += ". ";
524 else
525 result += '.';
526 successfullyParsed = scursor;
527 }
528 break;
529 case '"': // quoted-string
530 tmp = TQString();
531 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
532 successfullyParsed = scursor;
533 assert( *(scursor-1) == '"' );
534 switch ( found ) {
535 case None:
536 found = QuotedString;
537 break;
538 case Phrase:
539 case Atom:
540 case EncodedWord:
541 case QuotedString:
542 found = Phrase;
543 result += TQChar(' '); // rfc822, 3.4.4
544 break;
545 default:
546 assert( 0 );
547 }
548 lastWasEncodedWord = false;
549 result += tmp;
550 } else {
551 // premature end of quoted string.
552 // What to do? Return leading '"' as special? Return as quoted-string?
553 // We do the latter if we already found something, else signal failure.
554 if ( found == None ) {
555 return false;
556 } else {
557 result += TQChar(' '); // rfc822, 3.4.4
558 result += tmp;
559 return true;
560 }
561 }
562 break;
563 case '(': // comment
564 // parse it, but ignore content:
565 tmp = TQString();
566 if ( parseComment( scursor, send, tmp, isCRLF,
567 false /*don't bother with the content*/ ) ) {
568 successfullyParsed = scursor;
569 lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
570 } else {
571 if ( found == None )
572 return false;
573 else {
574 scursor = successfullyParsed;
575 return true;
576 }
577 }
578 break;
579 case '=': // encoded-word
580 tmp = TQString();
581 oldscursor = scursor;
582 lang = 0;
583 if ( parseEncodedWord( scursor, send, tmp, lang ) ) {
584 successfullyParsed = scursor;
585 switch ( found ) {
586 case None:
587 found = EncodedWord;
588 break;
589 case Phrase:
590 case EncodedWord:
591 case Atom:
592 case QuotedString:
593 if ( !lastWasEncodedWord )
594 result += TQChar(' '); // rfc822, 3.4.4
595 found = Phrase;
596 break;
597 default: assert( 0 );
598 }
599 lastWasEncodedWord = true;
600 result += tmp;
601 break;
602 } else
603 // parse as atom:
604 scursor = oldscursor;
605 // fall though...
606
607 default: //atom
608 tmp = TQString();
609 scursor--;
610 if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
611 successfullyParsed = scursor;
612 switch ( found ) {
613 case None:
614 found = Atom;
615 break;
616 case Phrase:
617 case Atom:
618 case EncodedWord:
619 case QuotedString:
620 found = Phrase;
621 result += TQChar(' '); // rfc822, 3.4.4
622 break;
623 default:
624 assert( 0 );
625 }
626 lastWasEncodedWord = false;
627 result += tmp;
628 } else {
629 if ( found == None )
630 return false;
631 else {
632 scursor = successfullyParsed;
633 return true;
634 }
635 }
636 }
637 eatWhiteSpace( scursor, send );
638 }
639
640 return ( found != None );
641}
642
643
644bool parseDotAtom( const char* & scursor, const char * const send,
645 TQString & result, bool isCRLF )
646{
647 // always points to just after the last atom parsed:
648 const char * successfullyParsed;
649
650 TQString tmp;
651 if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
652 return false;
653 result += tmp;
654 successfullyParsed = scursor;
655
656 while ( scursor != send ) {
657 eatCFWS( scursor, send, isCRLF );
658
659 // end of header or no '.' -> return
660 if ( scursor == send || *scursor != '.' ) return true;
661 scursor++; // eat '.'
662
663 eatCFWS( scursor, send, isCRLF );
664
665 if ( scursor == send || !isAText( *scursor ) ) {
666 // end of header or no AText, but this time following a '.'!:
667 // reset cursor to just after last successfully parsed char and
668 // return:
669 scursor = successfullyParsed;
670 return true;
671 }
672
673 // try to parse the next atom:
674 TQString maybeAtom;
675 if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
676 scursor = successfullyParsed;
677 return true;
678 }
679
680 result += TQChar('.');
681 result += maybeAtom;
682 successfullyParsed = scursor;
683 }
684
685 scursor = successfullyParsed;
686 return true;
687}
688
689
690void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) {
691 TQString dummy;
692
693 while ( scursor != send ) {
694 const char * oldscursor = scursor;
695
696 char ch = *scursor++;
697
698 switch( ch ) {
699 case ' ':
700 case '\t': // whitespace
701 case '\r':
702 case '\n': // folding
703 continue;
704
705 case '(': // comment
706 if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) )
707 continue;
708 scursor = oldscursor;
709 return;
710
711 default:
712 scursor = oldscursor;
713 return;
714 }
715
716 }
717}
718
719bool parseDomain( const char* & scursor, const char * const send,
720 TQString & result, bool isCRLF ) {
721 eatCFWS( scursor, send, isCRLF );
722 if ( scursor == send ) return false;
723
724 // domain := dot-atom / domain-literal / atom *("." atom)
725 //
726 // equivalent to:
727 // domain = dot-atom / domain-literal,
728 // since parseDotAtom does allow CFWS between atoms and dots
729
730 if ( *scursor == '[' ) {
731 // domain-literal:
732 TQString maybeDomainLiteral;
733 // eat '[':
734 scursor++;
735 while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
736 isCRLF, '[', ']' ) ) {
737 if ( scursor == send ) {
738 // end of header: check for closing ']':
739 if ( *(scursor-1) == ']' ) {
740 // OK, last char was ']':
741 result = maybeDomainLiteral;
742 return true;
743 } else {
744 // not OK, domain-literal wasn't closed:
745 return false;
746 }
747 }
748 // we hit openChar in parseGenericQuotedString.
749 // include it in maybeDomainLiteral and keep on parsing:
750 if ( *(scursor-1) == '[' ) {
751 maybeDomainLiteral += TQChar('[');
752 continue;
753 }
754 // OK, real end of domain-literal:
755 result = maybeDomainLiteral;
756 return true;
757 }
758 } else {
759 // dot-atom:
760 TQString maybeDotAtom;
761 if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
762 result = maybeDotAtom;
763 return true;
764 }
765 }
766 return false;
767}
768
769bool parseObsRoute( const char* & scursor, const char* const send,
770 TQStringList & result, bool isCRLF, bool save ) {
771 while ( scursor != send ) {
772 eatCFWS( scursor, send, isCRLF );
773 if ( scursor == send ) return false;
774
775 // empty entry:
776 if ( *scursor == ',' ) {
777 scursor++;
778 if ( save ) result.append( TQString() );
779 continue;
780 }
781
782 // empty entry ending the list:
783 if ( *scursor == ':' ) {
784 scursor++;
785 if ( save ) result.append( TQString() );
786 return true;
787 }
788
789 // each non-empty entry must begin with '@':
790 if ( *scursor != '@' )
791 return false;
792 else
793 scursor++;
794
795 TQString maybeDomain;
796 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false;
797 if ( save ) result.append( maybeDomain );
798
799 // eat the following (optional) comma:
800 eatCFWS( scursor, send, isCRLF );
801 if ( scursor == send ) return false;
802 if ( *scursor == ':' ) { scursor++; return true; }
803 if ( *scursor == ',' ) scursor++;
804
805 }
806
807 return false;
808}
809
810bool parseAddrSpec( const char* & scursor, const char * const send,
811 AddrSpec & result, bool isCRLF ) {
812 //
813 // STEP 1:
814 // local-part := dot-atom / quoted-string / word *("." word)
815 //
816 // this is equivalent to:
817 // local-part := word *("." word)
818
819 TQString maybeLocalPart;
820 TQString tmp;
821
822 while ( scursor != send ) {
823 // first, eat any whitespace
824 eatCFWS( scursor, send, isCRLF );
825
826 char ch = *scursor++;
827 switch ( ch ) {
828 case '.': // dot
829 maybeLocalPart += TQChar('.');
830 break;
831
832 case '@':
833 goto SAW_AT_SIGN;
834 break;
835
836 case '"': // quoted-string
837 tmp = TQString();
838 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) )
839 maybeLocalPart += tmp;
840 else
841 return false;
842 break;
843
844 default: // atom
845 scursor--; // re-set scursor to point to ch again
846 tmp = TQString();
847 if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
848 maybeLocalPart += tmp;
849 else
850 return false; // parseAtom can only fail if the first char is non-atext.
851 break;
852 }
853 }
854
855 return false;
856
857
858 //
859 // STEP 2:
860 // domain
861 //
862
863SAW_AT_SIGN:
864
865 assert( *(scursor-1) == '@' );
866
867 TQString maybeDomain;
868 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) )
869 return false;
870
871 result.localPart = maybeLocalPart;
872 result.domain = maybeDomain;
873
874 return true;
875}
876
877
878bool parseAngleAddr( const char* & scursor, const char * const send,
879 AddrSpec & result, bool isCRLF ) {
880 // first, we need an opening angle bracket:
881 eatCFWS( scursor, send, isCRLF );
882 if ( scursor == send || *scursor != '<' ) return false;
883 scursor++; // eat '<'
884
885 eatCFWS( scursor, send, isCRLF );
886 if ( scursor == send ) return false;
887
888 if ( *scursor == '@' || *scursor == ',' ) {
889 // obs-route: parse, but ignore:
890 KMIME_WARN << "obsolete source route found! ignoring." << endl;
891 TQStringList dummy;
892 if ( !parseObsRoute( scursor, send, dummy,
893 isCRLF, false /* don't save */ ) )
894 return false;
895 // angle-addr isn't complete until after the '>':
896 if ( scursor == send ) return false;
897 }
898
899 // parse addr-spec:
900 AddrSpec maybeAddrSpec;
901 if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false;
902
903 eatCFWS( scursor, send, isCRLF );
904 if ( scursor == send || *scursor != '>' ) return false;
905 scursor++;
906
907 result = maybeAddrSpec;
908 return true;
909
910}
911
912bool parseMailbox( const char* & scursor, const char * const send,
913 Mailbox & result, bool isCRLF ) {
914
915 // rfc:
916 // mailbox := addr-spec / ([ display-name ] angle-addr)
917 // us:
918 // mailbox := addr-spec / ([ display-name ] angle-addr)
919 // / (angle-addr "(" display-name ")")
920
921 eatCFWS( scursor, send, isCRLF );
922 if ( scursor == send ) return false;
923
924 AddrSpec maybeAddrSpec;
925
926 // first, try if it's a vanilla addr-spec:
927 const char * oldscursor = scursor;
928 if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
929 result.displayName = TQString();
930 result.addrSpec = maybeAddrSpec;
931 return true;
932 }
933 scursor = oldscursor;
934
935 // second, see if there's a display-name:
936 TQString maybeDisplayName;
937 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
938 // failed: reset cursor, note absent display-name
939 maybeDisplayName = TQString();
940 scursor = oldscursor;
941 } else {
942 // succeeded: eat CFWS
943 eatCFWS( scursor, send, isCRLF );
944 if ( scursor == send ) return false;
945 }
946
947 // third, parse the angle-addr:
948 if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) )
949 return false;
950
951 if ( maybeDisplayName.isNull() ) {
952 // check for the obsolete form of display-name (as comment):
953 eatWhiteSpace( scursor, send );
954 if ( scursor != send && *scursor == '(' ) {
955 scursor++;
956 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) )
957 return false;
958 }
959 }
960
961 result.displayName = maybeDisplayName;
962 result.addrSpec = maybeAddrSpec;
963 return true;
964}
965
966bool parseGroup( const char* & scursor, const char * const send,
967 Address & result, bool isCRLF ) {
968 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
969 //
970 // equivalent to:
971 // group := display-name ":" [ obs-mbox-list ] ";"
972
973 eatCFWS( scursor, send, isCRLF );
974 if ( scursor == send ) return false;
975
976 // get display-name:
977 TQString maybeDisplayName;
978 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) )
979 return false;
980
981 // get ":":
982 eatCFWS( scursor, send, isCRLF );
983 if ( scursor == send || *scursor != ':' ) return false;
984
985 result.displayName = maybeDisplayName;
986
987 // get obs-mbox-list (may contain empty entries):
988 scursor++;
989 while ( scursor != send ) {
990 eatCFWS( scursor, send, isCRLF );
991 if ( scursor == send ) return false;
992
993 // empty entry:
994 if ( *scursor == ',' ) { scursor++; continue; }
995
996 // empty entry ending the list:
997 if ( *scursor == ';' ) { scursor++; return true; }
998
999 Mailbox maybeMailbox;
1000 if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) )
1001 return false;
1002 result.mailboxList.append( maybeMailbox );
1003
1004 eatCFWS( scursor, send, isCRLF );
1005 // premature end:
1006 if ( scursor == send ) return false;
1007 // regular end of the list:
1008 if ( *scursor == ';' ) { scursor++; return true; }
1009 // eat regular list entry separator:
1010 if ( *scursor == ',' ) scursor++;
1011 }
1012 return false;
1013}
1014
1015
1016bool parseAddress( const char* & scursor, const char * const send,
1017 Address & result, bool isCRLF ) {
1018 // address := mailbox / group
1019
1020 eatCFWS( scursor, send, isCRLF );
1021 if ( scursor == send ) return false;
1022
1023 // first try if it's a single mailbox:
1024 Mailbox maybeMailbox;
1025 const char * oldscursor = scursor;
1026 if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
1027 // yes, it is:
1028 result.displayName = TQString();
1029 result.mailboxList.append( maybeMailbox );
1030 return true;
1031 }
1032 scursor = oldscursor;
1033
1034 Address maybeAddress;
1035
1036 // no, it's not a single mailbox. Try if it's a group:
1037 if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) )
1038 return false;
1039
1040 result = maybeAddress;
1041 return true;
1042}
1043
1044bool parseAddressList( const char* & scursor, const char * const send,
1045 AddressList & result, bool isCRLF ) {
1046 while ( scursor != send ) {
1047 eatCFWS( scursor, send, isCRLF );
1048 // end of header: this is OK.
1049 if ( scursor == send ) return true;
1050 // empty entry: ignore:
1051 if ( *scursor == ',' ) { scursor++; continue; }
1052
1053 // parse one entry
1054 Address maybeAddress;
1055 if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false;
1056 result.append( maybeAddress );
1057
1058 eatCFWS( scursor, send, isCRLF );
1059 // end of header: this is OK.
1060 if ( scursor == send ) return true;
1061 // comma separating entries: eat it.
1062 if ( *scursor == ',' ) scursor++;
1063 }
1064 return true;
1065}
1066
1067
1068static TQString asterisk = TQString::fromLatin1("*0*",1);
1069static TQString asteriskZero = TQString::fromLatin1("*0*",2);
1070//static TQString asteriskZeroAsterisk = TQString::fromLatin1("*0*",3);
1071
1072bool parseParameter( const char* & scursor, const char * const send,
1073 TQPair<TQString,TQStringOrTQPair> & result, bool isCRLF ) {
1074 // parameter = regular-parameter / extended-parameter
1075 // regular-parameter = regular-parameter-name "=" value
1076 // extended-parameter =
1077 // value = token / quoted-string
1078 //
1079 // note that rfc2231 handling is out of the scope of this function.
1080 // Therefore we return the attribute as TQString and the value as
1081 // (start,length) tupel if we see that the value is encoded
1082 // (trailing asterisk), for parseParameterList to decode...
1083
1084 eatCFWS( scursor, send, isCRLF );
1085 if ( scursor == send ) return false;
1086
1087 //
1088 // parse the parameter name:
1089 //
1090 TQString maybeAttribute;
1091 if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) )
1092 return false;
1093
1094 eatCFWS( scursor, send, isCRLF );
1095 // premature end: not OK (haven't seen '=' yet).
1096 if ( scursor == send || *scursor != '=' ) return false;
1097 scursor++; // eat '='
1098
1099 eatCFWS( scursor, send, isCRLF );
1100 if ( scursor == send ) {
1101 // don't choke on attribute=, meaning the value was omitted:
1102 if ( maybeAttribute.endsWith( asterisk ) ) {
1103 KMIME_WARN << "attribute ends with \"*\", but value is empty! "
1104 "Chopping away \"*\"." << endl;
1105 maybeAttribute.truncate( maybeAttribute.length() - 1 );
1106 }
1107 result = qMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
1108 return true;
1109 }
1110
1111 const char * oldscursor = scursor;
1112
1113 //
1114 // parse the parameter value:
1115 //
1116 TQStringOrTQPair maybeValue;
1117 if ( *scursor == '"' ) {
1118 // value is a quoted-string:
1119 scursor++;
1120 if ( maybeAttribute.endsWith( asterisk ) ) {
1121 // attributes ending with "*" designate extended-parameters,
1122 // which cannot have quoted-strings as values. So we remove the
1123 // trailing "*" to not confuse upper layers.
1124 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! "
1125 "Chopping away \"*\"." << endl;
1126 maybeAttribute.truncate( maybeAttribute.length() - 1 );
1127 }
1128
1129 if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
1130 scursor = oldscursor;
1131 result = qMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
1132 return false; // this case needs further processing by upper layers!!
1133 }
1134 } else {
1135 // value is a token:
1136 if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
1137 scursor = oldscursor;
1138 result = qMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
1139 return false; // this case needs further processing by upper layers!!
1140 }
1141 }
1142
1143 result = qMakePair( maybeAttribute.lower(), maybeValue );
1144 return true;
1145}
1146
1147
1148
1149bool parseRawParameterList( const char* & scursor, const char * const send,
1150 TQMap<TQString,TQStringOrTQPair> & result,
1151 bool isCRLF ) {
1152 // we use parseParameter() consecutively to obtain a map of raw
1153 // attributes to raw values. "Raw" here means that we don't do
1154 // rfc2231 decoding and concatenation. This is left to
1155 // parseParameterList(), which will call this function.
1156 //
1157 // The main reason for making this chunk of code a separate
1158 // (private) method is that we can deal with broken parameters
1159 // _here_ and leave the rfc2231 handling solely to
1160 // parseParameterList(), which will still be enough work.
1161
1162 while ( scursor != send ) {
1163 eatCFWS( scursor, send, isCRLF );
1164 // empty entry ending the list: OK.
1165 if ( scursor == send ) return true;
1166 // empty list entry: ignore.
1167 if ( *scursor == ';' ) { scursor++; continue; }
1168
1169 TQPair<TQString,TQStringOrTQPair> maybeParameter;
1170 if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
1171 // we need to do a bit of work if the attribute is not
1172 // NULL. These are the cases marked with "needs further
1173 // processing" in parseParameter(). Specifically, parsing of the
1174 // token or the quoted-string, which should represent the value,
1175 // failed. We take the easy way out and simply search for the
1176 // next ';' to start parsing again. (Another option would be to
1177 // take the text between '=' and ';' as value)
1178 if ( maybeParameter.first.isNull() ) return false;
1179 while ( scursor != send ) {
1180 if ( *scursor++ == ';' ) goto IS_SEMICOLON;
1181 }
1182 // scursor == send case: end of list.
1183 return true;
1184 IS_SEMICOLON:
1185 // *scursor == ';' case: parse next entry.
1186 continue;
1187 }
1188 // successful parsing brings us here:
1189 result.insert( maybeParameter.first, maybeParameter.second );
1190
1191 eatCFWS( scursor, send, isCRLF );
1192 // end of header: ends list.
1193 if ( scursor == send ) return true;
1194 // regular separator: eat it.
1195 if ( *scursor == ';' ) scursor++;
1196 }
1197 return true;
1198}
1199
1200
1201static void decodeRFC2231Value( Codec* & rfc2231Codec,
1202 TQTextCodec* & textcodec,
1203 bool isContinuation, TQString & value,
1204 TQPair<const char*,int> & source ) {
1205
1206 //
1207 // parse the raw value into (charset,language,text):
1208 //
1209
1210 const char * decBegin = source.first;
1211 const char * decCursor = decBegin;
1212 const char * decEnd = decCursor + source.second;
1213
1214 if ( !isContinuation ) {
1215 // find the first single quote
1216 while ( decCursor != decEnd ) {
1217 if ( *decCursor == '\'' ) break;
1218 else decCursor++;
1219 }
1220
1221 if ( decCursor == decEnd ) {
1222 // there wasn't a single single quote at all!
1223 // take the whole value to be in latin-1:
1224 KMIME_WARN << "No charset in extended-initial-value. "
1225 "Assuming \"iso-8859-1\"." << endl;
1226 value += TQString::fromLatin1( decBegin, source.second );
1227 return;
1228 }
1229
1230 TQCString charset( decBegin, decCursor - decBegin + 1 );
1231
1232 const char * oldDecCursor = ++decCursor;
1233 // find the second single quote (we ignore the language tag):
1234 while ( decCursor != decEnd ) {
1235 if ( *decCursor == '\'' ) break;
1236 else decCursor++;
1237 }
1238 if ( decCursor == decEnd ) {
1239 KMIME_WARN << "No language in extended-initial-value. "
1240 "Trying to recover." << endl;
1241 decCursor = oldDecCursor;
1242 } else
1243 decCursor++;
1244
1245 // decCursor now points to the start of the
1246 // "extended-other-values":
1247
1248 //
1249 // get the decoders:
1250 //
1251
1252 bool matchOK = false;
1253 textcodec = TDEGlobal::charsets()->codecForName( charset, matchOK );
1254 if ( !matchOK ) {
1255 textcodec = 0;
1256 KMIME_WARN_UNKNOWN(Charset,charset);
1257 }
1258 }
1259
1260 if ( !rfc2231Codec ) {
1261 rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
1262 assert( rfc2231Codec );
1263 }
1264
1265 if ( !textcodec ) {
1266 value += TQString::fromLatin1( decCursor, decEnd - decCursor );
1267 return;
1268 }
1269
1270 Decoder * dec = rfc2231Codec->makeDecoder();
1271 assert( dec );
1272
1273 //
1274 // do the decoding:
1275 //
1276
1277 TQByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
1278 TQByteArray::Iterator bit = buffer.begin();
1279 TQByteArray::ConstIterator bend = buffer.end();
1280
1281 if ( !dec->decode( decCursor, decEnd, bit, bend ) )
1282 KMIME_WARN << rfc2231Codec->name()
1283 << " codec lies about it's maxDecodedSizeFor()\n"
1284 "result may be truncated" << endl;
1285
1286 value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
1287
1288 kdDebug() << "value now: \"" << value << "\"" << endl;
1289 // cleanup:
1290 delete dec;
1291}
1292
1293// known issues:
1294// - permutes rfc2231 continuations when the total number of parts
1295// exceeds 10 (other-sections then becomes *xy, ie. two digits)
1296
1297bool parseParameterList( const char* & scursor, const char * const send,
1298 TQMap<TQString,TQString> & result, bool isCRLF ) {
1299 // parse the list into raw attribute-value pairs:
1300 TQMap<TQString,TQStringOrTQPair> rawParameterList;
1301 if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) )
1302 return false;
1303
1304 if ( rawParameterList.isEmpty() ) return true;
1305
1306 // decode rfc 2231 continuations and alternate charset encoding:
1307
1308 // NOTE: this code assumes that what TQMapIterator delivers is sorted
1309 // by the key!
1310
1311 Codec * rfc2231Codec = 0;
1312 TQTextCodec * textcodec = 0;
1313 TQString attribute;
1314 TQString value;
1315 enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode;
1316
1317 TQMapIterator<TQString,TQStringOrTQPair> it, end = rawParameterList.end();
1318
1319 for ( it = rawParameterList.begin() ; it != end ; ++it ) {
1320 if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
1321 //
1322 // new attribute:
1323 //
1324
1325 // store the last attribute/value pair in the result map now:
1326 if ( !attribute.isNull() ) result.insert( attribute, value );
1327 // and extract the information from the new raw attribute:
1328 value = TQString();
1329 attribute = it.key();
1330 mode = NoMode;
1331 // is the value encoded?
1332 if ( attribute.endsWith( asterisk ) ) {
1333 attribute.truncate( attribute.length() - 1 );
1334 mode = (Modes) ((int) mode | Encoded);
1335 }
1336 // is the value continued?
1337 if ( attribute.endsWith( asteriskZero ) ) {
1338 attribute.truncate( attribute.length() - 2 );
1339 mode = (Modes) ((int) mode | Continued);
1340 }
1341 //
1342 // decode if necessary:
1343 //
1344 if ( mode & Encoded ) {
1345 decodeRFC2231Value( rfc2231Codec, textcodec,
1346 false, /* isn't continuation */
1347 value, (*it).qpair );
1348 } else {
1349 // not encoded.
1350 if ( (*it).qpair.first )
1351 value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
1352 else
1353 value += (*it).qstring;
1354 }
1355
1356 //
1357 // shortcut-processing when the value isn't encoded:
1358 //
1359
1360 if ( !(mode & Continued) ) {
1361 // save result already:
1362 result.insert( attribute, value );
1363 // force begin of a new attribute:
1364 attribute = TQString();
1365 }
1366 } else /* it.key().startsWith( attribute ) */ {
1367 //
1368 // continuation
1369 //
1370
1371 // ignore the section and trust TQMap to have sorted the keys:
1372 if ( it.key().endsWith( asterisk ) ) {
1373 // encoded
1374 decodeRFC2231Value( rfc2231Codec, textcodec,
1375 true, /* is continuation */
1376 value, (*it).qpair );
1377 } else {
1378 // not encoded
1379 if ( (*it).qpair.first )
1380 value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
1381 else
1382 value += (*it).qstring;
1383 }
1384 }
1385 }
1386
1387 // write last attr/value pair:
1388 if ( !attribute.isNull() )
1389 result.insert( attribute, value );
1390
1391 return true;
1392}
1393
1394static const char * stdDayNames[] = {
1395 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
1396};
1397static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
1398
1399static bool parseDayName( const char* & scursor, const char * const send )
1400{
1401 // check bounds:
1402 if ( send - scursor < 3 ) return false;
1403
1404 for ( int i = 0 ; i < stdDayNamesLen ; ++i )
1405 if ( tqstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
1406 scursor += 3;
1407 kdDebug() << "found " << stdDayNames[i] << endl;
1408 return true;
1409 }
1410
1411 return false;
1412}
1413
1414
// Three-letter English month abbreviations as used in RFC 2822 dates.
// The array index of a name is the zero-based month number (0 = January
// ... 11 = December).
static const char * stdMonthNames[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
static const int stdMonthNamesLen =
                              sizeof stdMonthNames / sizeof *stdMonthNames;
1421
1422static bool parseMonthName( const char* & scursor, const char * const send,
1423 int & result )
1424{
1425 // check bounds:
1426 if ( send - scursor < 3 ) return false;
1427
1428 for ( result = 0 ; result < stdMonthNamesLen ; ++result )
1429 if ( tqstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
1430 scursor += 3;
1431 return true;
1432 }
1433
1434 // not found:
1435 return false;
1436}
1437
// Table of alphabetic time zone names and their offsets from GMT in
// seconds (positive values are east of GMT). Each name is listed exactly
// once.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1488
1489static bool parseAlphaNumericTimeZone( const char* & scursor,
1490 const char * const send,
1491 long int & secsEastOfGMT,
1492 bool & timeZoneKnown )
1493{
1494 TQPair<const char*,int> maybeTimeZone(0,0);
1495 if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) )
1496 return false;
1497 for ( int i = 0 ; i < timeZonesLen ; ++i )
1498 if ( tqstrnicmp( timeZones[i].tzName,
1499 maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
1500 scursor += maybeTimeZone.second;
1501 secsEastOfGMT = timeZones[i].secsEastOfGMT;
1502 timeZoneKnown = true;
1503 return true;
1504 }
1505
1506 // don't choke just because we don't happen to know the time zone
1507 KMIME_WARN_UNKNOWN(time zone,TQCString( maybeTimeZone.first, maybeTimeZone.second+1 ));
1508 secsEastOfGMT = 0;
1509 timeZoneKnown = false;
1510 return true;
1511}
1512
// Parses a run of decimal digits starting at scursor.
//
//  scursor  is advanced behind the last digit consumed
//  send     end of the input; never read or passed
//  result   receives the parsed number; 0 if there is no digit
//
// Returns the number of digits consumed (0 if scursor does not point at
// a digit). There is no overflow protection for very long digit runs.
static int parseDigits( const char* & scursor, const char * const send,
                        int & result )
{
  result = 0;
  int digits = 0;
  // isdigit() is only defined for values representable as unsigned char
  // (or EOF), so 8-bit input must not be passed as a possibly negative
  // plain char.
  for ( ; scursor != send
          && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
1525
1526static bool parseTimeOfDay( const char* & scursor, const char * const send,
1527 int & hour, int & min, int & sec, bool isCRLF=false )
1528{
1529 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
1530
1531 //
1532 // 2DIGIT representing "hour":
1533 //
1534 if ( !parseDigits( scursor, send, hour ) ) return false;
1535
1536 eatCFWS( scursor, send, isCRLF );
1537 if ( scursor == send || *scursor != ':' ) return false;
1538 scursor++; // eat ':'
1539
1540 eatCFWS( scursor, send, isCRLF );
1541 if ( scursor == send ) return false;
1542
1543 //
1544 // 2DIGIT representing "minute":
1545 //
1546 if ( !parseDigits( scursor, send, min ) ) return false;
1547
1548 eatCFWS( scursor, send, isCRLF );
1549 if ( scursor == send ) return true; // seconds are optional
1550
1551 //
1552 // let's see if we have a 2DIGIT representing "second":
1553 //
1554 if ( *scursor == ':' ) {
1555 // yepp, there are seconds:
1556 scursor++; // eat ':'
1557 eatCFWS( scursor, send, isCRLF );
1558 if ( scursor == send ) return false;
1559
1560 if ( !parseDigits( scursor, send, sec ) ) return false;
1561 } else {
1562 sec = 0;
1563 }
1564
1565 return true;
1566}
1567
1568
1569bool parseTime( const char* & scursor, const char * send,
1570 int & hour, int & min, int & sec, long int & secsEastOfGMT,
1571 bool & timeZoneKnown, bool isCRLF )
1572{
1573 // time := time-of-day CFWS ( zone / obs-zone )
1574 //
1575 // obs-zone := "UT" / "GMT" /
1576 // "EST" / "EDT" / ; -0500 / -0400
1577 // "CST" / "CDT" / ; -0600 / -0500
1578 // "MST" / "MDT" / ; -0700 / -0600
1579 // "PST" / "PDT" / ; -0800 / -0700
1580 // "A"-"I" / "a"-"i" /
1581 // "K"-"Z" / "k"-"z"
1582
1583 eatCFWS( scursor, send, isCRLF );
1584 if ( scursor == send ) return false;
1585
1586 if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) )
1587 return false;
1588
1589 eatCFWS( scursor, send, isCRLF );
1590 if ( scursor == send ) {
1591 timeZoneKnown = false;
1592 secsEastOfGMT = 0;
1593 return true; // allow missing timezone
1594 }
1595
1596 timeZoneKnown = true;
1597 if ( *scursor == '+' || *scursor == '-' ) {
1598 // remember and eat '-'/'+':
1599 const char sign = *scursor++;
1600 // numerical timezone:
1601 int maybeTimeZone;
1602 if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false;
1603 secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
1604 if ( sign == '-' ) {
1605 secsEastOfGMT *= -1;
1606 if ( secsEastOfGMT == 0 )
1607 timeZoneKnown = false; // -0000 means indetermined tz
1608 }
1609 } else {
1610 // maybe alphanumeric timezone:
1611 if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) )
1612 return false;
1613 }
1614 return true;
1615}
1616
1617
1618bool parseDateTime( const char* & scursor, const char * const send,
1619 Types::DateTime & result, bool isCRLF )
1620{
1621 // Parsing date-time; strict mode:
1622 //
1623 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
1624 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
1625 // time
1626 //
1627 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
1628 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
1629 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez"
1630
1631 struct tm maybeDateTime = {
1632#ifdef HAVE_TM_GMTOFF
1633 0, 0, // initializers for members tm_gmtoff and tm_zone
1634#endif
1635 0, 0, 0, 0, 0, 0, 0, 0, 0
1636 };
1637
1638 eatCFWS( scursor, send, isCRLF );
1639 if ( scursor == send ) return false;
1640
1641 //
1642 // let's see if there's a day-of-week:
1643 //
1644 if ( parseDayName( scursor, send ) ) {
1645 eatCFWS( scursor, send, isCRLF );
1646 if ( scursor == send ) return false;
1647 // day-name should be followed by ',' but we treat it as optional:
1648 if ( *scursor == ',' ) {
1649 scursor++; // eat ','
1650 eatCFWS( scursor, send, isCRLF );
1651 }
1652 }
1653
1654 //
1655 // 1*2DIGIT representing "day" (of month):
1656 //
1657 int maybeDay;
1658 if ( !parseDigits( scursor, send, maybeDay ) ) return false;
1659
1660 eatCFWS( scursor, send, isCRLF );
1661 if ( scursor == send ) return false;
1662
1663 // success: store maybeDay in maybeDateTime:
1664 maybeDateTime.tm_mday = maybeDay;
1665
1666 //
1667 // month-name:
1668 //
1669 int maybeMonth = 0;
1670 if ( !parseMonthName( scursor, send, maybeMonth ) ) return false;
1671 if ( scursor == send ) return false;
1672 assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
1673
1674 eatCFWS( scursor, send, isCRLF );
1675 if ( scursor == send ) return false;
1676
1677 // success: store maybeMonth in maybeDateTime:
1678 maybeDateTime.tm_mon = maybeMonth;
1679
1680 //
1681 // 2*DIGIT representing "year":
1682 //
1683 int maybeYear;
1684 if ( !parseDigits( scursor, send, maybeYear ) ) return false;
1685 // RFC 2822 4.3 processing:
1686 if ( maybeYear < 50 )
1687 maybeYear += 2000;
1688 else if ( maybeYear < 1000 )
1689 maybeYear += 1900;
1690 // else keep as is
1691 if ( maybeYear < 1900 ) return false; // rfc2822, 3.3
1692
1693 eatCFWS( scursor, send, isCRLF );
1694 if ( scursor == send ) return false;
1695
1696 // success: store maybeYear in maybeDateTime:
1697 maybeDateTime.tm_year = maybeYear - 1900;
1698
1699 //
1700 // time
1701 //
1702 int maybeHour, maybeMinute, maybeSecond;
1703 long int secsEastOfGMT;
1704 bool timeZoneKnown = true;
1705
1706 if ( !parseTime( scursor, send,
1707 maybeHour, maybeMinute, maybeSecond,
1708 secsEastOfGMT, timeZoneKnown, isCRLF ) )
1709 return false;
1710
1711 // success: store everything in maybeDateTime:
1712 maybeDateTime.tm_hour = maybeHour;
1713 maybeDateTime.tm_min = maybeMinute;
1714 maybeDateTime.tm_sec = maybeSecond;
1715 maybeDateTime.tm_isdst = DateFormatter::isDaylight();
1716 // now put everything together and check if mktime(3) likes it:
1717 result.time = mktime( &maybeDateTime );
1718 if ( result.time == (time_t)(-1) ) return false;
1719
1720 // adjust to UTC/GMT:
1721 //result.time -= secsEastOfGMT;
1722 result.secsEastOfGMT = secsEastOfGMT;
1723 result.timeZoneKnown = timeZoneKnown;
1724
1725 return true;
1726}
1727
#if 0
// Disabled placeholder for a lenient fallback date parser. It is not
// compiled; the stub ignores all of its arguments and reports failure.
bool tryToMakeAnySenseOfDateString( const char* & /*scursor*/,
                                    const char * const /*send*/,
                                    time_t & /*result*/, bool /*isCRLF*/ )
{
  return false;
}
#endif
1736
1737} // namespace HeaderParsing
1738
1739} // namespace KMime
Abstract base class of codecs like base64 and quoted-printable.
Definition: kmime_codecs.h:57
virtual const char * name() const =0
Stateful decoder class, modelled after TQTextDecoder.
Definition: kmime_codecs.h:268
virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)=0
Decode a chunk of data, maintaining state information between calls.