libkmime

kmime_parsers.cpp
1/*
2 kmime_parsers.cpp
3
4 KMime, the KDE internet mail/usenet news message library.
5 Copyright (c) 2001 the KMime authors.
6 See file AUTHORS for details
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software Foundation,
14 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US
15*/
16#include "kmime_parsers.h"
17
18#include <tqregexp.h>
19
20using namespace KMime::Parser;
21
22namespace KMime {
23namespace Parser {
24
25
26MultiPart::MultiPart(const TQCString &src, const TQCString &boundary)
27{
28 s_rc=src;
29 b_oundary=boundary;
30}
31
32
33bool MultiPart::parse()
34{
35 TQCString b="--"+b_oundary, part;
36 int pos1=0, pos2=0, blen=b.length();
37
38 p_arts.clear();
39
40 //find the first valid boundary
41 while(1) {
42 if( (pos1=s_rc.find(b.data(), pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all
43 break;
44 pos1+=blen; //boundary found but not valid => skip it;
45 }
46
47 if(pos1>-1) {
48 pos1+=blen;
49 if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken
50 pos1=-1; //we give up
51 else if( (pos1-blen)>1 ) //preamble present
52 p_reamble=s_rc.left(pos1-blen);
53 }
54
55
56 while(pos1>-1 && pos2>-1) {
57
58 //skip the rest of the line for the first boundary - the message-part starts here
59 if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak
60 //now find the next valid boundary
61 pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
62 while(1) {
63 if( (pos2=s_rc.find(b.data(), pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found
64 break;
65 pos2+=blen; //boundary is invalid => skip it;
66 }
67
68 if(pos2==-1) { // no more boundaries found
69 part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string
70 p_arts.append(part);
71 pos1=-1;
72 pos2=-1; //break;
73 }
74 else {
75 part=s_rc.mid(pos1, pos2-pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
76 p_arts.append(part);
77 pos2+=blen; //pos2 points now to the first charakter after the boundary
78 if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary
79 pos1=pos2+2; //pos1 points now to the character directly after the end-boundary
80 if( (pos1=s_rc.find('\n', pos1))>-1 ) //skipt the rest of this line
81 e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilouge
82 pos1=-1;
83 pos2=-1; //break
84 }
85 else {
86 pos1=pos2; //the search continues ...
87 }
88 }
89 }
90 }
91
92 return (!p_arts.isEmpty());
93}
94
95//============================================================================================
96
97
98NonMimeParser::NonMimeParser(const TQCString &src) :
99 s_rc(src), p_artNr(-1), t_otalNr(-1)
100{}
101
105TQCString NonMimeParser::guessMimeType(const TQCString& fileName)
106{
107 TQCString tmp, mimeType;
108 int pos;
109
110 if(!fileName.isEmpty()) {
111 pos=fileName.findRev('.');
112 if(pos++ != -1) {
113 tmp=fileName.mid(pos, fileName.length()-pos).upper();
114 if(tmp=="JPG" || tmp=="JPEG") mimeType="image/jpeg";
115 else if(tmp=="GIF") mimeType="image/gif";
116 else if(tmp=="PNG") mimeType="image/png";
117 else if(tmp=="TIFF" || tmp=="TIF") mimeType="image/tiff";
118 else if(tmp=="XPM") mimeType="image/x-xpm";
119 else if(tmp=="XBM") mimeType="image/x-xbm";
120 else if(tmp=="BMP") mimeType="image/x-bmp";
121 else if(tmp=="TXT" ||
122 tmp=="ASC" ||
123 tmp=="H" ||
124 tmp=="C" ||
125 tmp=="CC" ||
126 tmp=="CPP") mimeType="text/plain";
127 else if(tmp=="HTML" || tmp=="HTM") mimeType="text/html";
128 else mimeType="application/octet-stream";
129 }
130 else mimeType="application/octet-stream";
131 }
132 else mimeType="application/octet-stream";
133
134 return mimeType;
135}
136
137//============================================================================================
138
139
140UUEncoded::UUEncoded(const TQCString &src, const TQCString &subject) :
141 NonMimeParser(src), s_ubject(subject)
142{}
143
144
145bool UUEncoded::parse()
146{
147 int currentPos=0;
148 bool success=true, firstIteration=true;
149
150 while (success) {
151 int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
152 bool containsBegin=false, containsEnd=false;
153 TQCString tmp,fileName;
154
155 if( (beginPos=s_rc.find(TQRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) {
156 containsBegin=true;
157 uuStart=s_rc.find('\n', beginPos);
158 if(uuStart==-1) {//no more line breaks found, we give up
159 success = false;
160 break;
161 } else
162 uuStart++; //points now at the beginning of the next line
163 }
164 else beginPos=currentPos;
165
166 if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 )
167 endPos=s_rc.length(); //no end found
168 else
169 containsEnd=true;
170
171 if ((containsBegin && containsEnd) || firstIteration) {
172
173 //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
174 //all lines in a uuencoded text start with 'M'
175 for(int idx=uuStart; idx<endPos; idx++)
176 if(s_rc[idx]=='\n') {
177 lineCount++;
178 if(idx+1<endPos && s_rc[idx+1]=='M') {
179 idx++;
180 MCount++;
181 }
182 }
183
184 //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
185 if( MCount==0 || (lineCount-MCount)>10 ||
186 ((!containsBegin || !containsEnd) && (MCount<15)) ) { // harder check for splitted-articles
187 success = false;
188 break; //too many "non-M-Lines" found, we give up
189 }
190
191 if( (!containsBegin || !containsEnd) && !s_ubject.isNull()) { // message may be split up => parse subject
192 TQRegExp rx("[0-9]+/[0-9]+");
193 pos=rx.search(TQString(s_ubject), 0);
194 len=rx.matchedLength();
195 if(pos!=-1) {
196 tmp=s_ubject.mid(pos, len);
197 pos=tmp.find('/');
198 p_artNr=tmp.left(pos).toInt();
199 t_otalNr=tmp.right(tmp.length()-pos-1).toInt();
200 } else {
201 success = false;
202 break; //no "part-numbers" found in the subject, we give up
203 }
204 }
205
206 //everything before "begin" is text
207 if(beginPos>0)
208 t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
209
210 if(containsBegin)
211 fileName = s_rc.mid(beginPos+10, uuStart-beginPos-11); //everything between "begin ### " and the next LF is considered as the filename
212 else
213 fileName = "";
214 f_ilenames.append(fileName);
215 b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything beetween "begin" and "end" is uuencoded
216 m_imeTypes.append(guessMimeType(fileName));
217 firstIteration=false;
218
219 int next = s_rc.find('\n', endPos+1);
220 if(next==-1) { //no more line breaks found, we give up
221 success = false;
222 break;
223 } else
224 next++; //points now at the beginning of the next line
225 currentPos = next;
226
227 } else {
228 success = false;
229 }
230 }
231
232 // append trailing text part of the article
233 t_ext.append(s_rc.right(s_rc.length()-currentPos));
234
235 return ((b_ins.count()>0) || isPartial());
236}
237
238
239//============================================================================================
240
241
242YENCEncoded::YENCEncoded(const TQCString &src) :
243 NonMimeParser(src)
244{}
245
246
247bool YENCEncoded::yencMeta(TQCString& src, const TQCString& name, int* value)
248{
249 bool found = false;
250 TQCString sought=name + "=";
251
252 int iPos=src.find( sought.data() );
253 if (iPos>-1) {
254 int pos1=src.find(' ', iPos);
255 int pos2=src.find('\r', iPos);
256 int pos3=src.find('\t', iPos);
257 int pos4=src.find('\n', iPos);
258 if (pos2>=0 && (pos1<0 || pos1>pos2))
259 pos1=pos2;
260 if (pos3>=0 && (pos1<0 || pos1>pos3))
261 pos1=pos3;
262 if (pos4>=0 && (pos1<0 || pos1>pos4))
263 pos1=pos4;
264 iPos=src.findRev( '=', pos1)+1;
265 if (iPos<pos1) {
266 char c=src.at( iPos);
267 if ( c>='0' && c<='9') {
268 found=true;
269 *value=src.mid( iPos, pos1-iPos).toInt();
270 }
271 }
272 }
273 return found;
274}
275
276
277bool YENCEncoded::parse()
278{
279 int currentPos=0;
280 bool success=true;
281
282 while (success) {
283 int beginPos=currentPos, yencStart=currentPos;
284 bool containsPart=false;
285 TQCString fileName,mimeType;
286
287 if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) {
288 yencStart=s_rc.find( '\n', beginPos);
289 if (yencStart==-1) { // no more line breaks found, give up
290 success = false;
291 break;
292 } else {
293 yencStart++;
294 if (s_rc.find("=ypart", yencStart)==yencStart) {
295 containsPart=true;
296 yencStart=s_rc.find( '\n', yencStart);
297 if ( yencStart== -1) {
298 success=false;
299 break;
300 }
301 yencStart++;
302 }
303 }
304 // Try to identify yenc meta data
305
306 // Filenames can contain any embedded chars until end of line
307 TQCString meta=s_rc.mid(beginPos, yencStart-beginPos);
308 int namePos=meta.find("name=");
309 if (namePos== -1) {
310 success=false;
311 break;
312 }
313 int eolPos=meta.find('\r', namePos);
314 if (eolPos== -1)
315 eolPos=meta.find('\n', namePos);
316 if (eolPos== -1) {
317 success=false;
318 break;
319 }
320 fileName=meta.mid(namePos+5, eolPos-(namePos+5));
321
322 // Other metadata is integer
323 int yencLine;
324 if (!yencMeta(meta, "line", &yencLine)) {
325 success=false;
326 break;
327 }
328 int yencSize;
329 if (!yencMeta( meta, "size", &yencSize)) {
330 success=false;
331 break;
332 }
333
334 int partBegin, partEnd;
335 if (containsPart) {
336 if (!yencMeta(meta, "part", &p_artNr)) {
337 success=false;
338 break;
339 }
340 if (!yencMeta(meta, "begin", &partBegin) || !
341 yencMeta(meta, "end", &partEnd)) {
342 success=false;
343 break;
344 }
345 if (!yencMeta(meta, "total", &t_otalNr))
346 t_otalNr=p_artNr+1;
347 if (yencSize==partEnd-partBegin+1)
348 t_otalNr=1; else
349 yencSize=partEnd-partBegin+1;
350 }
351
352 // We have a valid yenc header; now we extract the binary data
353 int totalSize=0;
354 int pos=yencStart;
355 int len=s_rc.length();
356 bool lineStart=true;
357 int lineLength=0;
358 bool containsEnd=false;
359 TQByteArray binary = TQByteArray(yencSize);
360 while (pos<len) {
361 int ch=s_rc.at(pos);
362 if (ch<0)
363 ch+=256;
364 if (ch=='\r')
365 {
366 if (lineLength!=yencLine && totalSize!=yencSize)
367 break;
368 pos++;
369 }
370 else if (ch=='\n')
371 {
372 lineStart=true;
373 lineLength=0;
374 pos++;
375 }
376 else
377 {
378 if (ch=='=')
379 {
380 if (pos+1<len)
381 {
382 ch=s_rc.at( pos+1);
383 if (lineStart && ch=='y')
384 {
385 containsEnd=true;
386 break;
387 }
388 pos+=2;
389 ch-=64+42;
390 if (ch<0)
391 ch+=256;
392 if (totalSize>=yencSize)
393 break;
394 binary.at(totalSize++)=ch;
395 lineLength++;
396 }
397 else
398 break;
399 }
400 else
401 {
402 ch-=42;
403 if (ch<0)
404 ch+=256;
405 if (totalSize>=yencSize)
406 break;
407 binary.at(totalSize++)=ch;
408 lineLength++;
409 pos++;
410 }
411 lineStart=false;
412 }
413 }
414
415 if (!containsEnd)
416 {
417 success=false;
418 break;
419 }
420 if (totalSize!=yencSize)
421 {
422 success=false;
423 break;
424 }
425
426 // pos now points to =yend; get end data
427 eolPos=s_rc.find('\n', pos);
428 if (eolPos== -1)
429 {
430 success=false;
431 break;
432 }
433 meta=s_rc.mid(pos, eolPos-pos);
434 if (!yencMeta(meta, "size", &totalSize))
435 {
436 success=false;
437 break;
438 }
439 if (totalSize!=yencSize)
440 {
441 success=false;
442 break;
443 }
444
445 f_ilenames.append(fileName);
446 m_imeTypes.append(guessMimeType( fileName));
447 b_ins.append(binary);
448
449 //everything before "begin" is text
450 if(beginPos>0)
451 t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
452 currentPos = eolPos+1;
453
454 } else {
455 success = false;
456 }
457 }
458
459 // append trailing text part of the article
460 t_ext.append(s_rc.right(s_rc.length()-currentPos));
461
462 return b_ins.count()>0;
463}
464
465} // namespace Parser
466} // namespace KMime
Helper-class: abstract base class of all parsers for non-mime binary data (uuencoded,...
Definition: kmime_parsers.h:52
static TQCString guessMimeType(const TQCString &fileName)
try to guess the mimetype from the file-extension