libkmime

kmime_parsers.cpp
1 /*
2  kmime_parsers.cpp
3 
4  KMime, the KDE internet mail/usenet news message library.
5  Copyright (c) 2001 the KMime authors.
6  See file AUTHORS for details
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software Foundation,
14  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US
15 */
16 #include "kmime_parsers.h"
17 
18 #include <tqregexp.h>
19 
20 using namespace KMime::Parser;
21 
22 namespace KMime {
23 namespace Parser {
24 
25 
26 MultiPart::MultiPart(const TQCString &src, const TQCString &boundary)
27 {
28  s_rc=src;
29  b_oundary=boundary;
30 }
31 
32 
33 bool MultiPart::parse()
34 {
35  TQCString b="--"+b_oundary, part;
36  int pos1=0, pos2=0, blen=b.length();
37 
38  p_arts.clear();
39 
40  //find the first valid boundary
41  while(1) {
42  if( (pos1=s_rc.find(b.data(), pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all
43  break;
44  pos1+=blen; //boundary found but not valid => skip it;
45  }
46 
47  if(pos1>-1) {
48  pos1+=blen;
49  if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken
50  pos1=-1; //we give up
51  else if( (pos1-blen)>1 ) //preamble present
52  p_reamble=s_rc.left(pos1-blen);
53  }
54 
55 
56  while(pos1>-1 && pos2>-1) {
57 
58  //skip the rest of the line for the first boundary - the message-part starts here
59  if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak
60  //now find the next valid boundary
61  pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
62  while(1) {
63  if( (pos2=s_rc.find(b.data(), pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found
64  break;
65  pos2+=blen; //boundary is invalid => skip it;
66  }
67 
68  if(pos2==-1) { // no more boundaries found
69  part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string
70  p_arts.append(part);
71  pos1=-1;
72  pos2=-1; //break;
73  }
74  else {
75  part=s_rc.mid(pos1, pos2-pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
76  p_arts.append(part);
77  pos2+=blen; //pos2 points now to the first charakter after the boundary
78  if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary
79  pos1=pos2+2; //pos1 points now to the character directly after the end-boundary
80  if( (pos1=s_rc.find('\n', pos1))>-1 ) //skipt the rest of this line
81  e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilouge
82  pos1=-1;
83  pos2=-1; //break
84  }
85  else {
86  pos1=pos2; //the search continues ...
87  }
88  }
89  }
90  }
91 
92  return (!p_arts.isEmpty());
93 }
94 
95 //============================================================================================
96 
97 
98 NonMimeParser::NonMimeParser(const TQCString &src) :
99  s_rc(src), p_artNr(-1), t_otalNr(-1)
100 {}
101 
105 TQCString NonMimeParser::guessMimeType(const TQCString& fileName)
106 {
107  TQCString tmp, mimeType;
108  int pos;
109 
110  if(!fileName.isEmpty()) {
111  pos=fileName.findRev('.');
112  if(pos++ != -1) {
113  tmp=fileName.mid(pos, fileName.length()-pos).upper();
114  if(tmp=="JPG" || tmp=="JPEG") mimeType="image/jpeg";
115  else if(tmp=="GIF") mimeType="image/gif";
116  else if(tmp=="PNG") mimeType="image/png";
117  else if(tmp=="TIFF" || tmp=="TIF") mimeType="image/tiff";
118  else if(tmp=="XPM") mimeType="image/x-xpm";
119  else if(tmp=="XBM") mimeType="image/x-xbm";
120  else if(tmp=="BMP") mimeType="image/x-bmp";
121  else if(tmp=="TXT" ||
122  tmp=="ASC" ||
123  tmp=="H" ||
124  tmp=="C" ||
125  tmp=="CC" ||
126  tmp=="CPP") mimeType="text/plain";
127  else if(tmp=="HTML" || tmp=="HTM") mimeType="text/html";
128  else mimeType="application/octet-stream";
129  }
130  else mimeType="application/octet-stream";
131  }
132  else mimeType="application/octet-stream";
133 
134  return mimeType;
135 }
136 
137 //============================================================================================
138 
139 
140 UUEncoded::UUEncoded(const TQCString &src, const TQCString &subject) :
141  NonMimeParser(src), s_ubject(subject)
142 {}
143 
144 
145 bool UUEncoded::parse()
146 {
147  int currentPos=0;
148  bool success=true, firstIteration=true;
149 
150  while (success) {
151  int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
152  bool containsBegin=false, containsEnd=false;
153  TQCString tmp,fileName;
154 
155  if( (beginPos=s_rc.find(TQRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) {
156  containsBegin=true;
157  uuStart=s_rc.find('\n', beginPos);
158  if(uuStart==-1) {//no more line breaks found, we give up
159  success = false;
160  break;
161  } else
162  uuStart++; //points now at the beginning of the next line
163  }
164  else beginPos=currentPos;
165 
166  if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 )
167  endPos=s_rc.length(); //no end found
168  else
169  containsEnd=true;
170 
171  if ((containsBegin && containsEnd) || firstIteration) {
172 
173  //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
174  //all lines in a uuencoded text start with 'M'
175  for(int idx=uuStart; idx<endPos; idx++)
176  if(s_rc[idx]=='\n') {
177  lineCount++;
178  if(idx+1<endPos && s_rc[idx+1]=='M') {
179  idx++;
180  MCount++;
181  }
182  }
183 
184  //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
185  if( MCount==0 || (lineCount-MCount)>10 ||
186  ((!containsBegin || !containsEnd) && (MCount<15)) ) { // harder check for splitted-articles
187  success = false;
188  break; //too many "non-M-Lines" found, we give up
189  }
190 
191  if( (!containsBegin || !containsEnd) && !s_ubject.isNull()) { // message may be split up => parse subject
192  TQRegExp rx("[0-9]+/[0-9]+");
193  pos=rx.search(TQString(s_ubject), 0);
194  len=rx.matchedLength();
195  if(pos!=-1) {
196  tmp=s_ubject.mid(pos, len);
197  pos=tmp.find('/');
198  p_artNr=tmp.left(pos).toInt();
199  t_otalNr=tmp.right(tmp.length()-pos-1).toInt();
200  } else {
201  success = false;
202  break; //no "part-numbers" found in the subject, we give up
203  }
204  }
205 
206  //everything before "begin" is text
207  if(beginPos>0)
208  t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
209 
210  if(containsBegin)
211  fileName = s_rc.mid(beginPos+10, uuStart-beginPos-11); //everything between "begin ### " and the next LF is considered as the filename
212  else
213  fileName = "";
214  f_ilenames.append(fileName);
215  b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything beetween "begin" and "end" is uuencoded
216  m_imeTypes.append(guessMimeType(fileName));
217  firstIteration=false;
218 
219  int next = s_rc.find('\n', endPos+1);
220  if(next==-1) { //no more line breaks found, we give up
221  success = false;
222  break;
223  } else
224  next++; //points now at the beginning of the next line
225  currentPos = next;
226 
227  } else {
228  success = false;
229  }
230  }
231 
232  // append trailing text part of the article
233  t_ext.append(s_rc.right(s_rc.length()-currentPos));
234 
235  return ((b_ins.count()>0) || isPartial());
236 }
237 
238 
239 //============================================================================================
240 
241 
242 YENCEncoded::YENCEncoded(const TQCString &src) :
243  NonMimeParser(src)
244 {}
245 
246 
247 bool YENCEncoded::yencMeta(TQCString& src, const TQCString& name, int* value)
248 {
249  bool found = false;
250  TQCString sought=name + "=";
251 
252  int iPos=src.find( sought.data() );
253  if (iPos>-1) {
254  int pos1=src.find(' ', iPos);
255  int pos2=src.find('\r', iPos);
256  int pos3=src.find('\t', iPos);
257  int pos4=src.find('\n', iPos);
258  if (pos2>=0 && (pos1<0 || pos1>pos2))
259  pos1=pos2;
260  if (pos3>=0 && (pos1<0 || pos1>pos3))
261  pos1=pos3;
262  if (pos4>=0 && (pos1<0 || pos1>pos4))
263  pos1=pos4;
264  iPos=src.findRev( '=', pos1)+1;
265  if (iPos<pos1) {
266  char c=src.at( iPos);
267  if ( c>='0' && c<='9') {
268  found=true;
269  *value=src.mid( iPos, pos1-iPos).toInt();
270  }
271  }
272  }
273  return found;
274 }
275 
276 
277 bool YENCEncoded::parse()
278 {
279  int currentPos=0;
280  bool success=true;
281 
282  while (success) {
283  int beginPos=currentPos, yencStart=currentPos;
284  bool containsPart=false;
285  TQCString fileName,mimeType;
286 
287  if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) {
288  yencStart=s_rc.find( '\n', beginPos);
289  if (yencStart==-1) { // no more line breaks found, give up
290  success = false;
291  break;
292  } else {
293  yencStart++;
294  if (s_rc.find("=ypart", yencStart)==yencStart) {
295  containsPart=true;
296  yencStart=s_rc.find( '\n', yencStart);
297  if ( yencStart== -1) {
298  success=false;
299  break;
300  }
301  yencStart++;
302  }
303  }
304  // Try to identify yenc meta data
305 
306  // Filenames can contain any embedded chars until end of line
307  TQCString meta=s_rc.mid(beginPos, yencStart-beginPos);
308  int namePos=meta.find("name=");
309  if (namePos== -1) {
310  success=false;
311  break;
312  }
313  int eolPos=meta.find('\r', namePos);
314  if (eolPos== -1)
315  eolPos=meta.find('\n', namePos);
316  if (eolPos== -1) {
317  success=false;
318  break;
319  }
320  fileName=meta.mid(namePos+5, eolPos-(namePos+5));
321 
322  // Other metadata is integer
323  int yencLine;
324  if (!yencMeta(meta, "line", &yencLine)) {
325  success=false;
326  break;
327  }
328  int yencSize;
329  if (!yencMeta( meta, "size", &yencSize)) {
330  success=false;
331  break;
332  }
333 
334  int partBegin, partEnd;
335  if (containsPart) {
336  if (!yencMeta(meta, "part", &p_artNr)) {
337  success=false;
338  break;
339  }
340  if (!yencMeta(meta, "begin", &partBegin) || !
341  yencMeta(meta, "end", &partEnd)) {
342  success=false;
343  break;
344  }
345  if (!yencMeta(meta, "total", &t_otalNr))
346  t_otalNr=p_artNr+1;
347  if (yencSize==partEnd-partBegin+1)
348  t_otalNr=1; else
349  yencSize=partEnd-partBegin+1;
350  }
351 
352  // We have a valid yenc header; now we extract the binary data
353  int totalSize=0;
354  int pos=yencStart;
355  int len=s_rc.length();
356  bool lineStart=true;
357  int lineLength=0;
358  bool containsEnd=false;
359  TQByteArray binary = TQByteArray(yencSize);
360  while (pos<len) {
361  int ch=s_rc.at(pos);
362  if (ch<0)
363  ch+=256;
364  if (ch=='\r')
365  {
366  if (lineLength!=yencLine && totalSize!=yencSize)
367  break;
368  pos++;
369  }
370  else if (ch=='\n')
371  {
372  lineStart=true;
373  lineLength=0;
374  pos++;
375  }
376  else
377  {
378  if (ch=='=')
379  {
380  if (pos+1<len)
381  {
382  ch=s_rc.at( pos+1);
383  if (lineStart && ch=='y')
384  {
385  containsEnd=true;
386  break;
387  }
388  pos+=2;
389  ch-=64+42;
390  if (ch<0)
391  ch+=256;
392  if (totalSize>=yencSize)
393  break;
394  binary.at(totalSize++)=ch;
395  lineLength++;
396  }
397  else
398  break;
399  }
400  else
401  {
402  ch-=42;
403  if (ch<0)
404  ch+=256;
405  if (totalSize>=yencSize)
406  break;
407  binary.at(totalSize++)=ch;
408  lineLength++;
409  pos++;
410  }
411  lineStart=false;
412  }
413  }
414 
415  if (!containsEnd)
416  {
417  success=false;
418  break;
419  }
420  if (totalSize!=yencSize)
421  {
422  success=false;
423  break;
424  }
425 
426  // pos now points to =yend; get end data
427  eolPos=s_rc.find('\n', pos);
428  if (eolPos== -1)
429  {
430  success=false;
431  break;
432  }
433  meta=s_rc.mid(pos, eolPos-pos);
434  if (!yencMeta(meta, "size", &totalSize))
435  {
436  success=false;
437  break;
438  }
439  if (totalSize!=yencSize)
440  {
441  success=false;
442  break;
443  }
444 
445  f_ilenames.append(fileName);
446  m_imeTypes.append(guessMimeType( fileName));
447  b_ins.append(binary);
448 
449  //everything before "begin" is text
450  if(beginPos>0)
451  t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
452  currentPos = eolPos+1;
453 
454  } else {
455  success = false;
456  }
457  }
458 
459  // append trailing text part of the article
460  t_ext.append(s_rc.right(s_rc.length()-currentPos));
461 
462  return b_ins.count()>0;
463 }
464 
465 } // namespace Parser
466 } // namespace KMime
Helper-class: abstract base class of all parsers for non-mime binary data (uuencoded,...
Definition: kmime_parsers.h:52
static TQCString guessMimeType(const TQCString &fileName)
try to guess the mimetype from the file-extension