kmime_parsers.cpp Source File (libkmime)

libkmime

libkmime
/*
    kmime_parsers.cpp
 
    KMime, the KDE internet mail/usenet news message library.
    Copyright (c) 2001 the KMime authors.
    See file AUTHORS for details
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US
*/
#include "kmime_parsers.h"
 
#include <tqregexp.h>
 
using namespace KMime::Parser;
 
namespace KMime {
namespace Parser {
 
 
MultiPart::MultiPart(const TQCString &src, const TQCString &boundary)
{
  s_rc=src;
  b_oundary=boundary;
}
 
 
bool MultiPart::parse()
{
  TQCString b="--"+b_oundary, part;
  int pos1=0, pos2=0, blen=b.length();
 
  p_arts.clear();
 
  //find the first valid boundary
  while(1) {
    if( (pos1=s_rc.find(b.data(), pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all
      break;
    pos1+=blen; //boundary found but not valid => skip it;
  }
 
  if(pos1>-1) {
    pos1+=blen;
    if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken
      pos1=-1; //we give up
    else if( (pos1-blen)>1 ) //preamble present
      p_reamble=s_rc.left(pos1-blen);
  }
 
 
  while(pos1>-1 && pos2>-1) {
 
    //skip the rest of the line for the first boundary - the message-part starts here
    if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak
      //now find the next valid boundary
      pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
      while(1) {
        if( (pos2=s_rc.find(b.data(), pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found
          break;
        pos2+=blen; //boundary is invalid => skip it;
      }
 
      if(pos2==-1) { // no more boundaries found
        part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string
        p_arts.append(part);
        pos1=-1;
        pos2=-1; //break;
      }
      else {
        part=s_rc.mid(pos1, pos2-pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
        p_arts.append(part);
        pos2+=blen; //pos2 points now to the first charakter after the boundary
        if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary
          pos1=pos2+2; //pos1 points now to the character directly after the end-boundary
          if( (pos1=s_rc.find('\n', pos1))>-1 ) //skipt the rest of this line
            e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilouge
          pos1=-1;
          pos2=-1; //break
        }
        else {
          pos1=pos2; //the search continues ...
        }
      }
    }
  }
 
  return (!p_arts.isEmpty());
}
 
//============================================================================================
 
 
NonMimeParser::NonMimeParser(const TQCString &src) :
  s_rc(src), p_artNr(-1), t_otalNr(-1)
{}
 
TQCString NonMimeParser::guessMimeType(const TQCString& fileName)
{
  TQCString tmp, mimeType;
  int pos;
 
  if(!fileName.isEmpty()) {
    pos=fileName.findRev('.');
    if(pos++ != -1) {
      tmp=fileName.mid(pos, fileName.length()-pos).upper();
      if(tmp=="JPG" || tmp=="JPEG")       mimeType="image/jpeg";
      else if(tmp=="GIF")                 mimeType="image/gif";
      else if(tmp=="PNG")                 mimeType="image/png";
      else if(tmp=="TIFF" || tmp=="TIF")  mimeType="image/tiff";
      else if(tmp=="XPM")                 mimeType="image/x-xpm";
      else if(tmp=="XBM")                 mimeType="image/x-xbm";
      else if(tmp=="BMP")                 mimeType="image/x-bmp";
      else if(tmp=="TXT" ||
              tmp=="ASC" ||
              tmp=="H" ||
              tmp=="C" ||
              tmp=="CC" ||
              tmp=="CPP")                 mimeType="text/plain";
      else if(tmp=="HTML" || tmp=="HTM")  mimeType="text/html";
      else                                mimeType="application/octet-stream";
    }
    else mimeType="application/octet-stream";
  }
  else mimeType="application/octet-stream";
 
  return mimeType;
}
 
//============================================================================================
 
 
UUEncoded::UUEncoded(const TQCString &src, const TQCString &subject) :
  NonMimeParser(src), s_ubject(subject)
{}
 
 
bool UUEncoded::parse()
{
  int currentPos=0;
  bool success=true, firstIteration=true;
 
  while (success) {
    int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
    bool containsBegin=false, containsEnd=false;
    TQCString tmp,fileName;
 
    if( (beginPos=s_rc.find(TQRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) {
      containsBegin=true;
      uuStart=s_rc.find('\n', beginPos);
      if(uuStart==-1) {//no more line breaks found, we give up
        success = false;
        break;
      } else
        uuStart++; //points now at the beginning of the next line
    }
      else beginPos=currentPos;
 
    if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 )
      endPos=s_rc.length(); //no end found
    else
      containsEnd=true;
 
    if ((containsBegin && containsEnd) || firstIteration) {
 
      //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
      //all lines in a uuencoded text start with 'M'
      for(int idx=uuStart; idx<endPos; idx++)
        if(s_rc[idx]=='\n') {
          lineCount++;
          if(idx+1<endPos && s_rc[idx+1]=='M') {
            idx++;
            MCount++;
          }
        }
 
      //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
      if( MCount==0 || (lineCount-MCount)>10 ||
          ((!containsBegin || !containsEnd) && (MCount<15)) ) {  // harder check for splitted-articles
        success = false;
        break; //too many "non-M-Lines" found, we give up
      }
 
      if( (!containsBegin || !containsEnd) && !s_ubject.isNull()) {  // message may be split up => parse subject
    TQRegExp rx("[0-9]+/[0-9]+");
    pos=rx.search(TQString(s_ubject), 0);
    len=rx.matchedLength();
        if(pos!=-1) {
          tmp=s_ubject.mid(pos, len);
          pos=tmp.find('/');
          p_artNr=tmp.left(pos).toInt();
          t_otalNr=tmp.right(tmp.length()-pos-1).toInt();
        } else {
          success = false;
          break; //no "part-numbers" found in the subject, we give up
        }
      }
 
      //everything before "begin" is text
      if(beginPos>0)
        t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
 
      if(containsBegin)
        fileName = s_rc.mid(beginPos+10, uuStart-beginPos-11); //everything between "begin ### " and the next LF is considered as the filename
      else
        fileName = "";
      f_ilenames.append(fileName);
      b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything beetween "begin" and "end" is uuencoded     
      m_imeTypes.append(guessMimeType(fileName));
      firstIteration=false;
 
      int next = s_rc.find('\n', endPos+1);
      if(next==-1) { //no more line breaks found, we give up
        success = false;
        break;
      } else
        next++; //points now at the beginning of the next line
      currentPos = next;
 
    } else {
      success = false;
    }
  }
 
  // append trailing text part of the article
  t_ext.append(s_rc.right(s_rc.length()-currentPos));
 
  return ((b_ins.count()>0) || isPartial());
}
 
 
//============================================================================================
 
 
YENCEncoded::YENCEncoded(const TQCString &src) :
  NonMimeParser(src)
{}
 
 
bool YENCEncoded::yencMeta(TQCString& src, const TQCString& name, int* value)
{
  bool found = false;
  TQCString sought=name + "=";
 
  int iPos=src.find( sought.data() );
  if (iPos>-1) {
    int pos1=src.find(' ', iPos);
    int pos2=src.find('\r', iPos);
    int pos3=src.find('\t', iPos);
    int pos4=src.find('\n', iPos);
    if (pos2>=0 && (pos1<0 || pos1>pos2))
      pos1=pos2;
    if (pos3>=0 && (pos1<0 || pos1>pos3))
      pos1=pos3;
    if (pos4>=0 && (pos1<0 || pos1>pos4))
      pos1=pos4;
    iPos=src.findRev( '=', pos1)+1;
    if (iPos<pos1) {
      char c=src.at( iPos);
      if ( c>='0' && c<='9') {
        found=true;
        *value=src.mid( iPos, pos1-iPos).toInt();
      }
    }
  }
  return found;
}
 
 
bool YENCEncoded::parse()
{
  int currentPos=0;
  bool success=true;
 
  while (success) {
    int beginPos=currentPos, yencStart=currentPos;
    bool containsPart=false;
    TQCString fileName,mimeType;
 
    if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) {
      yencStart=s_rc.find( '\n', beginPos);
      if (yencStart==-1) { // no more line breaks found, give up
        success = false;
        break;
      } else {
        yencStart++;
        if (s_rc.find("=ypart", yencStart)==yencStart) {
          containsPart=true;
          yencStart=s_rc.find( '\n', yencStart);
          if ( yencStart== -1) {
            success=false;
            break;
          }
          yencStart++;
        }
      }
      // Try to identify yenc meta data
 
      // Filenames can contain any embedded chars until end of line
      TQCString meta=s_rc.mid(beginPos, yencStart-beginPos);
      int namePos=meta.find("name=");
      if (namePos== -1) {
        success=false;
        break;
      }
      int eolPos=meta.find('\r', namePos);
      if (eolPos== -1)
      eolPos=meta.find('\n', namePos);    
      if (eolPos== -1) {
        success=false;
        break;
      }
      fileName=meta.mid(namePos+5, eolPos-(namePos+5));
 
      // Other metadata is integer
      int yencLine;
      if (!yencMeta(meta, "line", &yencLine)) {
        success=false;
        break;
      }
      int yencSize;
      if (!yencMeta( meta, "size", &yencSize)) {
        success=false;
        break;
      }
 
      int partBegin, partEnd;
      if (containsPart) {
        if (!yencMeta(meta, "part", &p_artNr)) {
          success=false;
          break;
        }
        if (!yencMeta(meta, "begin", &partBegin) || !
             yencMeta(meta, "end", &partEnd)) {
          success=false;
          break;
        }
        if (!yencMeta(meta, "total", &t_otalNr))
          t_otalNr=p_artNr+1;
        if (yencSize==partEnd-partBegin+1)
          t_otalNr=1; else
        yencSize=partEnd-partBegin+1;
      }
 
      // We have a valid yenc header; now we extract the binary data
      int totalSize=0;
      int pos=yencStart;
      int len=s_rc.length();
      bool lineStart=true;
      int lineLength=0;
      bool containsEnd=false;
      TQByteArray binary = TQByteArray(yencSize);
      while (pos<len) {
        int ch=s_rc.at(pos);
        if (ch<0)
          ch+=256;
        if (ch=='\r')
        {
          if (lineLength!=yencLine && totalSize!=yencSize)          
            break;          
          pos++;
        }
        else if (ch=='\n')
        {
          lineStart=true;
          lineLength=0;
          pos++;
        }
        else
        {
          if (ch=='=')
          {
            if (pos+1<len)
            {
              ch=s_rc.at( pos+1);
              if (lineStart && ch=='y')
              {
                containsEnd=true;
                break;
              }
              pos+=2;
              ch-=64+42;
              if (ch<0)
                ch+=256;
              if (totalSize>=yencSize)            
                break;            
              binary.at(totalSize++)=ch;
              lineLength++;
            }
            else            
              break;            
          }
          else
          {
            ch-=42;
            if (ch<0)
              ch+=256;
            if (totalSize>=yencSize)            
              break;
            binary.at(totalSize++)=ch;
            lineLength++;
            pos++;
          }
          lineStart=false;
        }
      }
      
      if (!containsEnd)
      {
        success=false;
        break;
      }
      if (totalSize!=yencSize)
      {        
        success=false;
        break;
      }
 
      // pos now points to =yend; get end data
      eolPos=s_rc.find('\n', pos);
      if (eolPos== -1)
      {
        success=false;
        break;
      }
      meta=s_rc.mid(pos, eolPos-pos);
      if (!yencMeta(meta, "size", &totalSize))
      {        
        success=false;
        break;
      }
      if (totalSize!=yencSize)
      {        
        success=false;
        break;
      }
 
      f_ilenames.append(fileName);
      m_imeTypes.append(guessMimeType( fileName));
      b_ins.append(binary);
 
      //everything before "begin" is text
      if(beginPos>0)
        t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
      currentPos = eolPos+1;
 
    } else {
      success = false;
    }
  }
 
  // append trailing text part of the article
  t_ext.append(s_rc.right(s_rc.length()-currentPos));
 
  return b_ins.count()>0;
}
 
} // namespace Parser
} // namespace KMime