libtdepim

linklocator.cpp
1 
23 #include "linklocator.h"
24 #include "pimemoticons.h"
25 #include <tdeversion.h>
26 #include <tdeglobal.h>
27 #include <kstandarddirs.h>
28 #include <kstaticdeleter.h>
29 #include <kmdcodec.h>
30 #include <kdebug.h>
31 
32 #include <tqstylesheet.h>
33 #include <tqfile.h>
34 #include <tqregexp.h>
35 
36 #include <limits.h>
37 
38 TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0;
39 TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0;
40 
41 static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter;
42 static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter;
43 
44 LinkLocator::LinkLocator(const TQString& text, int pos)
45  : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255)
46 {
47  // If you change either of the above values for maxUrlLen or
48  // maxAddressLen, then please also update the documentation for
49  // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
50  // default values used for the maxUrlLen/maxAddressLen parameters
51  // of convertToHtml().
52 
53  if ( !s_smileyEmoticonNameMap ) {
54  smileyMapDeleter.setObject( s_smileyEmoticonNameMap,
55  new TQMap<TQString, TQString>() );
56  for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) {
57  TQString imageName( EmotIcons::EnumSindex::enumToString[i] );
58  imageName.truncate( imageName.length() - 2 ); //remove the _0 bit
59  s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName );
60  }
61  }
62 
63  if ( !s_smileyEmoticonHTMLCache )
64  smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache,
65  new TQMap<TQString, TQString>() );
66 }
67 
68 void LinkLocator::setMaxUrlLen(int length)
69 {
70  mMaxUrlLen = length;
71 }
72 
74 {
75  return mMaxUrlLen;
76 }
77 
79 {
80  mMaxAddressLen = length;
81 }
82 
84 {
85  return mMaxAddressLen;
86 }
87 
89 {
90  TQString url;
91  if(atUrl())
92  {
93  // handle cases like this: <link>http://foobar.org/</link>
94  int start = mPos;
95  while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' &&
96  TQString("<>()[]").find(mText[mPos]) == -1)
97  {
98  ++mPos;
99  }
100  /* some URLs really end with: # / & - _ */
101  const TQString allowedSpecialChars = TQString("#/&-_");
102  while(mPos > start && mText[mPos-1].isPunct() &&
103  allowedSpecialChars.find(mText[mPos-1]) == -1 )
104  {
105  --mPos;
106  }
107 
108  url = mText.mid(start, mPos - start);
109  if(isEmptyUrl(url) || mPos - start > maxUrlLen())
110  {
111  mPos = start;
112  url = "";
113  }
114  else
115  {
116  --mPos;
117  }
118  }
119  return url;
120 }
121 
122 // keep this in sync with KMMainWin::slotUrlClicked()
123 bool LinkLocator::atUrl() const
124 {
125  // the following characters are allowed in a dot-atom (RFC 2822):
126  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
127  const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
128 
129  // the character directly before the URL must not be a letter, a number or
130  // any other character allowed in a dot-atom (RFC 2822).
131  if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() ||
132  ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) )
133  return false;
134 
135  TQChar ch = mText[mPos];
136  return (ch=='h' && ( mText.mid(mPos, 7) == "http://" ||
137  mText.mid(mPos, 8) == "https://") ) ||
138  (ch=='v' && mText.mid(mPos, 6) == "vnc://") ||
139  (ch=='f' && ( mText.mid(mPos, 7) == "fish://" ||
140  mText.mid(mPos, 6) == "ftp://" ||
141  mText.mid(mPos, 7) == "ftps://") ) ||
142  (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" ||
143  mText.mid(mPos, 6) == "smb://") ) ||
144  (ch=='m' && mText.mid(mPos, 7) == "mailto:") ||
145  (ch=='w' && mText.mid(mPos, 4) == "www.") ||
146  (ch=='f' && mText.mid(mPos, 4) == "ftp.") ||
147  (ch=='n' && mText.mid(mPos, 5) == "news:");
148  // note: no "file:" for security reasons
149 }
150 
151 bool LinkLocator::isEmptyUrl(const TQString& url)
152 {
153  return url.isEmpty() ||
154  url == "http://" ||
155  url == "https://" ||
156  url == "fish://" ||
157  url == "ftp://" ||
158  url == "ftps://" ||
159  url == "sftp://" ||
160  url == "smb://" ||
161  url == "vnc://" ||
162  url == "mailto" ||
163  url == "www" ||
164  url == "ftp" ||
165  url == "news" ||
166  url == "news://";
167 }
168 
170 {
171  TQString address;
172 
173  if ( mText[mPos] == '@' ) {
174  // the following characters are allowed in a dot-atom (RFC 2822):
175  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
176  const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
177 
178  // determine the local part of the email address
179  int start = mPos - 1;
180  while ( start >= 0 && mText[start].unicode() < 128 &&
181  ( mText[start].isLetterOrNumber() ||
182  mText[start] == '@' || // allow @ to find invalid email addresses
183  allowedSpecialChars.find( mText[start] ) != -1 ) ) {
184  if ( mText[start] == '@' )
185  return TQString(); // local part contains '@' -> no email address
186  --start;
187  }
188  ++start;
189  // we assume that an email address starts with a letter or a digit
190  while ( ( start < mPos ) && !mText[start].isLetterOrNumber() )
191  ++start;
192  if ( start == mPos )
193  return TQString(); // local part is empty -> no email address
194 
195  // determine the domain part of the email address
196  int dotPos = INT_MAX;
197  int end = mPos + 1;
198  while ( end < (int)mText.length() &&
199  ( mText[end].isLetterOrNumber() ||
200  mText[end] == '@' || // allow @ to find invalid email addresses
201  mText[end] == '.' ||
202  mText[end] == '-' ) ) {
203  if ( mText[end] == '@' )
204  return TQString(); // domain part contains '@' -> no email address
205  if ( mText[end] == '.' )
206  dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain
207  ++end;
208  }
209  // we assume that an email address ends with a letter or a digit
210  while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() )
211  --end;
212  if ( end == mPos )
213  return TQString(); // domain part is empty -> no email address
214  if ( dotPos >= end )
215  return TQString(); // domain part doesn't contain a dot
216 
217  if ( end - start > maxAddressLen() )
218  return TQString(); // too long -> most likely no email address
219  address = mText.mid( start, end - start );
220 
221  mPos = end - 1;
222  }
223  return address;
224 }
225 
226 TQString LinkLocator::convertToHtml(const TQString& plainText, int flags,
227  int maxUrlLen, int maxAddressLen)
228 {
229  LinkLocator locator(plainText);
230  locator.setMaxUrlLen(maxUrlLen);
232 
233  TQString str;
234  TQString result((TQChar*)0, (int)locator.mText.length() * 2);
235  TQChar ch;
236  int x;
237  bool startOfLine = true;
238  TQString emoticon;
239 
240  for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++)
241  {
242  ch = locator.mText[locator.mPos];
243  if ( flags & PreserveSpaces )
244  {
245  if (ch==' ')
246  {
247  if (startOfLine) {
248  result += "&nbsp;";
249  locator.mPos++, x++;
250  startOfLine = false;
251  }
252  while (locator.mText[locator.mPos] == ' ')
253  {
254  result += " ";
255  locator.mPos++, x++;
256  if (locator.mText[locator.mPos] == ' ') {
257  result += "&nbsp;";
258  locator.mPos++, x++;
259  }
260  }
261  locator.mPos--, x--;
262  continue;
263  }
264  else if (ch=='\t')
265  {
266  do
267  {
268  result += "&nbsp;";
269  x++;
270  }
271  while((x&7) != 0);
272  x--;
273  startOfLine = false;
274  continue;
275  }
276  }
277  if (ch=='\n')
278  {
279  result += "<br />";
280  startOfLine = true;
281  x = -1;
282  continue;
283  }
284 
285  startOfLine = false;
286  if (ch=='&')
287  result += "&amp;";
288  else if (ch=='"')
289  result += "&quot;";
290  else if (ch=='<')
291  result += "&lt;";
292  else if (ch=='>')
293  result += "&gt;";
294  else
295  {
296  const int start = locator.mPos;
297  if ( !(flags & IgnoreUrls) ) {
298  str = locator.getUrl();
299  if (!str.isEmpty())
300  {
301  TQString hyperlink;
302  if(str.left(4) == "www.")
303  hyperlink = "http://" + str;
304  else if(str.left(4) == "ftp.")
305  hyperlink = "ftp://" + str;
306  else
307  hyperlink = str;
308 
309  str = str.replace('&', "&amp;");
310  result += "<a href=\"" + hyperlink + "\">" + str + "</a>";
311  x += locator.mPos - start;
312  continue;
313  }
314  str = locator.getEmailAddress();
315  if(!str.isEmpty())
316  {
317  // len is the length of the local part
318  int len = str.find('@');
319  TQString localPart = str.left(len);
320 
321  // remove the local part from the result (as '&'s have been expanded to
322  // &amp; we have to take care of the 4 additional characters per '&')
323  result.truncate(result.length() - len - (localPart.contains('&')*4));
324  x -= len;
325 
326  result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
327  x += str.length() - 1;
328  continue;
329  }
330  }
331  if ( flags & ReplaceSmileys ) {
332  str = locator.getEmoticon();
333  if ( ! str.isEmpty() ) {
334  result += str;
335  x += locator.mPos - start;
336  continue;
337  }
338  }
339  if ( flags & HighlightText ) {
340  str = locator.highlightedText();
341  if ( !str.isEmpty() ) {
342  result += str;
343  x += locator.mPos - start;
344  continue;
345  }
346  }
347  result += ch;
348  }
349  }
350 
351  return result;
352 }
353 
354 TQString LinkLocator::pngToDataUrl( const TQString & iconPath )
355 {
356  if ( iconPath.isEmpty() )
357  return TQString();
358 
359  TQFile pngFile( iconPath );
360  if ( !pngFile.open( IO_ReadOnly | IO_Raw ) )
361  return TQString();
362 
363  TQByteArray ba = pngFile.readAll();
364  pngFile.close();
365  return TQString::fromLatin1("data:image/png;base64,%1")
366  .arg( KCodecs::base64Encode( ba ).data() );
367 }
368 
369 
370 TQString LinkLocator::getEmoticon()
371 {
372  // smileys have to be prepended by whitespace
373  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
374  return TQString();
375 
376  // since smileys start with ':', ';', '(' or '8' short circuit method
377  const TQChar ch = mText[mPos];
378  if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' )
379  return TQString();
380 
381  // find the end of the smiley (a smiley is at most 4 chars long and ends at
382  // lineend or whitespace)
383  const int MinSmileyLen = 2;
384  const int MaxSmileyLen = 4;
385  int smileyLen = 1;
386  while ( ( smileyLen <= MaxSmileyLen ) &&
387  ( mPos+smileyLen < (int)mText.length() ) &&
388  !mText[mPos+smileyLen].isSpace() )
389  smileyLen++;
390  if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen )
391  return TQString();
392 
393  const TQString smiley = mText.mid( mPos, smileyLen );
394  if ( !s_smileyEmoticonNameMap->contains( smiley ) )
395  return TQString(); // that's not a (known) smiley
396 
397  TQString htmlRep;
398  if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) {
399  htmlRep = (*s_smileyEmoticonHTMLCache)[smiley];
400  }
401  else {
402  const TQString imageName = (*s_smileyEmoticonNameMap)[smiley];
403 
404 #if KDE_IS_VERSION( 3, 3, 91 )
405  const TQString iconPath = locate( "emoticons",
406  EmotIcons::theme() +
407  TQString::fromLatin1( "/" ) +
408  imageName + TQString::fromLatin1(".png") );
409 #else
410  const TQString iconPath = locate( "data",
411  TQString::fromLatin1( "kopete/pics/emoticons/" )+
412  EmotIcons::theme() +
413  TQString::fromLatin1( "/" ) +
414  imageName + TQString::fromLatin1(".png") );
415 #endif
416 
417  const TQString dataUrl = pngToDataUrl( iconPath );
418  if ( dataUrl.isEmpty() ) {
419  htmlRep = TQString();
420  }
421  else {
422  // create an image tag (the text in attribute alt is used
423  // for copy & paste) representing the smiley
424  htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" "
425  "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>")
426  .arg( dataUrl,
427  TQStyleSheet::escape( smiley ),
428  TQStyleSheet::escape( smiley ) );
429  }
430  s_smileyEmoticonHTMLCache->insert( smiley, htmlRep );
431  }
432 
433  if ( !htmlRep.isEmpty() )
434  mPos += smileyLen - 1;
435 
436  return htmlRep;
437 }
438 
439 TQString LinkLocator::highlightedText()
440 {
441  // formating symbols must be prepended with a whitespace
442  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
443  return TQString();
444 
445  const TQChar ch = mText[mPos];
446  if ( ch != '/' && ch != '*' && ch != '_' )
447  return TQString();
448 
449  TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) );
450  if ( re.search( mText, mPos ) == mPos ) {
451  uint length = re.matchedLength();
452  // there must be a whitespace after the closing formating symbol
453  if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() )
454  return TQString();
455  mPos += length - 1;
456  switch ( ch.latin1() ) {
457  case '*':
458  return "<b>" + re.cap( 1 ) + "</b>";
459  case '_':
460  return "<u>" + re.cap( 1 ) + "</u>";
461  case '/':
462  return "<i>" + re.cap( 1 ) + "</i>";
463  }
464  }
465  return TQString();
466 }
467 
LinkLocator assists in identifying sections of text that can usefully be converted into hyperlinks in HTML.
Definition: linklocator.h:42
int maxAddressLen() const
Definition: linklocator.cpp:83
TQString getEmailAddress()
Attempts to grab an email address.
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
Definition: linklocator.cpp:68
static TQString pngToDataUrl(const TQString &iconPath)
Embed the given PNG image into a data URL.
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
Definition: linklocator.cpp:78
static TQString convertToHtml(const TQString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
int mPos
The current scan position.
Definition: linklocator.h:161
int maxUrlLen() const
Definition: linklocator.cpp:73
LinkLocator(const TQString &text, int pos=0)
Constructs a LinkLocator that will search a plaintext string from a given starting point.
Definition: linklocator.cpp:44
TQString mText
The plaintext string being scanned for URLs and email addresses.
Definition: linklocator.h:157
TQString getUrl()
Attempts to grab a URL starting at the current scan position.
Definition: linklocator.cpp:88