libtdepim

linklocator.cpp
1
23#include "linklocator.h"
24#include "pimemoticons.h"
25#include <tdeversion.h>
26#include <tdeglobal.h>
27#include <tdestandarddirs.h>
28#include <kstaticdeleter.h>
29#include <kmdcodec.h>
30#include <kdebug.h>
31
32#include <tqstylesheet.h>
33#include <tqfile.h>
34#include <tqregexp.h>
35
36#include <limits.h>
37
38TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0;
39TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0;
40
41static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter;
42static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter;
43
44LinkLocator::LinkLocator(const TQString& text, int pos)
45 : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255)
46{
47 // If you change either of the above values for maxUrlLen or
48 // maxAddressLen, then please also update the documentation for
49 // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
50 // default values used for the maxUrlLen/maxAddressLen parameters
51 // of convertToHtml().
52
53 if ( !s_smileyEmoticonNameMap ) {
54 smileyMapDeleter.setObject( s_smileyEmoticonNameMap,
55 new TQMap<TQString, TQString>() );
56 for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) {
57 TQString imageName( EmotIcons::EnumSindex::enumToString[i] );
58 imageName.truncate( imageName.length() - 2 ); //remove the _0 bit
59 s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName );
60 }
61 }
62
63 if ( !s_smileyEmoticonHTMLCache )
64 smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache,
65 new TQMap<TQString, TQString>() );
66}
67
69{
70 mMaxUrlLen = length;
71}
72
74{
75 return mMaxUrlLen;
76}
77
79{
80 mMaxAddressLen = length;
81}
82
84{
85 return mMaxAddressLen;
86}
87
89{
90 TQString url;
91 if(atUrl())
92 {
93 // handle cases like this: <link>http://foobar.org/</link>
94 int start = mPos;
95 while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' &&
96 TQString("<>()[]").find(mText[mPos]) == -1)
97 {
98 ++mPos;
99 }
100 /* some URLs really end with: # / & - _ */
101 const TQString allowedSpecialChars = TQString("#/&-_");
102 while(mPos > start && mText[mPos-1].isPunct() &&
103 allowedSpecialChars.find(mText[mPos-1]) == -1 )
104 {
105 --mPos;
106 }
107
108 url = mText.mid(start, mPos - start);
109 if(isEmptyUrl(url) || mPos - start > maxUrlLen())
110 {
111 mPos = start;
112 url = "";
113 }
114 else
115 {
116 --mPos;
117 }
118 }
119 return url;
120}
121
122// keep this in sync with KMMainWin::slotUrlClicked()
123bool LinkLocator::atUrl() const
124{
125 // the following characters are allowed in a dot-atom (RFC 2822):
126 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
127 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
128
129 // the character directly before the URL must not be a letter, a number or
130 // any other character allowed in a dot-atom (RFC 2822).
131 if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() ||
132 ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) )
133 return false;
134
135 TQChar ch = mText[mPos];
136 return (ch=='h' && ( mText.mid(mPos, 7) == "http://" ||
137 mText.mid(mPos, 8) == "https://") ) ||
138 (ch=='v' && mText.mid(mPos, 6) == "vnc://") ||
139 (ch=='f' && ( mText.mid(mPos, 7) == "fish://" ||
140 mText.mid(mPos, 6) == "ftp://" ||
141 mText.mid(mPos, 7) == "ftps://") ) ||
142 (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" ||
143 mText.mid(mPos, 6) == "smb://") ) ||
144 (ch=='m' && mText.mid(mPos, 7) == "mailto:") ||
145 (ch=='w' && mText.mid(mPos, 4) == "www.") ||
146 (ch=='f' && mText.mid(mPos, 4) == "ftp.") ||
147 (ch=='n' && mText.mid(mPos, 5) == "news:");
148 // note: no "file:" for security reasons
149}
150
151bool LinkLocator::isEmptyUrl(const TQString& url)
152{
153 return url.isEmpty() ||
154 url == "http://" ||
155 url == "https://" ||
156 url == "fish://" ||
157 url == "ftp://" ||
158 url == "ftps://" ||
159 url == "sftp://" ||
160 url == "smb://" ||
161 url == "vnc://" ||
162 url == "mailto" ||
163 url == "www" ||
164 url == "ftp" ||
165 url == "news" ||
166 url == "news://";
167}
168
170{
171 TQString address;
172
173 if ( mText[mPos] == '@' ) {
174 // the following characters are allowed in a dot-atom (RFC 2822):
175 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
176 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
177
178 // determine the local part of the email address
179 int start = mPos - 1;
180 while ( start >= 0 && mText[start].unicode() < 128 &&
181 ( mText[start].isLetterOrNumber() ||
182 mText[start] == '@' || // allow @ to find invalid email addresses
183 allowedSpecialChars.find( mText[start] ) != -1 ) ) {
184 if ( mText[start] == '@' )
185 return TQString(); // local part contains '@' -> no email address
186 --start;
187 }
188 ++start;
189 // we assume that an email address starts with a letter or a digit
190 while ( ( start < mPos ) && !mText[start].isLetterOrNumber() )
191 ++start;
192 if ( start == mPos )
193 return TQString(); // local part is empty -> no email address
194
195 // determine the domain part of the email address
196 int dotPos = INT_MAX;
197 int end = mPos + 1;
198 while ( end < (int)mText.length() &&
199 ( mText[end].isLetterOrNumber() ||
200 mText[end] == '@' || // allow @ to find invalid email addresses
201 mText[end] == '.' ||
202 mText[end] == '-' ) ) {
203 if ( mText[end] == '@' )
204 return TQString(); // domain part contains '@' -> no email address
205 if ( mText[end] == '.' )
206 dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain
207 ++end;
208 }
209 // we assume that an email address ends with a letter or a digit
210 while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() )
211 --end;
212 if ( end == mPos )
213 return TQString(); // domain part is empty -> no email address
214 if ( dotPos >= end )
215 return TQString(); // domain part doesn't contain a dot
216
217 if ( end - start > maxAddressLen() )
218 return TQString(); // too long -> most likely no email address
219 address = mText.mid( start, end - start );
220
221 mPos = end - 1;
222 }
223 return address;
224}
225
226TQString LinkLocator::convertToHtml(const TQString& plainText, int flags,
227 int maxUrlLen, int maxAddressLen)
228{
229 LinkLocator locator(plainText);
230 locator.setMaxUrlLen(maxUrlLen);
232
233 TQString str;
234 TQString result((TQChar*)0, (int)locator.mText.length() * 2);
235 TQChar ch;
236 int x;
237 bool startOfLine = true;
238 TQString emoticon;
239
240 for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++)
241 {
242 ch = locator.mText[locator.mPos];
243 if ( flags & PreserveSpaces )
244 {
245 if (ch==' ')
246 {
247 if (startOfLine) {
248 result += "&nbsp;";
249 locator.mPos++, x++;
250 startOfLine = false;
251 }
252 while (locator.mText[locator.mPos] == ' ')
253 {
254 result += " ";
255 locator.mPos++, x++;
256 if (locator.mText[locator.mPos] == ' ') {
257 result += "&nbsp;";
258 locator.mPos++, x++;
259 }
260 }
261 locator.mPos--, x--;
262 continue;
263 }
264 else if (ch=='\t')
265 {
266 do
267 {
268 result += "&nbsp;";
269 x++;
270 }
271 while((x&7) != 0);
272 x--;
273 startOfLine = false;
274 continue;
275 }
276 }
277 if (ch=='\n')
278 {
279 result += "<br />";
280 startOfLine = true;
281 x = -1;
282 continue;
283 }
284
285 startOfLine = false;
286 if (ch=='&')
287 result += "&amp;";
288 else if (ch=='"')
289 result += "&quot;";
290 else if (ch=='<')
291 result += "&lt;";
292 else if (ch=='>')
293 result += "&gt;";
294 else
295 {
296 const int start = locator.mPos;
297 if ( !(flags & IgnoreUrls) ) {
298 str = locator.getUrl();
299 if (!str.isEmpty())
300 {
301 TQString hyperlink;
302 if(str.left(4) == "www.")
303 hyperlink = "http://" + str;
304 else if(str.left(4) == "ftp.")
305 hyperlink = "ftp://" + str;
306 else
307 hyperlink = str;
308
309 str = str.replace('&', "&amp;");
310 result += "<a href=\"" + hyperlink + "\">" + str + "</a>";
311 x += locator.mPos - start;
312 continue;
313 }
314 str = locator.getEmailAddress();
315 if(!str.isEmpty())
316 {
317 // len is the length of the local part
318 int len = str.find('@');
319 TQString localPart = str.left(len);
320
321 // remove the local part from the result (as '&'s have been expanded to
322 // &amp; we have to take care of the 4 additional characters per '&')
323 result.truncate(result.length() - len - (localPart.contains('&')*4));
324 x -= len;
325
326 result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
327 x += str.length() - 1;
328 continue;
329 }
330 }
331 if ( flags & ReplaceSmileys ) {
332 str = locator.getEmoticon();
333 if ( ! str.isEmpty() ) {
334 result += str;
335 x += locator.mPos - start;
336 continue;
337 }
338 }
339 if ( flags & HighlightText ) {
340 str = locator.highlightedText();
341 if ( !str.isEmpty() ) {
342 result += str;
343 x += locator.mPos - start;
344 continue;
345 }
346 }
347 result += ch;
348 }
349 }
350
351 return result;
352}
353
354TQString LinkLocator::pngToDataUrl( const TQString & iconPath )
355{
356 if ( iconPath.isEmpty() )
357 return TQString();
358
359 TQFile pngFile( iconPath );
360 if ( !pngFile.open( IO_ReadOnly | IO_Raw ) )
361 return TQString();
362
363 TQByteArray ba = pngFile.readAll();
364 pngFile.close();
365 return TQString::fromLatin1("data:image/png;base64,%1")
366 .arg( KCodecs::base64Encode( ba ).data() );
367}
368
369
370TQString LinkLocator::getEmoticon()
371{
372 // smileys have to be prepended by whitespace
373 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
374 return TQString();
375
376 // since smileys start with ':', ';', '(' or '8' short circuit method
377 const TQChar ch = mText[mPos];
378 if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' )
379 return TQString();
380
381 // find the end of the smiley (a smiley is at most 4 chars long and ends at
382 // lineend or whitespace)
383 const int MinSmileyLen = 2;
384 const int MaxSmileyLen = 4;
385 int smileyLen = 1;
386 while ( ( smileyLen <= MaxSmileyLen ) &&
387 ( mPos+smileyLen < (int)mText.length() ) &&
388 !mText[mPos+smileyLen].isSpace() )
389 smileyLen++;
390 if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen )
391 return TQString();
392
393 const TQString smiley = mText.mid( mPos, smileyLen );
394 if ( !s_smileyEmoticonNameMap->contains( smiley ) )
395 return TQString(); // that's not a (known) smiley
396
397 TQString htmlRep;
398 if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) {
399 htmlRep = (*s_smileyEmoticonHTMLCache)[smiley];
400 }
401 else {
402 const TQString imageName = (*s_smileyEmoticonNameMap)[smiley];
403
404#if KDE_IS_VERSION( 3, 3, 91 )
405 const TQString iconPath = locate( "emoticons",
406 EmotIcons::theme() +
407 TQString::fromLatin1( "/" ) +
408 imageName + TQString::fromLatin1(".png") );
409#else
410 const TQString iconPath = locate( "data",
411 TQString::fromLatin1( "kopete/pics/emoticons/" )+
412 EmotIcons::theme() +
413 TQString::fromLatin1( "/" ) +
414 imageName + TQString::fromLatin1(".png") );
415#endif
416
417 const TQString dataUrl = pngToDataUrl( iconPath );
418 if ( dataUrl.isEmpty() ) {
419 htmlRep = TQString();
420 }
421 else {
422 // create an image tag (the text in attribute alt is used
423 // for copy & paste) representing the smiley
424 htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" "
425 "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>")
426 .arg( dataUrl,
427 TQStyleSheet::escape( smiley ),
428 TQStyleSheet::escape( smiley ) );
429 }
430 s_smileyEmoticonHTMLCache->insert( smiley, htmlRep );
431 }
432
433 if ( !htmlRep.isEmpty() )
434 mPos += smileyLen - 1;
435
436 return htmlRep;
437}
438
439TQString LinkLocator::highlightedText()
440{
441 // formating symbols must be prepended with a whitespace
442 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
443 return TQString();
444
445 const TQChar ch = mText[mPos];
446 if ( ch != '/' && ch != '*' && ch != '_' )
447 return TQString();
448
449 TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) );
450 if ( re.search( mText, mPos ) == mPos ) {
451 uint length = re.matchedLength();
452 // there must be a whitespace after the closing formating symbol
453 if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() )
454 return TQString();
455 mPos += length - 1;
456 switch ( ch.latin1() ) {
457 case '*':
458 return "<b>" + re.cap( 1 ) + "</b>";
459 case '_':
460 return "<u>" + re.cap( 1 ) + "</u>";
461 case '/':
462 return "<i>" + re.cap( 1 ) + "</i>";
463 }
464 }
465 return TQString();
466}
467
LinkLocator assists in identifying sections of text that can usefully be converted into hyperlinks in HTML.
Definition: linklocator.h:42
int maxAddressLen() const
Definition: linklocator.cpp:83
TQString getEmailAddress()
Attempts to grab an email address.
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
Definition: linklocator.cpp:68
static TQString pngToDataUrl(const TQString &iconPath)
Embed the given PNG image into a data URL.
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
Definition: linklocator.cpp:78
static TQString convertToHtml(const TQString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
int mPos
The current scan position.
Definition: linklocator.h:161
int maxUrlLen() const
Definition: linklocator.cpp:73
LinkLocator(const TQString &text, int pos=0)
Constructs a LinkLocator that will search a plaintext string from a given starting point.
Definition: linklocator.cpp:44
TQString mText
The plaintext string being scanned for URLs and email addresses.
Definition: linklocator.h:157
TQString getUrl()
Attempts to grab a URL starting at the current scan position.
Definition: linklocator.cpp:88