akregator/src

article.cpp
1 /*
2  This file is part of Akregator.
3 
4  Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
5  2005 Frank Osterfeld <frank.osterfeld at kdemail.net>
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20  As a special exception, permission is given to link this program
21  with any edition of TQt, and distribute the resulting executable,
22  without including the source code for TQt in the source distribution.
23 */
24 
25 #include "article.h"
26 #include "feed.h"
27 #include "feedstorage.h"
28 #include "storage.h"
29 #include "librss/librss.h"
30 #include "shared.h"
31 #include "utils.h"
32 
33 #include <tqdatetime.h>
34 #include <tqdom.h>
35 #include <tqregexp.h>
36 #include <tqstringlist.h>
37 #include <tqvaluelist.h>
38 
39 #include <krfcdate.h>
40 #include <kdebug.h>
41 #include <kurl.h>
42 
43 
44 namespace Akregator {
45 
46 struct Article::Private : public Shared
47 {
58  enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10};
59 
60  TQString guid;
61  Backend::FeedStorage* archive;
62  Feed* feed;
63 
64  // the variables below are initialized to null values in the Article constructor
65  // and then loaded on demand instead.
66  //
67  // to read their values, you should therefore use the accessor methods of the Article
68  // hash(), pubDate(), statusBits() rather than accessing them directly.
69  uint hash;
70  TQDateTime pubDate;
71  int status;
72 };
73 
74 Article::Article() : d(new Private)
75 {
76  d->hash = 0;
77  d->status = 0;
78  d->feed = 0;
79  d->archive = 0;
80 }
81 
82 Article::Article(const TQString& guid, Feed* feed) : d(new Private)
83 {
84  // this constructor should be as cheap as possible, so avoid calls to
85  // read information from the archive in here if possible
86  //
87  // d->hash, d->pubDate and d->status are loaded on-demand by
88  // the hash(), pubDate() and statusBits() methods respectively
89 
90  d->feed = feed;
91  d->guid = guid;
92  d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl());
93  d->status = 0;
94  d->hash = 0;
95 }
96 
97 void Article::initialize(RSS::Article article, Backend::FeedStorage* archive)
98 {
99  d->archive = archive;
100  d->status = Private::New;
101  d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url()
102  + article.commentsLink().url() );
103 
104  d->guid = article.guid();
105 
106  if (!d->archive->contains(d->guid))
107  {
108  d->archive->addEntry(d->guid);
109 
110  if (article.meta("deleted") == "true")
111  { // if article is in deleted state, we just add the status and omit the rest
112  d->status = Private::Read | Private::Deleted;
113  d->archive->setStatus(d->guid, d->status);
114  }
115  else
116  { // article is not deleted, let's add it to the archive
117 
118  d->archive->setHash(d->guid, hash() );
119  TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
120  d->archive->setTitle(d->guid, title);
121  d->archive->setDescription(d->guid, article.description());
122  d->archive->setLink(d->guid, article.link().url());
123  d->archive->setComments(d->guid, article.comments());
124  d->archive->setCommentsLink(d->guid, article.commentsLink().url());
125  d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink());
126  d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true");
127  d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime();
128  d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
129  d->archive->setAuthor(d->guid, article.author());
130 
131  TQValueList<RSS::Category> cats = article.categories();
132  TQValueList<RSS::Category>::ConstIterator end = cats.end();
133 
134  for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it)
135  {
136  Backend::Category cat;
137 
138  cat.term = (*it).category();
139  cat.scheme = (*it).domain();
140  cat.name = (*it).category();
141 
142  d->archive->addCategory(d->guid, cat);
143  }
144 
145  if (!article.enclosure().isNull())
146  {
147  d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length());
148  }
149  else
150  {
151  d->archive->removeEnclosure(d->guid);
152  }
153 
154  TQString status = article.meta("status");
155 
156  if (!status.isEmpty())
157  {
158  int statusInt = status.toInt();
159  if (statusInt == New)
160  statusInt = Unread;
161  setStatus(statusInt);
162  }
163  setKeep(article.meta("keep") == "true");
164  }
165  }
166  else
167  {
168  // always update comments count, as it's not used for hash calculation
169  d->archive->setComments(d->guid, article.comments());
170  if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified?
171  { // if yes, update
172  d->pubDate.setTime_t(d->archive->pubDate(d->guid));
173  d->archive->setHash(d->guid, hash() );
174  TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
175  d->archive->setTitle(d->guid, title);
176  d->archive->setDescription(d->guid, article.description());
177  d->archive->setLink(d->guid, article.link().url());
178  d->archive->setCommentsLink(d->guid, article.commentsLink().url());
179  d->archive->setAuthor(d->guid, article.author());
180  }
181  }
182 }
183 
184 Article::Article(RSS::Article article, Feed* feed) : d(new Private)
185 {
186  //assert(feed)
187  d->feed = feed;
188  initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()));
189 }
190 
191 Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private)
192 {
193  d->feed = 0;
194  initialize(article, archive);
195 }
196 
197 bool Article::isNull() const
198 {
199  return d->archive == 0; // TODO: use proper null state
200 }
201 
202 void Article::offsetPubDate(int secs)
203 {
204  d->pubDate = pubDate().addSecs(secs);
205  d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
206 
207 }
208 
209 void Article::setDeleted()
210 {
211  if (isDeleted())
212  return;
213 
214  setStatus(Read);
215  d->status = Private::Deleted | Private::Read;
216  d->archive->setStatus(d->guid, d->status);
217  d->archive->setDeleted(d->guid);
218 
219  if (d->feed)
220  d->feed->setArticleDeleted(*this);
221 }
222 
223 bool Article::isDeleted() const
224 {
225  return (statusBits() & Private::Deleted) != 0;
226 }
227 
228 Article::Article(const Article &other) : d(new Private)
229 {
230  *this = other;
231 }
232 
233 Article::~Article()
234 {
235  if (d->deref())
236  {
237  delete d;
238  d = 0;
239  }
240 }
241 
242 Article &Article::operator=(const Article &other)
243 {
244  if (this != &other) {
245  other.d->ref();
246  if (d && d->deref())
247  delete d;
248  d = other.d;
249  }
250  return *this;
251 }
252 
253 
254 bool Article::operator<(const Article &other) const
255 {
256  return pubDate() > other.pubDate() ||
257  (pubDate() == other.pubDate() && guid() < other.guid() );
258 }
259 
260 bool Article::operator<=(const Article &other) const
261 {
262  return (pubDate() > other.pubDate() || *this == other);
263 }
264 
265 bool Article::operator>(const Article &other) const
266 {
267  return pubDate() < other.pubDate() ||
268  (pubDate() == other.pubDate() && guid() > other.guid() );
269 }
270 
271 bool Article::operator>=(const Article &other) const
272 {
273  return (pubDate() > other.pubDate() || *this == other);
274 }
275 
276 bool Article::operator==(const Article &other) const
277 {
278  return d->guid == other.guid();
279 }
280 
281 int Article::statusBits() const
282 {
283  // delayed loading of status information from archive
284  if ( d->status == 0 )
285  {
286  d->status = d->archive->status(d->guid);
287  }
288 
289  return d->status;
290 }
291 
292 int Article::status() const
293 {
294  if ((statusBits() & Private::Read) != 0)
295  return Read;
296 
297  if ((statusBits() & Private::New) != 0)
298  return New;
299  else
300  return Unread;
301 }
302 
303 void Article::setStatus(int stat)
304 {
305  // use status() rather than statusBits() here to filter out status flags that we are not
306  // interested in
307  int oldStatus = status();
308 
309  if (oldStatus != stat)
310  {
311  switch (stat)
312  {
313  case Read:
314  d->status = ( d->status | Private::Read) & ~Private::New;
315  break;
316  case Unread:
317  d->status = ( d->status & ~Private::Read) & ~Private::New;
318  break;
319  case New:
320  d->status = ( d->status | Private::New) & ~Private::Read;
321  break;
322  }
323  d->archive->setStatus(d->guid, d->status);
324  if (d->feed)
325  d->feed->setArticleChanged(*this, oldStatus);
326  }
327 }
328 
329 TQString Article::title() const
330 {
331  return d->archive->title(d->guid);
332 }
333 
334 TQString Article::author() const
335 {
336  return d->archive->author(d->guid);
337 }
338 
339 KURL Article::link() const
340 {
341  return d->archive->link(d->guid);
342 }
343 
344 TQString Article::description() const
345 {
346  return d->archive->description(d->guid);
347 }
348 
349 TQString Article::guid() const
350 {
351  return d->guid;
352 }
353 
354 KURL Article::commentsLink() const
355 {
356  return d->archive->commentsLink(d->guid);
357 }
358 
359 
360 int Article::comments() const
361 {
362  return d->archive->comments(d->guid);
363 }
364 
365 
366 bool Article::guidIsPermaLink() const
367 {
368  return d->archive->guidIsPermaLink(d->guid);
369 }
370 
372 {
373  return d->archive->guidIsHash(d->guid);
374 }
375 
376 uint Article::hash() const
377 {
378  // delayed loading of hash from archive
379  if ( d->hash == 0 )
380  {
381  d->hash = d->archive->hash(d->guid);
382  }
383 
384  return d->hash;
385 }
386 
387 bool Article::keep() const
388 {
389  return ( statusBits() & Private::Keep) != 0;
390 }
391 
392 RSS::Enclosure Article::enclosure() const
393 {
394  bool hasEnc;
395  TQString url, type;
396  int length;
397  d->archive->enclosure(d->guid, hasEnc, url, type, length);
398  return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure();
399 
400 
401 }
402 
403 
404 void Article::setKeep(bool keep)
405 {
406  d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep);
407  d->archive->setStatus(d->guid, d->status);
408  if (d->feed)
409  d->feed->setArticleChanged(*this);
410 }
411 
412 void Article::addTag(const TQString& tag)
413 {
414  d->archive->addTag(d->guid, tag);
415  if (d->feed)
416  d->feed->setArticleChanged(*this);
417 }
418 
419 void Article::removeTag(const TQString& tag)
420 {
421  d->archive->removeTag(d->guid, tag);
422  if (d->feed)
423  d->feed->setArticleChanged(*this);
424 }
425 
426 bool Article::hasTag(const TQString& tag) const
427 {
428  return d->archive->tags(d->guid).contains(tag);
429 }
430 
431 TQStringList Article::tags() const
432 {
433  return d->archive->tags(d->guid);
434 }
435 
436 Feed* Article::feed() const
437 { return d->feed; }
438 
439 const TQDateTime& Article::pubDate() const
440 {
441  // delayed loading of publication date information from archive
442  if ( d->pubDate.isNull() )
443  {
444  d->pubDate.setTime_t(d->archive->pubDate(d->guid));
445  }
446 
447  return d->pubDate;
448 }
449 
450 TQString Article::buildTitle(const TQString& description)
451 {
452  TQString s = description;
453  if (description.stripWhiteSpace().isEmpty())
454  return "";
455 
456  int i = s.find('>',500); /*avoid processing too much */
457  if (i != -1)
458  s = s.left(i+1);
459  TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false);
460  TQString tagName, toReplace, replaceWith;
461  while (rx.search(s) != -1 )
462  {
463  tagName=rx.cap(2);
464  if (tagName=="SCRIPT"||tagName=="script")
465  toReplace=rx.cap(0); // strip tag AND tag contents
466  else if (tagName.startsWith("br") || tagName.startsWith("BR"))
467  {
468  toReplace=rx.cap(1);
469  replaceWith=" ";
470  }
471  else
472  toReplace=rx.cap(1); // strip just tag
473  s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed
474  }
475  if (s.length()> 90)
476  s=s.left(90)+"...";
477  return s.simplifyWhiteSpace();
478 }
479 } // namespace Akregator
bool guidIsHash() const
returns whether the guid is a hash or an ID taken from the source
Definition: article.cpp:371
uint hash() const
returns a hash value used to detect changes in articles with non-hash GUIDs.
Definition: article.cpp:376
bool keep() const
if true, the article should be kept even when expired
Definition: article.cpp:387
represents a feed
Definition: feed.h:63
const TQString & xmlUrl() const
returns the url of the actual feed source (rss/rdf/atom file)
Definition: feed.cpp:351