akregator/src

article.cpp
1/*
2 This file is part of Akregator.
3
4 Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
5 2005 Frank Osterfeld <frank.osterfeld at kdemail.net>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
20 As a special exception, permission is given to link this program
21 with any edition of TQt, and distribute the resulting executable,
22 without including the source code for TQt in the source distribution.
23*/
24
25#include "article.h"
26#include "feed.h"
27#include "feedstorage.h"
28#include "storage.h"
29#include "librss/librss.h"
30#include "shared.h"
31#include "utils.h"
32
33#include <tqdatetime.h>
34#include <tqdom.h>
35#include <tqregexp.h>
36#include <tqstringlist.h>
37#include <tqvaluelist.h>
38
39#include <krfcdate.h>
40#include <kdebug.h>
41#include <kurl.h>
42
43
44namespace Akregator {
45
46struct Article::Private : public Shared
47{
58 enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10};
59
60 TQString guid;
61 Backend::FeedStorage* archive;
62 Feed* feed;
63
64 // the variables below are initialized to null values in the Article constructor
65 // and then loaded on demand instead.
66 //
67 // to read their values, you should therefore use the accessor methods of the Article
68 // hash(), pubDate(), statusBits() rather than accessing them directly.
69 uint hash;
70 TQDateTime pubDate;
71 int status;
72};
73
74Article::Article() : d(new Private)
75{
76 d->hash = 0;
77 d->status = 0;
78 d->feed = 0;
79 d->archive = 0;
80}
81
82Article::Article(const TQString& guid, Feed* feed) : d(new Private)
83{
84 // this constructor should be as cheap as possible, so avoid calls to
85 // read information from the archive in here if possible
86 //
87 // d->hash, d->pubDate and d->status are loaded on-demand by
88 // the hash(), pubDate() and statusBits() methods respectively
89
90 d->feed = feed;
91 d->guid = guid;
92 d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl());
93 d->status = 0;
94 d->hash = 0;
95}
96
97void Article::initialize(RSS::Article article, Backend::FeedStorage* archive)
98{
99 d->archive = archive;
100 d->status = Private::New;
101 d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url()
102 + article.commentsLink().url() );
103
104 d->guid = article.guid();
105
106 if (!d->archive->contains(d->guid))
107 {
108 d->archive->addEntry(d->guid);
109
110 if (article.meta("deleted") == "true")
111 { // if article is in deleted state, we just add the status and omit the rest
112 d->status = Private::Read | Private::Deleted;
113 d->archive->setStatus(d->guid, d->status);
114 }
115 else
116 { // article is not deleted, let's add it to the archive
117
118 d->archive->setHash(d->guid, hash() );
119 TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
120 d->archive->setTitle(d->guid, title);
121 d->archive->setDescription(d->guid, article.description());
122 d->archive->setLink(d->guid, article.link().url());
123 d->archive->setComments(d->guid, article.comments());
124 d->archive->setCommentsLink(d->guid, article.commentsLink().url());
125 d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink());
126 d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true");
127 d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime();
128 d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
129 d->archive->setAuthor(d->guid, article.author());
130
131 TQValueList<RSS::Category> cats = article.categories();
132 TQValueList<RSS::Category>::ConstIterator end = cats.end();
133
134 for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it)
135 {
136 Backend::Category cat;
137
138 cat.term = (*it).category();
139 cat.scheme = (*it).domain();
140 cat.name = (*it).category();
141
142 d->archive->addCategory(d->guid, cat);
143 }
144
145 if (!article.enclosure().isNull())
146 {
147 d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length());
148 }
149 else
150 {
151 d->archive->removeEnclosure(d->guid);
152 }
153
154 TQString status = article.meta("status");
155
156 if (!status.isEmpty())
157 {
158 int statusInt = status.toInt();
159 if (statusInt == New)
160 statusInt = Unread;
161 setStatus(statusInt);
162 }
163 setKeep(article.meta("keep") == "true");
164 }
165 }
166 else
167 {
168 // always update comments count, as it's not used for hash calculation
169 d->archive->setComments(d->guid, article.comments());
170 if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified?
171 { // if yes, update
172 d->pubDate.setTime_t(d->archive->pubDate(d->guid));
173 d->archive->setHash(d->guid, hash() );
174 TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
175 d->archive->setTitle(d->guid, title);
176 d->archive->setDescription(d->guid, article.description());
177 d->archive->setLink(d->guid, article.link().url());
178 d->archive->setCommentsLink(d->guid, article.commentsLink().url());
179 d->archive->setAuthor(d->guid, article.author());
180 }
181 }
182}
183
184Article::Article(RSS::Article article, Feed* feed) : d(new Private)
185{
186 //assert(feed)
187 d->feed = feed;
188 initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()));
189}
190
191Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private)
192{
193 d->feed = 0;
194 initialize(article, archive);
195}
196
197bool Article::isNull() const
198{
199 return d->archive == 0; // TODO: use proper null state
200}
201
202void Article::offsetPubDate(int secs)
203{
204 d->pubDate = pubDate().addSecs(secs);
205 d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
206
207}
208
209void Article::setDeleted()
210{
211 if (isDeleted())
212 return;
213
214 setStatus(Read);
215 d->status = Private::Deleted | Private::Read;
216 d->archive->setStatus(d->guid, d->status);
217 d->archive->setDeleted(d->guid);
218
219 if (d->feed)
220 d->feed->setArticleDeleted(*this);
221}
222
223bool Article::isDeleted() const
224{
225 return (statusBits() & Private::Deleted) != 0;
226}
227
228Article::Article(const Article &other) : d(new Private)
229{
230 *this = other;
231}
232
233Article::~Article()
234{
235 if (d->deref())
236 {
237 delete d;
238 d = 0;
239 }
240}
241
242Article &Article::operator=(const Article &other)
243{
244 if (this != &other) {
245 other.d->ref();
246 if (d && d->deref())
247 delete d;
248 d = other.d;
249 }
250 return *this;
251}
252
253
254bool Article::operator<(const Article &other) const
255{
256 return pubDate() > other.pubDate() ||
257 (pubDate() == other.pubDate() && guid() < other.guid() );
258}
259
260bool Article::operator<=(const Article &other) const
261{
262 return (pubDate() > other.pubDate() || *this == other);
263}
264
265bool Article::operator>(const Article &other) const
266{
267 return pubDate() < other.pubDate() ||
268 (pubDate() == other.pubDate() && guid() > other.guid() );
269}
270
271bool Article::operator>=(const Article &other) const
272{
273 return (pubDate() > other.pubDate() || *this == other);
274}
275
276bool Article::operator==(const Article &other) const
277{
278 return d->guid == other.guid();
279}
280
281int Article::statusBits() const
282{
283 // delayed loading of status information from archive
284 if ( d->status == 0 )
285 {
286 d->status = d->archive->status(d->guid);
287 }
288
289 return d->status;
290}
291
292int Article::status() const
293{
294 if ((statusBits() & Private::Read) != 0)
295 return Read;
296
297 if ((statusBits() & Private::New) != 0)
298 return New;
299 else
300 return Unread;
301}
302
303void Article::setStatus(int stat)
304{
305 // use status() rather than statusBits() here to filter out status flags that we are not
306 // interested in
307 int oldStatus = status();
308
309 if (oldStatus != stat)
310 {
311 switch (stat)
312 {
313 case Read:
314 d->status = ( d->status | Private::Read) & ~Private::New;
315 break;
316 case Unread:
317 d->status = ( d->status & ~Private::Read) & ~Private::New;
318 break;
319 case New:
320 d->status = ( d->status | Private::New) & ~Private::Read;
321 break;
322 }
323 d->archive->setStatus(d->guid, d->status);
324 if (d->feed)
325 d->feed->setArticleChanged(*this, oldStatus);
326 }
327}
328
329TQString Article::title() const
330{
331 return d->archive->title(d->guid);
332}
333
334TQString Article::author() const
335{
336 return d->archive->author(d->guid);
337}
338
339KURL Article::link() const
340{
341 return d->archive->link(d->guid);
342}
343
344TQString Article::description() const
345{
346 return d->archive->description(d->guid);
347}
348
349TQString Article::guid() const
350{
351 return d->guid;
352}
353
354KURL Article::commentsLink() const
355{
356 return d->archive->commentsLink(d->guid);
357}
358
359
360int Article::comments() const
361{
362 return d->archive->comments(d->guid);
363}
364
365
366bool Article::guidIsPermaLink() const
367{
368 return d->archive->guidIsPermaLink(d->guid);
369}
370
372{
373 return d->archive->guidIsHash(d->guid);
374}
375
376uint Article::hash() const
377{
378 // delayed loading of hash from archive
379 if ( d->hash == 0 )
380 {
381 d->hash = d->archive->hash(d->guid);
382 }
383
384 return d->hash;
385}
386
387bool Article::keep() const
388{
389 return ( statusBits() & Private::Keep) != 0;
390}
391
392RSS::Enclosure Article::enclosure() const
393{
394 bool hasEnc;
395 TQString url, type;
396 int length;
397 d->archive->enclosure(d->guid, hasEnc, url, type, length);
398 return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure();
399
400
401}
402
403
404void Article::setKeep(bool keep)
405{
406 d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep);
407 d->archive->setStatus(d->guid, d->status);
408 if (d->feed)
409 d->feed->setArticleChanged(*this);
410}
411
412void Article::addTag(const TQString& tag)
413{
414 d->archive->addTag(d->guid, tag);
415 if (d->feed)
416 d->feed->setArticleChanged(*this);
417}
418
419void Article::removeTag(const TQString& tag)
420{
421 d->archive->removeTag(d->guid, tag);
422 if (d->feed)
423 d->feed->setArticleChanged(*this);
424}
425
426bool Article::hasTag(const TQString& tag) const
427{
428 return d->archive->tags(d->guid).contains(tag);
429}
430
431TQStringList Article::tags() const
432{
433 return d->archive->tags(d->guid);
434}
435
436Feed* Article::feed() const
437{ return d->feed; }
438
439const TQDateTime& Article::pubDate() const
440{
441 // delayed loading of publication date information from archive
442 if ( d->pubDate.isNull() )
443 {
444 d->pubDate.setTime_t(d->archive->pubDate(d->guid));
445 }
446
447 return d->pubDate;
448}
449
450TQString Article::buildTitle(const TQString& description)
451{
452 TQString s = description;
453 if (description.stripWhiteSpace().isEmpty())
454 return "";
455
456 int i = s.find('>',500); /*avoid processing too much */
457 if (i != -1)
458 s = s.left(i+1);
459 TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false);
460 TQString tagName, toReplace, replaceWith;
461 while (rx.search(s) != -1 )
462 {
463 tagName=rx.cap(2);
464 if (tagName=="SCRIPT"||tagName=="script")
465 toReplace=rx.cap(0); // strip tag AND tag contents
466 else if (tagName.startsWith("br") || tagName.startsWith("BR"))
467 {
468 toReplace=rx.cap(1);
469 replaceWith=" ";
470 }
471 else
472 toReplace=rx.cap(1); // strip just tag
473 s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed
474 }
475 if (s.length()> 90)
476 s=s.left(90)+"...";
477 return s.simplifyWhiteSpace();
478}
479} // namespace Akregator
// Cross-references (from the generated documentation):
//
//   bool Akregator::Article::guidIsHash() const
//     Returns whether the guid is a hash or an ID taken from the source.
//     Definition: article.cpp:371
//
//   uint Akregator::Article::hash() const
//     Returns a hash value used to detect changes in articles with non-hash GUIDs.
//     Definition: article.cpp:376
//
//   bool Akregator::Article::keep() const
//     If true, the article should be kept even when expired.
//     Definition: article.cpp:387
//
//   Akregator::Feed
//     Represents a feed.
//     Definition: feed.h:63
//
//   const TQString & Akregator::Feed::xmlUrl() const
//     Returns the URL of the actual feed source (RSS/RDF/Atom file).
//     Definition: feed.cpp:351