akregator/src/librss

loader.cpp
1/*
2 * loader.cpp
3 *
4 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
5 *
6 * This program is distributed in the hope that it will be useful, but WITHOUT
7 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
9 * accompanying file 'COPYING'.
10 */
11#include "loader.h"
12#include "document.h"
13#include "feeddetector.h"
14
15#include <tdeio/job.h>
16#include <tdeprocess.h>
17#include <kstaticdeleter.h>
18#include <kurl.h>
19#include <kdebug.h>
20
21#include <tqdom.h>
22#include <tqbuffer.h>
23#include <tqregexp.h>
24#include <tqstring.h>
25#include <tqstringlist.h>
26#include <tqtimer.h>
27
28using namespace RSS;
29
30DataRetriever::DataRetriever()
31{
32}
33
35{
36}
37
38class FileRetriever::Private
39{
40 public:
41
42 Private()
43 : buffer(NULL),
44 lastError(0), job(NULL)
45 {
46 }
47
48 ~Private()
49 {
50 delete buffer;
51 }
52
53 TQBuffer *buffer;
54 int lastError;
55 TDEIO::Job *job;
56 static KStaticDeleter<TQString> userAgentsd;
57 static TQString* userAgent;
58};
59
60KStaticDeleter<TQString> FileRetriever::Private::userAgentsd;
61TQString* FileRetriever::Private::userAgent = 0L;
63 : d(new Private)
64{
65}
66
68{
69 delete d;
70}
71
72bool FileRetriever::m_useCache = true;
73
74TQString FileRetriever::userAgent()
75{
76 if (Private::userAgent == 0L)
77 FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString);
78 return *Private::userAgent;
79}
80
81void FileRetriever::setUserAgent(const TQString &ua)
82{
83 if (Private::userAgent == 0L)
84 FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString);
85 (*Private::userAgent) = ua;
86}
87
88void FileRetriever::setUseCache(bool enabled)
89{
90 m_useCache = enabled;
91}
92
93void FileRetriever::retrieveData(const KURL &url)
94{
95 if (d->buffer)
96 return;
97
98 d->buffer = new TQBuffer;
99 d->buffer->open(IO_WriteOnly);
100
101 KURL u=url;
102
103 if (u.protocol()=="feed")
104 u.setProtocol("http");
105
106 d->job = TDEIO::get(u, false, false);
107 d->job->addMetaData("cache", m_useCache ? "refresh" : "reload");
108
109 TQString ua = userAgent();
110 if (!ua.isEmpty())
111 d->job->addMetaData("UserAgent", ua);
112
113
114 TQTimer::singleShot(1000*90, this, TQ_SLOT(slotTimeout()));
115
116 connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
117 TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
118 connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), TQ_SLOT(slotResult(TDEIO::Job *)));
119 connect(d->job, TQ_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)),
120 TQ_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &)));
121}
122
123void FileRetriever::slotTimeout()
124{
125 abort();
126
127 delete d->buffer;
128 d->buffer = NULL;
129
130 d->lastError = TDEIO::ERR_SERVER_TIMEOUT;
131
132 emit dataRetrieved(TQByteArray(), false);
133}
134
136{
137 return d->lastError;
138}
139
140void FileRetriever::slotData(TDEIO::Job *, const TQByteArray &data)
141{
142 d->buffer->writeBlock(data.data(), data.size());
143}
144
145void FileRetriever::slotResult(TDEIO::Job *job)
146{
147 TQByteArray data = d->buffer->buffer();
148 data.detach();
149
150 delete d->buffer;
151 d->buffer = NULL;
152
153 d->lastError = job->error();
154 emit dataRetrieved(data, d->lastError == 0);
155}
156
157void FileRetriever::slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &newUrl)
158{
159 emit permanentRedirection(newUrl);
160}
161
162void FileRetriever::abort()
163{
164 if (d->job)
165 {
166 d->job->kill(true);
167 d->job = NULL;
168 }
169}
170
171struct OutputRetriever::Private
172{
173 Private() : process(NULL),
174 buffer(NULL),
175 lastError(0)
176 {
177 }
178
179 ~Private()
180 {
181 delete process;
182 delete buffer;
183 }
184
185 KShellProcess *process;
186 TQBuffer *buffer;
187 int lastError;
188};
189
191 d(new Private)
192{
193}
194
196{
197 delete d;
198}
199
200void OutputRetriever::retrieveData(const KURL &url)
201{
202 // Ignore subsequent calls if we didn't finish the previous job yet.
203 if (d->buffer || d->process)
204 return;
205
206 d->buffer = new TQBuffer;
207 d->buffer->open(IO_WriteOnly);
208
209 d->process = new KShellProcess();
210 connect(d->process, TQ_SIGNAL(processExited(TDEProcess *)),
211 TQ_SLOT(slotExited(TDEProcess *)));
212 connect(d->process, TQ_SIGNAL(receivedStdout(TDEProcess *, char *, int)),
213 TQ_SLOT(slotOutput(TDEProcess *, char *, int)));
214 *d->process << url.path();
215 d->process->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout);
216}
217
219{
220 return d->lastError;
221}
222
223void OutputRetriever::slotOutput(TDEProcess *, char *data, int length)
224{
225 d->buffer->writeBlock(data, length);
226}
227
228void OutputRetriever::slotExited(TDEProcess *p)
229{
230 if (!p->normalExit())
231 d->lastError = p->exitStatus();
232
233 TQByteArray data = d->buffer->buffer();
234 data.detach();
235
236 delete d->buffer;
237 d->buffer = NULL;
238
239 delete d->process;
240 d->process = NULL;
241
242 emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0);
243}
244
245struct Loader::Private
246{
247 Private() : retriever(NULL),
248 lastError(0)
249 {
250 }
251
252 ~Private()
253 {
254 delete retriever;
255 }
256
257 DataRetriever *retriever;
258 int lastError;
259 KURL discoveredFeedURL;
260 KURL url;
261};
262
264{
265 return new Loader;
266}
267
268Loader *Loader::create(TQObject *object, const char *slot)
269{
270 Loader *loader = create();
271 connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)),
272 object, slot);
273 return loader;
274}
275
276Loader::Loader() : d(new Private)
277{
278}
279
280Loader::~Loader()
281{
282 delete d;
283}
284
285void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
286{
287 if (d->retriever != NULL)
288 return;
289
290 d->url=url;
291 d->retriever = retriever;
292
293 connect(d->retriever, TQ_SIGNAL(dataRetrieved(const TQByteArray &, bool)),
294 this, TQ_SLOT(slotRetrieverDone(const TQByteArray &, bool)));
295
296 d->retriever->retrieveData(url);
297}
298
300{
301 return d->lastError;
302}
303
304void Loader::abort()
305{
306 if (d && d->retriever)
307 {
308 d->retriever->abort();
309 delete d->retriever;
310 d->retriever=NULL;
311 }
312 emit loadingComplete(this, TQDomDocument(), Aborted);
313 delete this;
314}
315
316const KURL &Loader::discoveredFeedURL() const
317{
318 return d->discoveredFeedURL;
319}
320
321void Loader::slotRetrieverDone(const TQByteArray &data, bool success)
322{
323 d->lastError = d->retriever->errorCode();
324
325 delete d->retriever;
326 d->retriever = NULL;
327
328 Document rssDoc;
329 Status status = Success;
330
331 if (success) {
332 TQDomDocument doc;
333
334 /* Some servers insert whitespace before the <?xml...?> declaration.
335 * TQDom doesn't tolerate that (and it's right, that's invalid XML),
336 * so we strip that.
337 */
338
339 const char *charData = data.data();
340 int len = data.count();
341
342 while (len && TQChar(*charData).isSpace()) {
343 --len;
344 ++charData;
345 }
346
347 if ( len > 3 && TQChar(*charData) == TQChar(0357) ) { // 0357 0273 0277
348 len -= 3;
349 charData += 3;
350 }
351 TQByteArray tmpData;
352 tmpData.setRawData(charData, len);
353
354 if (doc.setContent(tmpData, /* namespaceProcessing */ true))
355 {
356 rssDoc = Document(doc);
357 if (!rssDoc.isValid())
358 {
359 discoverFeeds(tmpData);
360 status = ParseError;
361 }
362 }
363 else
364 {
365 discoverFeeds(tmpData);
366 status = ParseError;
367 }
368
369 tmpData.resetRawData(charData, len);
370 } else
371 status = RetrieveError;
372
373 emit loadingComplete(this, rssDoc, status);
374
375 delete this;
376}
377
378void Loader::discoverFeeds(const TQByteArray &data)
379{
380 TQString str = TQString(data).simplifyWhiteSpace();
381
382 TQStringList feeds;
383
384 FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str);
385
386 for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it)
387 {
388 feeds += (*it).url();
389 }
390
391 if (list.isEmpty())
393
394 TQString feed = feeds.first();
395 TQString host = d->url.host();
396 KURL testURL;
397 // loop through, prefer feeds on same host
398 TQStringList::Iterator end( feeds.end() );
399 for ( TQStringList::Iterator it = feeds.begin(); it != end; ++it)
400 {
401 testURL=*it;
402 if (testURL.host() == host)
403 {
404 feed = *it;
405 break;
406 }
407 }
408
409 d->discoveredFeedURL = feed.isNull() ? TQString() : FeedDetector::fixRelativeURL(feed, d->url);
410}
411
412#include "loader.moc"
Abstract baseclass for all data retriever classes.
Definition: loader.h:36
virtual ~DataRetriever()
Destructor.
Definition: loader.cpp:34
void dataRetrieved(const TQByteArray &data, bool success)
Emit this signal to tell the Loader class that the retrieval process was finished.
Represents a RSS document and provides all the features and properties as stored in it.
Definition: document.h:32
bool isValid() const
Definition: document.cpp:519
a class providing functions to detect linked feeds in HTML sources
Definition: feeddetector.h:56
static FeedDetectorEntryList extractFromLinkTags(const TQString &s)
searches an HTML page for feeds listed in <link> tags <link> tags with rel attribute values alterna...
static TQStringList extractBruteForce(const TQString &s)
searches an HTML page for slightly feed-like looking links and catches everything not running away qu...
virtual int errorCode() const
Definition: loader.cpp:135
virtual ~FileRetriever()
Destructor.
Definition: loader.cpp:67
FileRetriever()
Default constructor.
Definition: loader.cpp:62
virtual void retrieveData(const KURL &url)
Downloads the file referenced by the given URL and passes its contents on to the Loader.
Definition: loader.cpp:93
void permanentRedirection(const KURL &url)
Signals a permanent redirection.
This class is the preferred way of loading RSS files.
Definition: loader.h:258
int errorCode() const
Retrieves the error code of the last loading process (if any), as reported by the employed data retre...
Definition: loader.cpp:299
void loadFrom(const KURL &url, DataRetriever *retriever)
Loads the RSS file referenced by the given URL using the specified retrieval algorithm.
Definition: loader.cpp:285
void loadingComplete(Loader *loader, Document doc, Status status)
This signal gets emitted when the loading process triggered by calling loadFrom() finished.
static Loader * create()
Constructs a Loader instance.
Definition: loader.cpp:263
virtual ~OutputRetriever()
Destructor.
Definition: loader.cpp:195
virtual void retrieveData(const KURL &url)
Executes the program referenced by the given URL and retrieves the data which the program prints to s...
Definition: loader.cpp:200
OutputRetriever()
Default constructor.
Definition: loader.cpp:190
virtual int errorCode() const
Definition: loader.cpp:218