akregator/src/librss

loader.cpp
1 /*
2  * loader.cpp
3  *
4  * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
5  *
6  * This program is distributed in the hope that it will be useful, but WITHOUT
7  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8  * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
9  * accompanying file 'COPYING'.
10  */
11 #include "loader.h"
12 #include "document.h"
13 #include "feeddetector.h"
14 
15 #include <tdeio/job.h>
16 #include <tdeprocess.h>
17 #include <kstaticdeleter.h>
18 #include <kurl.h>
19 #include <kdebug.h>
20 
21 #include <tqdom.h>
22 #include <tqbuffer.h>
23 #include <tqregexp.h>
24 #include <tqstring.h>
25 #include <tqstringlist.h>
26 #include <tqtimer.h>
27 
28 using namespace RSS;
29 
31 {
32 }
33 
35 {
36 }
37 
38 class FileRetriever::Private
39 {
40  public:
41 
42  Private()
43  : buffer(NULL),
44  lastError(0), job(NULL)
45  {
46  }
47 
48  ~Private()
49  {
50  delete buffer;
51  }
52 
53  TQBuffer *buffer;
54  int lastError;
55  TDEIO::Job *job;
56  static KStaticDeleter<TQString> userAgentsd;
57  static TQString* userAgent;
58 };
59 
60 KStaticDeleter<TQString> FileRetriever::Private::userAgentsd;
61 TQString* FileRetriever::Private::userAgent = 0L;
63  : d(new Private)
64 {
65 }
66 
68 {
69  delete d;
70 }
71 
72 bool FileRetriever::m_useCache = true;
73 
74 TQString FileRetriever::userAgent()
75 {
76  if (Private::userAgent == 0L)
77  FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString);
78  return *Private::userAgent;
79 }
80 
81 void FileRetriever::setUserAgent(const TQString &ua)
82 {
83  if (Private::userAgent == 0L)
84  FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString);
85  (*Private::userAgent) = ua;
86 }
87 
88 void FileRetriever::setUseCache(bool enabled)
89 {
90  m_useCache = enabled;
91 }
92 
93 void FileRetriever::retrieveData(const KURL &url)
94 {
95  if (d->buffer)
96  return;
97 
98  d->buffer = new TQBuffer;
99  d->buffer->open(IO_WriteOnly);
100 
101  KURL u=url;
102 
103  if (u.protocol()=="feed")
104  u.setProtocol("http");
105 
106  d->job = TDEIO::get(u, false, false);
107  d->job->addMetaData("cache", m_useCache ? "refresh" : "reload");
108 
109  TQString ua = userAgent();
110  if (!ua.isEmpty())
111  d->job->addMetaData("UserAgent", ua);
112 
113 
114  TQTimer::singleShot(1000*90, this, TQ_SLOT(slotTimeout()));
115 
116  connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
117  TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
118  connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), TQ_SLOT(slotResult(TDEIO::Job *)));
119  connect(d->job, TQ_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)),
120  TQ_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &)));
121 }
122 
123 void FileRetriever::slotTimeout()
124 {
125  abort();
126 
127  delete d->buffer;
128  d->buffer = NULL;
129 
130  d->lastError = TDEIO::ERR_SERVER_TIMEOUT;
131 
132  emit dataRetrieved(TQByteArray(), false);
133 }
134 
136 {
137  return d->lastError;
138 }
139 
140 void FileRetriever::slotData(TDEIO::Job *, const TQByteArray &data)
141 {
142  d->buffer->writeBlock(data.data(), data.size());
143 }
144 
145 void FileRetriever::slotResult(TDEIO::Job *job)
146 {
147  TQByteArray data = d->buffer->buffer();
148  data.detach();
149 
150  delete d->buffer;
151  d->buffer = NULL;
152 
153  d->lastError = job->error();
154  emit dataRetrieved(data, d->lastError == 0);
155 }
156 
157 void FileRetriever::slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &newUrl)
158 {
159  emit permanentRedirection(newUrl);
160 }
161 
162 void FileRetriever::abort()
163 {
164  if (d->job)
165  {
166  d->job->kill(true);
167  d->job = NULL;
168  }
169 }
170 
171 struct OutputRetriever::Private
172 {
173  Private() : process(NULL),
174  buffer(NULL),
175  lastError(0)
176  {
177  }
178 
179  ~Private()
180  {
181  delete process;
182  delete buffer;
183  }
184 
185  KShellProcess *process;
186  TQBuffer *buffer;
187  int lastError;
188 };
189 
191  d(new Private)
192 {
193 }
194 
196 {
197  delete d;
198 }
199 
200 void OutputRetriever::retrieveData(const KURL &url)
201 {
202  // Ignore subsequent calls if we didn't finish the previous job yet.
203  if (d->buffer || d->process)
204  return;
205 
206  d->buffer = new TQBuffer;
207  d->buffer->open(IO_WriteOnly);
208 
209  d->process = new KShellProcess();
210  connect(d->process, TQ_SIGNAL(processExited(TDEProcess *)),
211  TQ_SLOT(slotExited(TDEProcess *)));
212  connect(d->process, TQ_SIGNAL(receivedStdout(TDEProcess *, char *, int)),
213  TQ_SLOT(slotOutput(TDEProcess *, char *, int)));
214  *d->process << url.path();
215  d->process->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout);
216 }
217 
219 {
220  return d->lastError;
221 }
222 
223 void OutputRetriever::slotOutput(TDEProcess *, char *data, int length)
224 {
225  d->buffer->writeBlock(data, length);
226 }
227 
228 void OutputRetriever::slotExited(TDEProcess *p)
229 {
230  if (!p->normalExit())
231  d->lastError = p->exitStatus();
232 
233  TQByteArray data = d->buffer->buffer();
234  data.detach();
235 
236  delete d->buffer;
237  d->buffer = NULL;
238 
239  delete d->process;
240  d->process = NULL;
241 
242  emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0);
243 }
244 
245 struct Loader::Private
246 {
247  Private() : retriever(NULL),
248  lastError(0)
249  {
250  }
251 
252  ~Private()
253  {
254  delete retriever;
255  }
256 
257  DataRetriever *retriever;
258  int lastError;
259  KURL discoveredFeedURL;
260  KURL url;
261 };
262 
264 {
265  return new Loader;
266 }
267 
268 Loader *Loader::create(TQObject *object, const char *slot)
269 {
270  Loader *loader = create();
271  connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)),
272  object, slot);
273  return loader;
274 }
275 
276 Loader::Loader() : d(new Private)
277 {
278 }
279 
280 Loader::~Loader()
281 {
282  delete d;
283 }
284 
285 void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
286 {
287  if (d->retriever != NULL)
288  return;
289 
290  d->url=url;
291  d->retriever = retriever;
292 
293  connect(d->retriever, TQ_SIGNAL(dataRetrieved(const TQByteArray &, bool)),
294  this, TQ_SLOT(slotRetrieverDone(const TQByteArray &, bool)));
295 
296  d->retriever->retrieveData(url);
297 }
298 
299 int Loader::errorCode() const
300 {
301  return d->lastError;
302 }
303 
304 void Loader::abort()
305 {
306  if (d && d->retriever)
307  {
308  d->retriever->abort();
309  delete d->retriever;
310  d->retriever=NULL;
311  }
312  emit loadingComplete(this, TQDomDocument(), Aborted);
313  delete this;
314 }
315 
316 const KURL &Loader::discoveredFeedURL() const
317 {
318  return d->discoveredFeedURL;
319 }
320 
321 void Loader::slotRetrieverDone(const TQByteArray &data, bool success)
322 {
323  d->lastError = d->retriever->errorCode();
324 
325  delete d->retriever;
326  d->retriever = NULL;
327 
328  Document rssDoc;
329  Status status = Success;
330 
331  if (success) {
332  TQDomDocument doc;
333 
334  /* Some servers insert whitespace before the <?xml...?> declaration.
335  * TQDom doesn't tolerate that (and it's right, that's invalid XML),
336  * so we strip that.
337  */
338 
339  const char *charData = data.data();
340  int len = data.count();
341 
342  while (len && TQChar(*charData).isSpace()) {
343  --len;
344  ++charData;
345  }
346 
347  if ( len > 3 && TQChar(*charData) == TQChar(0357) ) { // 0357 0273 0277
348  len -= 3;
349  charData += 3;
350  }
351  TQByteArray tmpData;
352  tmpData.setRawData(charData, len);
353 
354  if (doc.setContent(tmpData))
355  {
356  rssDoc = Document(doc);
357  if (!rssDoc.isValid())
358  {
359  discoverFeeds(tmpData);
360  status = ParseError;
361  }
362  }
363  else
364  {
365  discoverFeeds(tmpData);
366  status = ParseError;
367  }
368 
369  tmpData.resetRawData(charData, len);
370  } else
371  status = RetrieveError;
372 
373  emit loadingComplete(this, rssDoc, status);
374 
375  delete this;
376 }
377 
378 void Loader::discoverFeeds(const TQByteArray &data)
379 {
380  TQString str = TQString(data).simplifyWhiteSpace();
381 
382  TQStringList feeds;
383 
384  FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str);
385 
386  for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it)
387  {
388  feeds += (*it).url();
389  }
390 
391  if (list.isEmpty())
392  feeds = FeedDetector::extractBruteForce(str);
393 
394  TQString feed = feeds.first();
395  TQString host = d->url.host();
396  KURL testURL;
397  // loop through, prefer feeds on same host
398  TQStringList::Iterator end( feeds.end() );
399  for ( TQStringList::Iterator it = feeds.begin(); it != end; ++it)
400  {
401  testURL=*it;
402  if (testURL.host() == host)
403  {
404  feed = *it;
405  break;
406  }
407  }
408 
409  d->discoveredFeedURL = feed.isNull() ? TQString() : FeedDetector::fixRelativeURL(feed, d->url);
410 }
411 
412 #include "loader.moc"
Abstract baseclass for all data retriever classes.
Definition: loader.h:36
DataRetriever()
Default constructor.
Definition: loader.cpp:30
virtual ~DataRetriever()
Destructor.
Definition: loader.cpp:34
void dataRetrieved(const TQByteArray &data, bool success)
Emit this signal to tell the Loader class that the retrieval process was finished.
Represents a RSS document and provides all the features and properties as stored in it.
Definition: document.h:32
bool isValid() const
Definition: document.cpp:519
a class providing functions to detect linked feeds in HTML sources
Definition: feeddetector.h:56
static FeedDetectorEntryList extractFromLinkTags(const TQString &s)
searches an HTML page for feeds listed in <link> tags <link> tags with rel attribute values alterna...
static TQStringList extractBruteForce(const TQString &s)
searches an HTML page for slightly feed-like looking links and catches everything not running away qu...
virtual int errorCode() const
Definition: loader.cpp:135
virtual ~FileRetriever()
Destructor.
Definition: loader.cpp:67
FileRetriever()
Default constructor.
Definition: loader.cpp:62
virtual void retrieveData(const KURL &url)
Downloads the file referenced by the given URL and passes its contents on to the Loader.
Definition: loader.cpp:93
void permanentRedirection(const KURL &url)
Signals a permanent redirection.
This class is the preferred way of loading RSS files.
Definition: loader.h:258
int errorCode() const
Retrieves the error code of the last loading process (if any), as reported by the employed data retre...
Definition: loader.cpp:299
void loadFrom(const KURL &url, DataRetriever *retriever)
Loads the RSS file referenced by the given URL using the specified retrieval algorithm.
Definition: loader.cpp:285
void loadingComplete(Loader *loader, Document doc, Status status)
This signal gets emitted when the loading process triggered by calling loadFrom() finished.
static Loader * create()
Constructs a Loader instance.
Definition: loader.cpp:263
virtual ~OutputRetriever()
Destructor.
Definition: loader.cpp:195
virtual void retrieveData(const KURL &url)
Executes the program referenced by the given URL and retrieves the data which the program prints to s...
Definition: loader.cpp:200
OutputRetriever()
Default constructor.
Definition: loader.cpp:190
virtual int errorCode() const
Definition: loader.cpp:218