akregator/src/librss

document.cpp
1/*
2 * document.cpp
3 *
4 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
5 *
6 * This program is distributed in the hope that it will be useful, but WITHOUT
7 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
9 * accompanying file 'COPYING'.
10 *
11 */
12#include "document.h"
13#include "article.h"
14#include "image.h"
15#include "textinput.h"
16#include "tools_p.h"
17
18#include <krfcdate.h>
19#include <kurl.h>
20
21#include <tqdatetime.h>
22#include <tqdom.h>
23#include <tqptrlist.h>
24
25#include <kdebug.h>
26
27using namespace RSS;
28
29struct Document::Private : public Shared
30{
31 Private() : version(v0_90), image(NULL), textInput(NULL), language(en)
32 {
33 format=UnknownFormat;
34 valid=false;
35 ttl=-1;
36 }
37
38 ~Private()
39 {
40 delete textInput;
41 delete image;
42 }
43
44 Version version;
45 TQString title;
46 TQString description;
47 KURL link;
48 Image *image;
49 TextInput *textInput;
50 Article::List articles;
51 Language language;
52 Format format;
53 TQString copyright;
54 TQDateTime pubDate;
55 TQDateTime lastBuildDate;
56 TQString rating;
57 KURL docs;
58 int ttl;
59 TQString managingEditor;
60 TQString webMaster;
61 HourList skipHours;
62 DayList skipDays;
63 bool valid;
64};
65
66Document::Document() : d(new Private)
67{
68}
69
70Document::Document(const Document &other) : d(0)
71{
72 *this = other;
73}
74
75static TQString extractLink(const TQDomNode& node, Format format)
76{
77 if (format == AtomFeed)
78 {
79 TQDomNode n;
80 for (n = node.firstChild(); !n.isNull(); n = n.nextSibling()) {
81 const TQDomElement e = n.toElement();
82 if ( (e.tagName() == TQString::fromLatin1("link"))
83 && (e.attribute(TQString::fromLatin1("rel"), TQString::fromLatin1("alternate")) == TQString::fromLatin1("alternate")))
84 {
85 return n.toElement().attribute(TQString::fromLatin1("href"));
86 }
87 }
88 }
89
90 return extractNode(node, TQString::fromLatin1("link"));
91
92}
93
94Document::Document(const TQDomDocument &doc) : d(new Private)
95{
96 TQString elemText;
97 TQDomNode rootNode = doc.documentElement();
98
99 // Determine the version of the present RSS markup.
100 TQString attr;
101
102 // we should probably check that it ISN'T feed or rss, rather than check if it is xhtml
103 if (rootNode.toElement().tagName()==TQString::fromLatin1("html"))
104 d->valid=false;
105 else
106 d->valid=true;
107
108 attr = rootNode.toElement().attribute(TQString::fromLatin1("version"), TQString());
109 if (rootNode.toElement().tagName()==TQString::fromLatin1("feed"))
110 {
111 d->format=AtomFeed;
112 if (attr == TQString::fromLatin1("0.3"))
113 d->version = vAtom_0_3;
114 else if (attr == TQString::fromLatin1("0.2")) /* smt -> review */
115 d->version = vAtom_0_2;
116 else if (attr == TQString::fromLatin1("0.1")) /* smt -> review */
117 d->version = vAtom_0_1;
118 else
119 d->version = vAtom_1_0;
120 }
121 else
122 {
123 d->format=RSSFeed;
124 if (attr == TQString::fromLatin1("0.91"))
125 d->version = v0_91;
126 else if (attr == TQString::fromLatin1("0.92"))
127 d->version = v0_92;
128 else if (attr == TQString::fromLatin1("0.93"))
129 d->version = v0_93;
130 else if (attr == TQString::fromLatin1("0.94"))
131 d->version = v0_94;
132 else // otherwise, we just assume a RSS2 compatible feed. As rss2 is generally
133 // backward-compatible, this should work
134 d->version = v2_0;
135 }
136
137
138 if (d->format==UnknownFormat)
139 {
140 attr = rootNode.toElement().attribute(TQString::fromLatin1("xmlns"), TQString());
141 if (!attr.isNull()) {
142 /*
143 * Hardcoding these URLs is actually a bad idea, since the DTD doesn't
144 * dictate a specific namespace. Still, most RSS files seem to use
145 * these two, so I'll go for them now. If it turns out that many
146 * mirrors of this RSS namespace are in use, I'll probably have to
147 * distinguish the RSS versions by analyzing the relationship between
148 * the nodes.
149 */
150 if (attr == TQString::fromLatin1("http://my.netscape.com/rdf/simple/0.9/")) {
151 d->format=RSSFeed;
152 d->version = v0_90;
153 }
154 else if (attr == TQString::fromLatin1("http://purl.org/rss/1.0/")) {
155 d->format=RSSFeed;
156 d->version = v1_0;
157 }
158 }
159 }
160
161 TQDomNode channelNode;
162
163 if (d->format == AtomFeed)
164 channelNode=rootNode;
165 else
166 channelNode=rootNode.namedItem(TQString::fromLatin1("channel"));
167
168 if (!(elemText = extractTitle(channelNode)).isNull())
169 d->title = elemText;
170 TQString descriptionTagName = "description";
171
172 if (d->format == AtomFeed)
173 {
174 if (d->version == vAtom_1_0)
175 descriptionTagName = "subtitle";
176 else
177 descriptionTagName = "tagline";
178 }
179
180 if (!(elemText = extractNode(channelNode, descriptionTagName)).isNull())
181 d->description = elemText;
182
183 d->link = extractLink(channelNode, d->format);
184
185
186 /* This is ugly but necessary since RSS 0.90 and 1.0 have a different parent
187 * node for <image>, <textinput> and <item> than RSS 0.91-0.94 and RSS 2.0.
188 */
189 TQDomNode parentNode;
190 if (d->version == v0_90 || d->version == v1_0 || d->format == AtomFeed)
191 parentNode = rootNode;
192 else
193 {
194 // following is a HACK for broken 0.91 feeds like xanga.com's
195 if (!rootNode.namedItem(TQString::fromLatin1("item")).isNull())
196 parentNode = rootNode;
197 else
198 parentNode = channelNode;
199 }
200
201 // image and textinput aren't supported by Atom.. handle in case feed provides
202 TQDomNode n = parentNode.namedItem(TQString::fromLatin1("image"));
203 if (!n.isNull())
204 d->image = new Image(n);
205
206 n = parentNode.namedItem(TQString::fromLatin1("textinput"));
207 if (!n.isNull())
208 d->textInput = new TextInput(n);
209
210 // Our (hopefully faster) version of elementsByTagName()
211 TQString tagName;
212 if (d->format == AtomFeed)
213 tagName=TQString::fromLatin1("entry");
214 else
215 tagName=TQString::fromLatin1("item");
216
217 for (n = parentNode.firstChild(); !n.isNull(); n = n.nextSibling()) {
218 const TQDomElement e = n.toElement();
219 if (e.tagName() == tagName)
220 d->articles.append(Article(e, d->format, d->version));
221 }
222
223 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("copyright"))).isNull())
224 d->copyright = elemText;
225
226 if (d->format == AtomFeed)
227 elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString());
228 else
229 elemText = extractNode(channelNode, TQString::fromLatin1("language"));
230
231 if (!elemText.isNull()){
232 if (elemText == TQString::fromLatin1("af"))
233 d->language = af;
234 else if (elemText == TQString::fromLatin1("sq"))
235 d->language = sq;
236 else if (elemText == TQString::fromLatin1("eu"))
237 d->language = eu;
238 else if (elemText == TQString::fromLatin1("be"))
239 d->language = be;
240 else if (elemText == TQString::fromLatin1("bg"))
241 d->language = bg;
242 else if (elemText == TQString::fromLatin1("ca"))
243 d->language = ca;
244 else if (elemText == TQString::fromLatin1("zh-cn"))
245 d->language = zh_cn;
246 else if (elemText == TQString::fromLatin1("zh-tw"))
247 d->language = zh_tw;
248 else if (elemText == TQString::fromLatin1("hr"))
249 d->language = hr;
250 else if (elemText == TQString::fromLatin1("cs"))
251 d->language = cs;
252 else if (elemText == TQString::fromLatin1("da"))
253 d->language = da;
254 else if (elemText == TQString::fromLatin1("nl"))
255 d->language = nl;
256 else if (elemText == TQString::fromLatin1("nl-be"))
257 d->language = nl_be;
258 else if (elemText == TQString::fromLatin1("nl-nl"))
259 d->language = nl_nl;
260 else if (elemText == TQString::fromLatin1("en"))
261 d->language = en;
262 else if (elemText == TQString::fromLatin1("en-au"))
263 d->language = en_au;
264 else if (elemText == TQString::fromLatin1("en-bz"))
265 d->language = en_bz;
266 else if (elemText == TQString::fromLatin1("en-ca"))
267 d->language = en_ca;
268 else if (elemText == TQString::fromLatin1("en-ie"))
269 d->language = en_ie;
270 else if (elemText == TQString::fromLatin1("en-jm"))
271 d->language = en_jm;
272 else if (elemText == TQString::fromLatin1("en-nz"))
273 d->language = en_nz;
274 else if (elemText == TQString::fromLatin1("en-ph"))
275 d->language = en_ph;
276 else if (elemText == TQString::fromLatin1("en-za"))
277 d->language = en_za;
278 else if (elemText == TQString::fromLatin1("en-tt"))
279 d->language = en_tt;
280 else if (elemText == TQString::fromLatin1("en-gb"))
281 d->language = en_gb;
282 else if (elemText == TQString::fromLatin1("en-us"))
283 d->language = en_us;
284 else if (elemText == TQString::fromLatin1("en-zw"))
285 d->language = en_zw;
286 else if (elemText == TQString::fromLatin1("fo"))
287 d->language = fo;
288 else if (elemText == TQString::fromLatin1("fi"))
289 d->language = fi;
290 else if (elemText == TQString::fromLatin1("fr"))
291 d->language = fr;
292 else if (elemText == TQString::fromLatin1("fr-be"))
293 d->language = fr_be;
294 else if (elemText == TQString::fromLatin1("fr-ca"))
295 d->language = fr_ca;
296 else if (elemText == TQString::fromLatin1("fr-fr"))
297 d->language = fr_fr;
298 else if (elemText == TQString::fromLatin1("fr-lu"))
299 d->language = fr_lu;
300 else if (elemText == TQString::fromLatin1("fr-mc"))
301 d->language = fr_mc;
302 else if (elemText == TQString::fromLatin1("fr-ch"))
303 d->language = fr_ch;
304 else if (elemText == TQString::fromLatin1("gl"))
305 d->language = gl;
306 else if (elemText == TQString::fromLatin1("gd"))
307 d->language = gd;
308 else if (elemText == TQString::fromLatin1("de"))
309 d->language = de;
310 else if (elemText == TQString::fromLatin1("de-at"))
311 d->language = de_at;
312 else if (elemText == TQString::fromLatin1("de-de"))
313 d->language = de_de;
314 else if (elemText == TQString::fromLatin1("de-li"))
315 d->language = de_li;
316 else if (elemText == TQString::fromLatin1("de-lu"))
317 d->language = de_lu;
318 else if (elemText == TQString::fromLatin1("de-ch"))
319 d->language = de_ch;
320 else if (elemText == TQString::fromLatin1("el"))
321 d->language = el;
322 else if (elemText == TQString::fromLatin1("hu"))
323 d->language = hu;
324 else if (elemText == TQString::fromLatin1("is"))
325 d->language = is;
326 else if (elemText == TQString::fromLatin1("id"))
327 d->language = id;
328 else if (elemText == TQString::fromLatin1("ga"))
329 d->language = ga;
330 else if (elemText == TQString::fromLatin1("it"))
331 d->language = it;
332 else if (elemText == TQString::fromLatin1("it-it"))
333 d->language = it_it;
334 else if (elemText == TQString::fromLatin1("it-ch"))
335 d->language = it_ch;
336 else if (elemText == TQString::fromLatin1("ja"))
337 d->language = ja;
338 else if (elemText == TQString::fromLatin1("ko"))
339 d->language = ko;
340 else if (elemText == TQString::fromLatin1("mk"))
341 d->language = mk;
342 else if (elemText == TQString::fromLatin1("no"))
343 d->language = no;
344 else if (elemText == TQString::fromLatin1("pl"))
345 d->language = pl;
346 else if (elemText == TQString::fromLatin1("pt"))
347 d->language = pt;
348 else if (elemText == TQString::fromLatin1("pt-br"))
349 d->language = pt_br;
350 else if (elemText == TQString::fromLatin1("pt-pt"))
351 d->language = pt_pt;
352 else if (elemText == TQString::fromLatin1("ro"))
353 d->language = ro;
354 else if (elemText == TQString::fromLatin1("ro-mo"))
355 d->language = ro_mo;
356 else if (elemText == TQString::fromLatin1("ro-ro"))
357 d->language = ro_ro;
358 else if (elemText == TQString::fromLatin1("ru"))
359 d->language = ru;
360 else if (elemText == TQString::fromLatin1("ru-mo"))
361 d->language = ru_mo;
362 else if (elemText == TQString::fromLatin1("ru-ru"))
363 d->language = ru_ru;
364 else if (elemText == TQString::fromLatin1("sr"))
365 d->language = sr;
366 else if (elemText == TQString::fromLatin1("sk"))
367 d->language = sk;
368 else if (elemText == TQString::fromLatin1("sl"))
369 d->language = sl;
370 else if (elemText == TQString::fromLatin1("es"))
371 d->language = es;
372 else if (elemText == TQString::fromLatin1("es-ar"))
373 d->language = es_ar;
374 else if (elemText == TQString::fromLatin1("es-bo"))
375 d->language = es_bo;
376 else if (elemText == TQString::fromLatin1("es-cl"))
377 d->language = es_cl;
378 else if (elemText == TQString::fromLatin1("es-co"))
379 d->language = es_co;
380 else if (elemText == TQString::fromLatin1("es-cr"))
381 d->language = es_cr;
382 else if (elemText == TQString::fromLatin1("es-do"))
383 d->language = es_do;
384 else if (elemText == TQString::fromLatin1("es-ec"))
385 d->language = es_ec;
386 else if (elemText == TQString::fromLatin1("es-sv"))
387 d->language = es_sv;
388 else if (elemText == TQString::fromLatin1("es-gt"))
389 d->language = es_gt;
390 else if (elemText == TQString::fromLatin1("es-hn"))
391 d->language = es_hn;
392 else if (elemText == TQString::fromLatin1("es-mx"))
393 d->language = es_mx;
394 else if (elemText == TQString::fromLatin1("es-ni"))
395 d->language = es_ni;
396 else if (elemText == TQString::fromLatin1("es-pa"))
397 d->language = es_pa;
398 else if (elemText == TQString::fromLatin1("es-py"))
399 d->language = es_py;
400 else if (elemText == TQString::fromLatin1("es-pe"))
401 d->language = es_pe;
402 else if (elemText == TQString::fromLatin1("es-pr"))
403 d->language = es_pr;
404 else if (elemText == TQString::fromLatin1("es-es"))
405 d->language = es_es;
406 else if (elemText == TQString::fromLatin1("es-uy"))
407 d->language = es_uy;
408 else if (elemText == TQString::fromLatin1("es-ve"))
409 d->language = es_ve;
410 else if (elemText == TQString::fromLatin1("sv"))
411 d->language = sv;
412 else if (elemText == TQString::fromLatin1("sv-fi"))
413 d->language = sv_fi;
414 else if (elemText == TQString::fromLatin1("sv-se"))
415 d->language = sv_se;
416 else if (elemText == TQString::fromLatin1("tr"))
417 d->language = tr;
418 else if (elemText == TQString::fromLatin1("uk"))
419 d->language = uk;
420 else
421 d->language = UndefinedLanguage;
422 }
423
424 if (d->format == AtomFeed)
425 tagName=TQString::fromLatin1("issued"); // atom doesn't specify this for feeds
426 // but some broken feeds do this
427 else
428 tagName=TQString::fromLatin1("pubDate");
429
430 if (!(elemText = extractNode(channelNode, tagName)).isNull()) {
431 time_t _time;
432
433 if (d->format == AtomFeed)
434 _time=parseISO8601Date(elemText);
435 else
436 _time=KRFCDate::parseDate(elemText);
437 /* \bug This isn't really the right way since it will set the date to
438 * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that
439 * we cannot distinguish between that date, and invalid values. :-/
440 */
441 d->pubDate.setTime_t(_time);
442 }
443
444 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) {
445 time_t _time = parseISO8601Date(elemText);
446 /* \bug This isn't really the right way since it will set the date to
447 * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that
448 * we cannot distinguish between that date, and invalid values. :-/
449 */
450 d->pubDate.setTime_t(_time);
451 }
452
453 if (d->format == AtomFeed)
454 tagName=TQString::fromLatin1("modified");
455 else
456 tagName=TQString::fromLatin1("lastBuildDate");
457 if (!(elemText = extractNode(channelNode, tagName)).isNull()) {
458 time_t _time;
459 if (d->format == AtomFeed)
460 _time = parseISO8601Date(elemText);
461 else
462 _time = KRFCDate::parseDate(elemText);
463 d->lastBuildDate.setTime_t(_time);
464 }
465
466 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("rating"))).isNull())
467 d->rating = elemText;
468 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("docs"))).isNull())
469 d->docs = elemText;
470 if (!(elemText = extractNode(channelNode, TQString::fromLatin1((d->format == AtomFeed) ? "author" : "managingEditor"))).isNull())
471 d->managingEditor = elemText;
472 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("webMaster"))).isNull())
473 d->webMaster = elemText;
474
475 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("ttl"))).isNull())
476 d->ttl = elemText.toUInt();
477
478 n = channelNode.namedItem(TQString::fromLatin1("skipHours"));
479 if (!n.isNull())
480 for (TQDomElement e = n.firstChild().toElement(); !e.isNull(); e = e.nextSibling().toElement())
481 if (e.tagName() == TQString::fromLatin1("hour"))
482 d->skipHours.append(e.text().toUInt());
483
484 n = channelNode.namedItem(TQString::fromLatin1("skipDays"));
485 if (!n.isNull()) {
486 Day day;
487 TQString elemText;
488 for (TQDomElement e = n.firstChild().toElement(); !e.isNull(); e = e.nextSibling().toElement())
489 if (e.tagName() == TQString::fromLatin1("day")) {
490 elemText = e.text().lower();
491 if (elemText == TQString::fromLatin1("monday"))
492 day = Monday;
493 else if (elemText == TQString::fromLatin1("tuesday"))
494 day = Tuesday;
495 else if (elemText == TQString::fromLatin1("wednesday"))
496 day = Wednesday;
497 else if (elemText == TQString::fromLatin1("thursday"))
498 day = Thursday;
499 else if (elemText == TQString::fromLatin1("friday"))
500 day = Friday;
501 else if (elemText == TQString::fromLatin1("saturday"))
502 day = Saturday;
503 else if (elemText == TQString::fromLatin1("sunday"))
504 day = Sunday;
505 else
506 day = UndefinedDay;
507 if (day != UndefinedDay)
508 d->skipDays.append(day);
509 }
510 }
511}
512
514{
515 if (d->deref())
516 delete d;
517}
518
520{
521 return d->valid;
522}
523
524Version Document::version() const
525{
526 return d->version;
527}
528
529TQString Document::verbVersion() const
530{
531 switch (d->version) {
532 case v0_90: return TQString::fromLatin1("0.90");
533 case v0_91: return TQString::fromLatin1("0.91");
534 case v0_92: return TQString::fromLatin1("0.92");
535 case v0_93: return TQString::fromLatin1("0.93");
536 case v0_94: return TQString::fromLatin1("0.94");
537 case v1_0: return TQString::fromLatin1("1.0");
538 case v2_0: return TQString::fromLatin1("2.0");
539 case vAtom_0_3: return TQString::fromLatin1("0.3");
540 case vAtom_0_2: return TQString::fromLatin1("0.2");
541 case vAtom_0_1: return TQString::fromLatin1("0.1");
542 case vAtom_1_0: return TQString::fromLatin1("1.0");
543 }
544 return TQString();
545}
546
547TQString Document::title() const
548{
549 return d->title;
550}
551
552TQString Document::description() const
553{
554 return d->description;
555}
556
557const KURL &Document::link() const
558{
559 return d->link;
560}
561
563{
564 return d->image;
565}
566
567const Image *Document::image() const
568{
569 return d->image;
570}
571
573{
574 return d->textInput;
575}
576
578{
579 return d->textInput;
580}
581
583{
584 return d->articles;
585}
586
587Language Document::language() const
588{
589 return d->language;
590}
591
592TQString Document::copyright() const
593{
594 return d->copyright;
595}
596
597const TQDateTime &Document::pubDate() const
598{
599 return d->pubDate;
600}
601
602const TQDateTime &Document::lastBuildDate() const
603{
604 return d->lastBuildDate;
605}
606
607TQString Document::rating() const
608{
609 return d->rating;
610}
611
612const KURL &Document::docs() const
613{
614 return d->docs;
615}
616
618{
619 return d->managingEditor;
620}
621
622TQString Document::webMaster() const
623{
624 return d->webMaster;
625}
626
627const HourList &Document::skipHours() const
628{
629 return d->skipHours;
630}
631
632const DayList &Document::skipDays() const
633{
634 return d->skipDays;
635}
636
637int Document::ttl() const
638{
639 return d->ttl;
640}
641
643{
644 if (this != &other) {
645 other.d->ref();
646 if (d && d->deref())
647 delete d;
648 d = other.d;
649 }
650 return *this;
651}
Represents an article as stored in a RSS file.
Definition: article.h:38
TQValueList< Article > List
A list of articles.
Definition: article.h:43
Represents a RSS document and provides all the features and properties as stored in it.
Definition: document.h:32
Image * image()
RSS 0.90 and upwards.
Definition: document.cpp:562
TQString verbVersion() const
Convenience method.
Definition: document.cpp:529
const DayList & skipDays() const
RSS 0.91 and upwards.
Definition: document.cpp:632
Language language() const
RSS 0.91 and upwards.
Definition: document.cpp:587
const TQDateTime & pubDate() const
RSS 0.91 and upwards.
Definition: document.cpp:597
TQString description() const
RSS 0.90 and upwards.
Definition: document.cpp:552
const KURL & link() const
RSS 0.90 and upwards.
Definition: document.cpp:557
TQString rating() const
RSS 0.91 and upwards.
Definition: document.cpp:607
TQString managingEditor() const
RSS 0.91 and upwards.
Definition: document.cpp:617
Document & operator=(const Document &other)
Assignment operator.
Definition: document.cpp:642
TQString webMaster() const
RSS 0.91 and upwards.
Definition: document.cpp:622
bool isValid() const
Definition: document.cpp:519
TQString title() const
RSS 0.90 and upwards.
Definition: document.cpp:547
const TQDateTime & lastBuildDate() const
RSS 0.91 and upwards.
Definition: document.cpp:602
const KURL & docs() const
RSS 0.91 and upwards.
Definition: document.cpp:612
~Document()
Destructor.
Definition: document.cpp:513
const HourList & skipHours() const
RSS 0.91 and upwards.
Definition: document.cpp:627
Document()
Default constructor.
Definition: document.cpp:66
TQString copyright() const
RSS 0.91 and upwards.
Definition: document.cpp:592
Version version() const
Definition: document.cpp:524
const Article::List & articles() const
RSS 0.90 and upwards.
Definition: document.cpp:582
TextInput * textInput()
RSS 0.90 and upwards.
Definition: document.cpp:572
Represents an image as stored in a RSS file.
Definition: image.h:35
Represents a text input facility as stored in a RSS file for the purpose of allowing users to submit ...
Definition: textinput.h:31