1 /++
2 	RSS/Atom feed reading
3 
4 	References:
5 	$(LIST
6 		* https://cyber.harvard.edu/rss/rss.html
7 		* http://www.rssboard.org/rss-specification
8 		* https://tools.ietf.org/html/rfc4287
9 		* https://en.wikipedia.org/wiki/Atom_(Web_standard)
10 	)
11 +/
12 module arsd.rss;
13 
14 import arsd.dom;
15 
16 /// generic subset of rss and atom, normalized for easy consumption
17 struct Feed {
18 	string title; ///
19 	string description; ///
20 	string lastUpdated; ///
21 
22 	///
23 	static struct Item {
24 		string title; ///
25 		string link; ///
26 		string description; /// could be html or text!
27 		string author; /// Typical format: email (name)
28 		string publicationDate; /// the format is 2005-07-31T12:29:29Z
29 		string lastUpdatedDate; /// the format is 2005-07-31T12:29:29Z
30 		string guid; ///
31 
32 		string enclosureUri; ///
33 		string enclosureType; /// a mime type
34 		string enclosureSize; ///
35 	}
36 
37 	Item[] items; ///
38 }
39 
40 /+
41 import arsd.cgi;
42 mixin GenericMain!handler;
43 void handler(Cgi cgi) {
44 	cgi.setResponseContentType("application/atom+xml");
45 	cgi.write(feedToAtom(parseFeed(Document.fromUrl("http://dpldocs.info/this-week-in-d/twid.rss", true).root)).toString);
46 }
47 +/
48 
49 /++
50 	Turns a generic feed back into an Atom document.
51 
52 	History:
53 		Added March 18, 2021
54 +/
55 XmlDocument feedToAtom(Feed feed) {
56 	auto document = new XmlDocument(`<feed xmlns="http://www.w3.org/2005/Atom"></feed>`);
57 	document.root.addChild("title", feed.title);
58 	document.root.addChild("subtitle", feed.description);
59 	document.root.addChild("updated", feed.lastUpdated);
60 
61 	foreach(item; feed.items) {
62 		auto entry = document.root.addChild("entry");
63 		entry.addChild("title", item.title);
64 		entry.addChild("link").setAttribute("href", item.link);
65 		if(item.enclosureUri.length)
66 			entry.addChild("link").
67 				setAttribute("rel", "enclosure").
68 				setAttribute("href", item.enclosureUri).
69 				setAttribute("length", item.enclosureSize).
70 				setAttribute("type", item.enclosureType);
71 		entry.addChild("id", item.guid);
72 		entry.addChild("published", item.publicationDate);
73 		entry.addChild("updated", item.lastUpdatedDate);
74 		entry.addChild("content", item.description).setAttribute("type", "html"); // or summary? idk
75 		if(item.author.length) {
76 			auto author = entry.addChild("author");
77 			import std.string;
78 			auto idx = item.author.indexOf("(");
79 			if(idx == -1) {
80 				author.addChild("email", item.author);
81 			} else {
82 				if(item.author.length > idx + 2)
83 					author.addChild("name", item.author[idx + 1 .. $-1]);
84 				author.addChild("email", item.author[0 .. idx -1]);
85 			}
86 		}
87 	}
88 
89 	return document;
90 }
91 
92 ///
93 enum FeedType {
94 	unknown, ///
95 	rss, ///
96 	atom ///
97 }
98 
99 ///
100 FeedType identifyFeed(Element e) {
101 	assert(e !is null);
102 
103 	if(e.tagName == "rss")
104 		return FeedType.rss;
105 	if(e.tagName == "feed" || e.tagName == "atom:feed")
106 		return FeedType.atom;
107 
108 	return FeedType.unknown;
109 }
110 
111 /// Parses a feed generically
112 Feed parseFeed(Element e) {
113 	final switch(identifyFeed(e)) {
114 		case FeedType.unknown:
115 			throw new Exception("Unknown feed type");
116 		case FeedType.rss:
117 			return parseRss(e).toGenericFeed();
118 		case FeedType.atom:
119 			return parseAtom(e).toGenericFeed();
120 	}
121 }
122 
123 // application/rss+xml
124 // though some use text/rss+xml or application/rdf+xml
125 
126 // root node of <rss version="whatever">
127 
128 struct RssChannel {
129 	string title;
130 	string link;
131 	string description;
132 	string lastBuildDate; // last time content in here changed
133 	string pubDate; // format like "Sat, 07 Sep 2002 00:00:01 GMT" when it officially changes
134 	string docs; // idk?
135 
136 	string cloud; // has domain, port, path, registerProcedure, protocol
137 
138 	string language; // optional
139 	string copyright;
140 	string managingEditor;
141 	string webMaster;
142 
143 	string category;
144 
145 	string ttl; // in minutes, if present
146 
147 	RssImage image;
148 
149 	RssItem[] items;
150 
151 	Feed toGenericFeed() {
152 		Feed f;
153 		f.title = this.title;
154 		f.description = this.description; // FIXME text vs html?
155 		f.lastUpdated = this.lastBuildDate.rssDateToAtom;
156 
157 		foreach(item; items) {
158 			Feed.Item fi;
159 
160 			fi.title = item.title;
161 			fi.link = item.link;
162 			fi.description = item.description; // FIXME: try to normalize text vs html
163 			fi.author = item.author; // FIXME
164 			fi.lastUpdatedDate = fi.publicationDate = item.pubDate.rssDateToAtom;
165 			fi.guid = item.guid;
166 			//fi.lastUpdatedDate; // not available i think
167 
168 			fi.enclosureUri = item.enclosure.url;
169 			fi.enclosureType = item.enclosure.type;
170 			fi.enclosureSize = item.enclosure.length;
171 
172 			f.items ~= fi;
173 		}
174 		return f;
175 	}
176 }
177 
178 struct RssImage {
179 	string title; /// img alt
180 	string url; /// like the img src
181 	string link; /// like a href
182 	string width;
183 	string height;
184 	string description; /// img title
185 }
186 
187 struct RssItem {
188 	string title;
189 	string link;
190 	string description; // may have html!
191 
192 	string author;
193 	string category;
194 	string comments; // a link
195 
196 	string pubDate;
197 	string guid;
198 
199 	RssSource source;
200 	RssEnclosure enclosure;
201 }
202 
203 struct RssEnclosure {
204 	string url;
205 	string length;
206 	string type;
207 }
208 
209 struct RssSource {
210 	string title;
211 	string url;
212 }
213 
214 
215 /++
216 	Parses RSS into structs. Requires the element to be RSS; if you are unsure
217 	of the type and want a generic response, use parseFeed instead.
218 +/
219 RssChannel parseRss(Element element) {
220 	assert(element !is null && element.tagName == "rss");
221 	RssChannel c;
222 	element = element.requireSelector(" > channel");
223 	foreach(memberName; __traits(allMembers, RssChannel)) {
224 		static if(memberName == "image") {
225 			if(auto image = element.querySelector(" > image")) {
226 				RssImage i;
227 				foreach(mn; __traits(allMembers, RssImage)) {
228 					__traits(getMember, i, mn) = image.optionSelector(" > " ~ mn).innerText;
229 				}
230 				c.image = i;
231 			}
232 		} else static if(memberName == "items") {
233 			foreach(item; element.querySelectorAll(" > item")) {
234 				RssItem i;
235 				foreach(mn; __traits(allMembers, RssItem)) {
236 					static if(mn == "source") {
237 						if(auto s = item.querySelector(" > source")) {
238 							i.source.title = s.innerText;
239 							i.source.url = s.attrs.url;
240 						}
241 					} else static if(mn == "enclosure") {
242 						if(auto s = item.querySelector(" > enclosure")) {
243 							i.enclosure.url = s.attrs.url;
244 							i.enclosure.type = s.attrs.type;
245 							i.enclosure.length = s.attrs.length;
246 						}
247 					} else {
248 						__traits(getMember, i, mn) = item.optionSelector(" > " ~ mn).innerText;
249 					}
250 				}
251 				c.items ~= i;
252 			}
253 		} else static if(is(typeof( __traits(getMember, c, memberName).offsetof))) {
254 			__traits(getMember, c, memberName) = element.optionSelector(" > " ~ memberName).innerText;
255 		}
256 	}
257 
258 	return c;
259 }
260 
261 ///
262 RssChannel parseRss(string s) {
263 	auto document = new Document(s, true, true);
264 	return parseRss(document.root);
265 }
266 
267 /*
268 struct SyndicationInfo {
269 	string updatePeriod; // sy:updatePeriod
270 	string updateFrequency;
271 	string updateBase;
272 
273 	string skipHours; // stored as <hour> elements
274 	string skipDays; // stored as <day> elements
275 }
276 */
277 
278 
279 // /////////////////// atom ////////////////////
280 
281 // application/atom+xml
282 
283 /+ rss vs atom
284 	date format is different
285 	atom:xxx links
286 
287 	root node is <feed>, organization has no <channel>, and <entry>
288 	instead of <item>
289 +/
290 
291 /++
292 
293 +/
294 struct AtomFeed {
295 	string title; /// has a type attribute - text or html
296 	string subtitle; /// has a type attribute
297 
298 	string updated; /// io string
299 
300 	string id; ///
301 	string link; /// i want the text/html type really, certainly not rel=self
302 	string rights; ///
303 	string generator; ///
304 
305 	AtomEntry[] entries; ///
306 
307 	///
308 	Feed toGenericFeed() {
309 		Feed feed;
310 
311 		feed.title = this.title;
312 		feed.description = this.subtitle;
313 		feed.lastUpdated = this.updated;
314 
315 		foreach(entry; this.entries) {
316 			Feed.Item item;
317 
318 			item.title = entry.title;
319 			item.link = entry.link;
320 			if(entry.content.html.length || entry.content.text.length)
321 				item.description = entry.content.html.length ? entry.content.html : entry.content.text; // FIXME
322 			else
323 				item.description = entry.summary.html.length ? entry.summary.html : entry.summary.text; // FIXME
324 			item.author = entry.author.email;
325 			if(entry.author.name.length)
326 				item.author ~= " (" ~ entry.author.name ~ ")";
327 			item.publicationDate = entry.published;
328 			item.lastUpdatedDate = entry.updated;
329 			item.guid = entry.id;
330 
331 			item.enclosureUri = entry.enclosure.url;
332 			item.enclosureType = entry.enclosure.type;
333 			item.enclosureSize = entry.enclosure.length;
334 
335 			feed.items ~= item;
336 		}
337 
338 		return feed;
339 	}
340 }
341 
342 ///
343 struct AtomEntry {
344 	string title; ///
345 	string link; /// the alternate
346 	AtomEnclosure enclosure; ///
347 	string id; ///
348 	string updated; ///
349 	string published; ///
350 
351 	AtomPerson author; ///
352 	AtomPerson[] contributors; ///
353 	AtomContent content; /// // should check type. may also have a src element for a link. type of html is escaped, type of xhtml is embedded.
354 	AtomContent summary; ///
355 }
356 
357 ///
358 struct AtomEnclosure {
359 	string url; ///
360 	string length; ///
361 	string type; ///
362 }
363 
364 
365 ///
366 struct AtomContent {
367 	string text; ///
368 	string html; ///
369 }
370 
371 ///
372 struct AtomPerson {
373 	string name; ///
374 	string uri; ///
375 	string email; ///
376 }
377 
378 ///
379 AtomFeed parseAtom(Element ele) {
380 	AtomFeed af;
381 	af.title = ele.optionSelector(` > title, > atom\:title`).innerText;
382 	af.subtitle = ele.optionSelector(` > subtitle, > atom\:subtitle`).innerText;
383 	af.id = ele.optionSelector(` > id, > atom\:id`).innerText;
384 	af.updated = ele.optionSelector(` > updated, > atom\:updated`).innerText;
385 	af.rights = ele.optionSelector(` > rights, > atom\:rights`).innerText;
386 	af.generator = ele.optionSelector(` > generator, > atom\:generator`).innerText;
387 	af.link = ele.optionSelector(` > link:not([rel])`).getAttribute("href");
388 
389 	foreach(entry; ele.querySelectorAll(` > entry`)) {
390 		AtomEntry ae;
391 
392 		ae.title = entry.optionSelector(` > title, > atom\:title`).innerText;
393 		ae.updated = entry.optionSelector(` > updated, > atom\:updated`).innerText;
394 		ae.published = entry.optionSelector(` > published, > atom\:published`).innerText;
395 		ae.id = entry.optionSelector(` > id, > atom\:id`).innerText;
396 
397 		ae.link = entry.optionSelector(` > link:not([rel]), > link[rel=alternate], > link[type="type/html"]`).getAttribute("href");
398 
399 		if(auto enclosure = entry.querySelector(` > link[rel=enclosure]`)) {
400 			ae.enclosure.url = enclosure.attrs.href;
401 			ae.enclosure.length = enclosure.attrs.length;
402 			ae.enclosure.type = enclosure.attrs.type;
403 		}
404 
405 		if(auto author = entry.querySelector(` > author`)) {
406 			ae.author.name = author.optionSelector(` > name`).innerText;
407 			ae.author.uri = author.optionSelector(` > uri`).innerText;
408 			ae.author.email = author.optionSelector(` > email`).innerText;
409 		}
410 
411 		foreach(contributor; entry.querySelectorAll(` > contributor`)) {
412 			AtomPerson c;
413 			c.name = contributor.optionSelector(` > name`).innerText;
414 			c.uri = contributor.optionSelector(` > uri`).innerText;
415 			c.email = contributor.optionSelector(` > email`).innerText;
416 			ae.contributors ~= c;
417 		}
418 
419 		if(auto e = entry.querySelector("content[type=xhtml]"))
420 			ae.content.html = e.innerHTML;
421 		if(auto e = entry.querySelector("content[type=html]"))
422 			ae.content.html = e.innerText;
423 		if(auto e = entry.querySelector("content[type=text], content:not([type])"))
424 			ae.content.text = e.innerText;
425 
426 		if(auto e = entry.querySelector("summary[type=xhtml]"))
427 			ae.summary.html = e.innerHTML;
428 		if(auto e = entry.querySelector("summary[type=html]"))
429 			ae.summary.html = e.innerText;
430 		if(auto e = entry.querySelector("summary[type=text], summary:not([type])"))
431 			ae.summary.text = e.innerText;
432 
433 		af.entries ~= ae;
434 	}
435 
436 	return af;
437 }
438 
439 AtomFeed parseAtom(string s) {
440 	auto document = new Document(s, true, true);
441 	return parseAtom(document.root);
442 }
443 
444 string rssDateToAtom(string d) {
445 	auto orig = d;
446 	if(d.length < 22 || d[3] != ',')
447 		return orig; // doesn't appear to be the right format
448 	d = d[5 .. $];
449 
450 	import std.conv;
451 	auto day = parse!int(d);
452 	if(d.length == 0 || d[0] != ' ')
453 		return orig;
454 	d = d[1 .. $];
455 
456 	if(d.length < 4)
457 		return orig;
458 
459 	int month;
460 
461 	string months = "JanFebMarAprMayJunJulAugSepOctNovDec";
462 	foreach(i; 0 .. 12) {
463 		if(months[i * 3 .. i * 3 + 3] == d[0 .. 3]) {
464 			month = i + 1;
465 			break;
466 		}
467 	}
468 
469 	d = d[4 .. $];
470 
471 	auto year = parse!int(d);
472 
473 	if(d.length == 0 || d[0] != ' ')
474 		return orig;
475 	d = d[1 .. $];
476 
477 	auto hour = parse!int(d);
478 
479 	if(d.length == 0 || d[0] != ':')
480 		return orig;
481 	d = d[1 .. $];
482 
483 	auto minute = parse!int(d);
484 
485 	if(d.length == 0 || d[0] != ':')
486 		return orig;
487 	d = d[1 .. $];
488 
489 	auto second = parse!int(d);
490 
491 	import std.format;
492 	return format("%04d-%02d-%02dT%02d:%02d:%02dZ", year, month, day, hour, minute, second);
493 }
494 unittest {
495 	assert(rssDateToAtom("Mon, 18 Nov 2019 12:05:44 GMT") == "2019-11-18T12:05:44Z");
496 }
497 
498 unittest {
499 
500 auto test1 = `<?xml version="1.0" encoding="ISO-8859-1"?>
501 <rss version="0.91">
502 	<channel>
503 		<title>WriteTheWeb</title> 
504 		<link>http://writetheweb.com</link> 
505 		<description>News for web users that write back</description> 
506 		<language>en-us</language> 
507 		<copyright>Copyright 2000, WriteTheWeb team.</copyright> 
508 		<managingEditor>editor@writetheweb.com</managingEditor> 
509 		<webMaster>webmaster@writetheweb.com</webMaster> 
510 		<image>
511 			<title>WriteTheWeb</title> 
512 			<url>http://writetheweb.com/images/mynetscape88.gif</url> 
513 			<link>http://writetheweb.com</link> 
514 			<width>88</width> 
515 			<height>31</height> 
516 			<description>News for web users that write back</description> 
517 			</image>
518 		<item>
519 			<title>Giving the world a pluggable Gnutella</title> 
520 			<link>http://writetheweb.com/read.php?item=24</link> 
521 			<description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description> 
522 			</item>
523 		<item>
524 			<title>Syndication discussions hot up</title> 
525 			<link>http://writetheweb.com/read.php?item=23</link> 
526 			<description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description> 
527 			</item>
528 		<item>
529 			<title>Personal web server integrates file sharing and messaging</title> 
530 			<link>http://writetheweb.com/read.php?item=22</link> 
531 			<description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description> 
532 			</item>
533 		<item>
534 			<title>Syndication and Metadata</title> 
535 			<link>http://writetheweb.com/read.php?item=21</link> 
536 			<description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description> 
537 			</item>
538 		<item>
539 			<title>UK bloggers get organised</title> 
540 			<link>http://writetheweb.com/read.php?item=20</link> 
541 			<description>Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description> 
542 			</item>
543 		<item>
544 			<title>Yournamehere.com more important than anything</title> 
545 			<link>http://writetheweb.com/read.php?item=19</link> 
546 			<description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description> 
547 			</item>
548 		</channel>
549 	</rss>`;
550 
551 
552 	{
553 		auto e = parseRss(test1);
554 		assert(e.items.length == 6);
555 		assert(e.items[$-1].title == "Yournamehere.com more important than anything", e.items[$-1].title);
556 		assert(e.items[0].title == "Giving the world a pluggable Gnutella");
557 		assert(e.items[0].link == "http://writetheweb.com/read.php?item=24");
558 		assert(e.image.url == "http://writetheweb.com/images/mynetscape88.gif");
559 
560 		auto df = e.toGenericFeed();
561 		assert(df.items.length == 6);
562 		assert(df.items[0].link == "http://writetheweb.com/read.php?item=24");
563 	}
564 
565 auto test2 = `<?xml version="1.0"?>
566 <!-- RSS generation done by 'Radio UserLand' on Fri, 13 Apr 2001 19:23:02 GMT -->
567 <rss version="0.92">
568 	<channel>
569 		<title>Dave Winer: Grateful Dead</title>
570 		<link>http://www.scripting.com/blog/categories/gratefulDead.html</link>
571 		<description>A high-fidelity Grateful Dead song every day. This is where we're experimenting with enclosures on RSS news items that download when you're not using your computer. If it works (it will) it will be the end of the Click-And-Wait multimedia experience on the Internet. </description>
572 		<lastBuildDate>Fri, 13 Apr 2001 19:23:02 GMT</lastBuildDate>
573 		<docs>http://backend.userland.com/rss092</docs>
574 		<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
575 		<webMaster>dave@userland.com (Dave Winer)</webMaster>
576 		<cloud domain="data.ourfavoritesongs.com" port="80" path="/RPC2" registerProcedure="ourFavoriteSongs.rssPleaseNotify" protocol="xml-rpc"/>
577 		<item>
578 			<description>It's been a few days since I added a song to the Grateful Dead channel. Now that there are all these new Radio users, many of whom are tuned into this channel (it's #16 on the hotlist of upstreaming Radio users, there's no way of knowing how many non-upstreaming users are subscribing, have to do something about this..). Anyway, tonight's song is a live version of Weather Report Suite from Dick's Picks Volume 7. It's wistful music. Of course a beautiful song, oft-quoted here on Scripting News. &lt;i&gt;A little change, the wind and rain.&lt;/i&gt;
579 </description>
580 			<enclosure url="http://www.scripting.com/mp3s/weatherReportDicksPicsVol7.mp3" length="6182912" type="audio/mpeg"/>
581 			</item>
582 		<item>
583 			<description>Kevin Drennan started a &lt;a href="http://deadend.editthispage.com/"&gt;Grateful Dead Weblog&lt;/a&gt;. Hey it's cool, he even has a &lt;a href="http://deadend.editthispage.com/directory/61"&gt;directory&lt;/a&gt;. &lt;i&gt;A Frontier 7 feature.&lt;/i&gt;</description>
584 			<source url="http://scriptingnews.userland.com/xml/scriptingNews2.xml">Scripting News</source>
585 			</item>
586 		<item>
587 			<description>&lt;a href="http://arts.ucsc.edu/GDead/AGDL/other1.html"&gt;The Other One&lt;/a&gt;, live instrumental, One From The Vault. Very rhythmic very spacy, you can listen to it many times, and enjoy something new every time.</description>
588 			<enclosure url="http://www.scripting.com/mp3s/theOtherOne.mp3" length="6666097" type="audio/mpeg"/>
589 			</item>
590 		<item>
591 			<description>This is a test of a change I just made. Still diggin..</description>
592 			</item>
593 		<item>
594 			<description>The HTML rendering almost &lt;a href="http://validator.w3.org/check/referer"&gt;validates&lt;/a&gt;. Close. Hey I wonder if anyone has ever published a style guide for ALT attributes on images? What are you supposed to say in the ALT attribute? I sure don't know. If you're blind send me an email if u cn rd ths. </description>
595 			</item>
596 		<item>
597 			<description>&lt;a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/Franklin's_Tower.txt"&gt;Franklin's Tower&lt;/a&gt;, a live version from One From The Vault.</description>
598 			<enclosure url="http://www.scripting.com/mp3s/franklinsTower.mp3" length="6701402" type="audio/mpeg"/>
599 			</item>
600 		<item>
601 			<description>Moshe Weitzman says Shakedown Street is what I'm lookin for for tonight. I'm listening right now. It's one of my favorites. "Don't tell me this town ain't got no heart." Too bright. I like the jazziness of Weather Report Suite. Dreamy and soft. How about The Other One? "Spanish lady come to me.."</description>
602 			<source url="http://scriptingnews.userland.com/xml/scriptingNews2.xml">Scripting News</source>
603 			</item>
604 		<item>
605 			<description>&lt;a href="http://www.scripting.com/mp3s/youWinAgain.mp3"&gt;The news is out&lt;/a&gt;, all over town..&lt;p&gt;
606 You've been seen, out runnin round. &lt;p&gt;
607 The lyrics are &lt;a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/You_Win_Again.txt"&gt;here&lt;/a&gt;, short and sweet. &lt;p&gt;
608 &lt;i&gt;You win again!&lt;/i&gt;
609 </description>
610 			<enclosure url="http://www.scripting.com/mp3s/youWinAgain.mp3" length="3874816" type="audio/mpeg"/>
611 			</item>
612 		<item>
613 			<description>&lt;a href="http://www.getlyrics.com/lyrics/grateful-dead/wake-of-the-flood/07.htm"&gt;Weather Report Suite&lt;/a&gt;: "Winter rain, now tell me why, summers fade, and roses die? The answer came. The wind and rain. Golden hills, now veiled in grey, summer leaves have blown away. Now what remains? The wind and rain."</description>
614 			<enclosure url="http://www.scripting.com/mp3s/weatherReportSuite.mp3" length="12216320" type="audio/mpeg"/>
615 			</item>
616 		<item>
617 			<description>&lt;a href="http://arts.ucsc.edu/gdead/agdl/darkstar.html"&gt;Dark Star&lt;/a&gt; crashes, pouring its light into ashes.</description>
618 			<enclosure url="http://www.scripting.com/mp3s/darkStar.mp3" length="10889216" type="audio/mpeg"/>
619 			</item>
620 		<item>
621 			<description>DaveNet: &lt;a href="http://davenet.userland.com/2001/01/21/theUsBlues"&gt;The U.S. Blues&lt;/a&gt;.</description>
622 			</item>
623 		<item>
624 			<description>Still listening to the US Blues. &lt;i&gt;"Wave that flag, wave it wide and high.."&lt;/i&gt; Mistake made in the 60s. We gave our country to the assholes. Ah ah. Let's take it back. Hey I'm still a hippie. &lt;i&gt;"You could call this song The United States Blues."&lt;/i&gt;</description>
625 			</item>
626 		<item>
627 			<description>&lt;a href="http://www.sixties.com/html/garcia_stack_0.html"&gt;&lt;img src="http://www.scripting.com/images/captainTripsSmall.gif" height="51" width="42" border="0" hspace="10" vspace="10" align="right"&gt;&lt;/a&gt;In celebration of today's inauguration, after hearing all those great patriotic songs, America the Beautiful, even The Star Spangled Banner made my eyes mist up. It made my choice of Grateful Dead song of the night realllly easy. Here are the &lt;a href="http://searchlyrics2.homestead.com/gd_usblues.html"&gt;lyrics&lt;/a&gt;. Click on the audio icon to the left to give it a listen. "Red and white, blue suede shoes, I'm Uncle Sam, how do you do?" It's a different kind of patriotic music, but man I love my country and I love Jerry and the band. &lt;i&gt;I truly do!&lt;/i&gt;</description>
628 			<enclosure url="http://www.scripting.com/mp3s/usBlues.mp3" length="5272510" type="audio/mpeg"/>
629 			</item>
630 		<item>
631 			<description>Grateful Dead: "Tennessee, Tennessee, ain't no place I'd rather be."</description>
632 			<enclosure url="http://www.scripting.com/mp3s/tennesseeJed.mp3" length="3442648" type="audio/mpeg"/>
633 			</item>
634 		<item>
635 			<description>Ed Cone: "Had a nice Deadhead experience with my wife, who never was one but gets the vibe and knows and likes a lot of the music. Somehow she made it to the age of 40 without ever hearing Wharf Rat. We drove to Jersey and back over Christmas with the live album commonly known as Skull and Roses in the CD player much of the way, and it was cool to see her discover one the band's finest moments. That song is unique and underappreciated. Fun to hear that disc again after a few years off -- you get Jerry as blues-guitar hero on Big Railroad Blues and a nice version of Bertha."</description>
636 			<enclosure url="http://www.scripting.com/mp3s/darkStarWharfRat.mp3" length="27503386" type="audio/mpeg"/>
637 			</item>
638 		<item>
639 			<description>&lt;a href="http://arts.ucsc.edu/GDead/AGDL/fotd.html"&gt;Tonight's Song&lt;/a&gt;: "If I get home before daylight I just might get some sleep tonight." </description>
640 			<enclosure url="http://www.scripting.com/mp3s/friendOfTheDevil.mp3" length="3219742" type="audio/mpeg"/>
641 			</item>
642 		<item>
643 			<description>&lt;a href="http://arts.ucsc.edu/GDead/AGDL/uncle.html"&gt;Tonight's song&lt;/a&gt;: "Come hear Uncle John's Band by the river side. Got some things to talk about here beside the rising tide."</description>
644 			<enclosure url="http://www.scripting.com/mp3s/uncleJohnsBand.mp3" length="4587102" type="audio/mpeg"/>
645 			</item>
646 		<item>
647 			<description>&lt;a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/Me_and_My_Uncle.txt"&gt;Me and My Uncle&lt;/a&gt;: "I loved my uncle, God rest his soul, taught me good, Lord, taught me all I know. Taught me so well, I grabbed that gold and I left his dead ass there by the side of the road."
648 </description>
649 			<enclosure url="http://www.scripting.com/mp3s/meAndMyUncle.mp3" length="2949248" type="audio/mpeg"/>
650 			</item>
651 		<item>
652 			<description>Truckin, like the doo-dah man, once told me gotta play your hand. Sometimes the cards ain't worth a dime, if you don't lay em down.</description>
653 			<enclosure url="http://www.scripting.com/mp3s/truckin.mp3" length="4847908" type="audio/mpeg"/>
654 			</item>
655 		<item>
656 			<description>Two-Way-Web: &lt;a href="http://www.thetwowayweb.com/payloadsForRss"&gt;Payloads for RSS&lt;/a&gt;. "When I started talking with Adam late last year, he wanted me to think about high quality video on the Internet, and I totally didn't want to hear about it."</description>
657 			</item>
658 		<item>
659 			<description>A touch of gray, kinda suits you anyway..</description>
660 			<enclosure url="http://www.scripting.com/mp3s/touchOfGrey.mp3" length="5588242" type="audio/mpeg"/>
661 			</item>
662 		<item>
663 			<description>&lt;a href="http://www.sixties.com/html/garcia_stack_0.html"&gt;&lt;img src="http://www.scripting.com/images/captainTripsSmall.gif" height="51" width="42" border="0" hspace="10" vspace="10" align="right"&gt;&lt;/a&gt;In celebration of today's inauguration, after hearing all those great patriotic songs, America the Beautiful, even The Star Spangled Banner made my eyes mist up. It made my choice of Grateful Dead song of the night realllly easy. Here are the &lt;a href="http://searchlyrics2.homestead.com/gd_usblues.html"&gt;lyrics&lt;/a&gt;. Click on the audio icon to the left to give it a listen. "Red and white, blue suede shoes, I'm Uncle Sam, how do you do?" It's a different kind of patriotic music, but man I love my country and I love Jerry and the band. &lt;i&gt;I truly do!&lt;/i&gt;</description>
664 			<enclosure url="http://www.scripting.com/mp3s/usBlues.mp3" length="5272510" type="audio/mpeg"/>
665 			</item>
666 		</channel>
667 	</rss><?xml version="1.0"?>`;
668 
669 	{
670 		auto e = parseRss(test2);
671 		assert(e.items[$-1].enclosure.url == "http://www.scripting.com/mp3s/usBlues.mp3");
672 	}
673 
674 auto test3 = `<rss version="2.0">
675    <channel>
676       <title>Liftoff News</title>
677       <link>http://liftoff.msfc.nasa.gov/</link>
678       <description>Liftoff to Space Exploration.</description>
679       <language>en-us</language>
680       <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
681       <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
682       <docs>http://blogs.law.harvard.edu/tech/rss</docs>
683       <generator>Weblog Editor 2.0</generator>
684       <managingEditor>editor@example.com</managingEditor>
685       <webMaster>webmaster@example.com</webMaster>
686       <item>
687          <title>Star City</title>
688          <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
689          <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
690          <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
691          <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
692       </item>
693       <item>
694          <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
695          <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
696          <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
697       </item>
698       <item>
699          <title>The Engine That Does More</title>
700          <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
701          <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.</description>
702          <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
703          <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
704       </item>
705       <item>
706          <title>Astronauts' Dirty Laundry</title>
707          <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
708          <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.</description>
709          <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
710          <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
711       </item>
712    </channel>
713 </rss>`;
714 
715 
716 auto testAtom1 = `<?xml version="1.0" encoding="utf-8"?>
717 
718 <feed xmlns="http://www.w3.org/2005/Atom">
719 
720 	<title>Example Feed</title>
721 	<subtitle>A subtitle.</subtitle>
722 	<link href="http://example.org/feed/" rel="self" />
723 	<link href="http://example.org/" />
724 	<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
725 	<updated>2003-12-13T18:30:02Z</updated>
726 	
727 	
728 	<entry>
729 		<title>Atom-Powered Robots Run Amok</title>
730 		<link href="http://example.org/2003/12/13/atom03" />
731 		<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
732 		<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
733 		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
734 		<updated>2003-12-13T18:30:02Z</updated>
735 		<summary>Some text.</summary>
736 		<content type="xhtml">
737 			<div xmlns="http://www.w3.org/1999/xhtml">
738 				<p>This is the entry content.</p>
739 			</div>
740 		</content>
741 		<author>
742 			<name>John Doe</name>
743 			<email>johndoe@example.com</email>
744 		</author>
745 	</entry>
746 
747 </feed>`;
748 
749 	{
750 		auto e = parseAtom(testAtom1);
751 
752 		assert(e.entries.length == 1);
753 		assert(e.link == "http://example.org/");
754 		assert(e.title == "Example Feed");
755 		assert(e.entries[0].title == "Atom-Powered Robots Run Amok");
756 		assert(e.entries[0].link == "http://example.org/2003/12/13/atom03", e.entries[0].link);
757 		assert(e.entries[0].summary.text == "Some text.", e.entries[0].summary.text);
758 		assert(e.entries[0].summary.html.length == 0);
759 		assert(e.entries[0].content.text.length == 0);
760 		assert(e.entries[0].content.html.length > 10);
761 
762 		auto gf = e.toGenericFeed();
763 
764 		assert(gf.items[0].lastUpdatedDate == "2003-12-13T18:30:02Z");
765 	}
766 
767 	{
768 		auto xml = `<rss version="2.0">
769 			<channel>
770 			<title>NYT > World News</title>
771 			<link>
772 			https://www.nytimes.com/section/world?emc=rss&amp;partner=rss
773 			</link>
774 			<atom:link href="https://rss.nytimes.com/services/xml/rss/nyt/World.xml" rel="self" type="application/rss+xml"/>
775 			<description/>
776 			<language>en-us</language>
777 			<copyright>Copyright 2019 The New York Times Company</copyright>
778 			<lastBuildDate>Sat, 07 Dec 2019 00:15:41 +0000</lastBuildDate>
779 			<image>
780 			<title>NYT > World News</title>
781 			<url>
782 			https://static01.nyt.com/images/misc/NYT_logo_rss_250x40.png
783 			</url>
784 			<link>
785 			https://www.nytimes.com/section/world?emc=rss&amp;partner=rss
786 			</link>
787 			</image>
788 			<item>
789 			<title>
790 			France Is Hit by Second Day of Pension Strikes as Unions Dig In
791 			</title>
792 			<link>https://www.nytimes.com/2019/12/06/world/europe/france-pension-strike-macron.html?emc=rss&amp;partner=rss</link>
793 			<guid isPermaLink="true">
794 			https://www.nytimes.com/2019/12/06/world/europe/france-pension-strike-macron.html
795 			</guid>
796 			<atom:link href="https://www.nytimes.com/2019/12/06/world/europe/france-pension-strike-macron.html?emc=rss&amp;partner=rss" rel="standout"/>
797 			<description>
798 			Transportation was severely disrupted in Paris and other cities, a day after huge protests over government plans to overhaul pensions. Unions are planning more protests next week.
799 			</description>
800 			<dc:creator>Aurelien Breeden</dc:creator>
801 			<pubDate>Fri, 06 Dec 2019 18:02:13 +0000</pubDate>
802 			<category domain="http://www.nytimes.com/namespaces/keywords/nyt_geo">France</category>
803 			<category domain="http://www.nytimes.com/namespaces/keywords/des">Demonstrations, Protests and Riots</category>
804 			<category domain="http://www.nytimes.com/namespaces/keywords/des">Pensions and Retirement Plans</category>
805 			<category domain="http://www.nytimes.com/namespaces/keywords/des">Politics and Government</category>
806 			<category domain="http://www.nytimes.com/namespaces/keywords/des">Strikes</category>
807 			<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Macron, Emmanuel (1977- )</category>
808 			<media:content height="151" medium="image" url="https://static01.nyt.com/images/2019/12/06/world/06france-strikes/merlin_165509820_476d5340-3717-4fbb-b187-097ae7718e48-moth.jpg" width="151"/>
809 			<media:credit>Rafael Yaghobzadeh/Associated Press</media:credit>
810 			<media:description>
811 			A deserted Gare de Lyon train station in Paris on Friday. Unions are aiming for a protracted strike.
812 			</media:description>
813 			</item></channel></rss>`;
814 
815 			auto e = parseRss(xml);
816 			assert(e.items[0].link == "https://www.nytimes.com/2019/12/06/world/europe/france-pension-strike-macron.html?emc=rss&partner=rss", e.items[0].link);
817 
818 			auto gf = e.toGenericFeed();
819 			assert(gf.items[0].link == "https://www.nytimes.com/2019/12/06/world/europe/france-pension-strike-macron.html?emc=rss&partner=rss", e.items[0].link);
820 
821 			assert(gf.items[0].publicationDate == "2019-12-06T18:02:13Z");
822 	}
823 
824 }