/++
	RSS/Atom feed reading

	References:
	$(LIST
		* https://cyber.harvard.edu/rss/rss.html
		* http://www.rssboard.org/rss-specification
		* https://tools.ietf.org/html/rfc4287
		* https://en.wikipedia.org/wiki/Atom_(Web_standard)
	)
+/
module arsd.rss;

import arsd.dom;

/// Generic subset of RSS and Atom, normalized for easy consumption.
/// Produced by [parseFeed], [RssChannel.toGenericFeed] and [AtomFeed.toGenericFeed].
struct Feed {
	string title; /// Feed title (RSS channel title / Atom feed title).
	string description; /// RSS channel description or Atom subtitle.
	string lastUpdated; /// RSS lastBuildDate or Atom updated, copied verbatim (the date format is NOT normalized).

	/// A single entry of the feed.
	static struct Item {
		string title; /// Item title.
		string link; /// Item link.
		string description; /// RSS description, or the Atom summary (html if present, otherwise text).
		string author; /// RSS author, or the Atom author's email (name if no email was given).
		string publicationDate; /// RSS pubDate or Atom published, copied verbatim.
		string lastUpdatedDate; /// Atom updated; left empty for RSS.
		string guid; /// RSS guid or Atom id.

		string enclosureUri; /// Enclosure url/href attribute, if the item had an enclosure.
		string enclosureType; /// Enclosure type attribute.
		string enclosureSize; /// Enclosure length attribute, kept as a string.
	}

	Item[] items; /// All items, in the order they were found in the source.
}

/// The kinds of feed [identifyFeed] can distinguish.
enum FeedType {
	unknown, /// Root element is neither of the recognized ones.
	rss, /// Root element is `rss`.
	atom /// Root element is `feed` or `atom:feed`.
}

/// Determines the feed type by looking at the tag name of the given (non-null) root element.
FeedType identifyFeed(Element e) {
	assert(e !is null);

	if(e.tagName == "rss")
		return FeedType.rss;
	if(e.tagName == "feed" || e.tagName == "atom:feed")
		return FeedType.atom;

	return FeedType.unknown;
}

/++
	Parses a feed generically: identifies the type of the root element, parses it
	with [parseRss] or [parseAtom], and converts the result to a [Feed].

	Throws: Exception if the root element is not a recognized feed type.
+/
Feed parseFeed(Element e) {
	final switch(identifyFeed(e)) {
		case FeedType.unknown:
			throw new Exception("Unknown feed type");
		case FeedType.rss:
			return parseRss(e).toGenericFeed();
		case FeedType.atom:
			return parseAtom(e).toGenericFeed();
	}
}

// application/rss+xml
// though some use text/rss+xml or application/rdf+xml

// root node of <rss version="whatever">

/// The contents of an RSS `<channel>`. Every string field is filled by [parseRss]
/// from the inner text of the direct child element with the same name.
struct RssChannel {
	string title;
	string link;
	string description;
	string lastBuildDate; // last time content in here changed
	string pubDate; // format like "Sat, 07 Sep 2002 00:00:01 GMT" when it officially changes
	string docs; // idk?

	string cloud; // has domain, port, path, registerProcedure, protocol

	string language; // optional
	string copyright;
	string managingEditor;
	string webMaster;

	string category;

	string ttl; // in minutes, if present

	RssImage image;

	RssItem[] items;

	/// Converts this channel to the format-independent [Feed] representation.
	/// Values are copied as-is; no date or text/html normalization is done.
	Feed toGenericFeed() {
		Feed f;
		f.title = this.title;
		f.description = this.description; // FIXME text vs html?
		f.lastUpdated = this.lastBuildDate; // FIXME: normalize format rss uses "Mon, 18 Nov 2019 12:00:00 GMT"

		foreach(item; items) {
			Feed.Item fi;

			fi.title = item.title;
			fi.link = item.link;
			fi.description = item.description; // FIXME: try to normalize text vs html
			fi.author = item.author; // FIXME
			fi.publicationDate = item.pubDate; // FIXME
			fi.guid = item.guid;
			//fi.lastUpdatedDate; // not available i think

			fi.enclosureUri = item.enclosure.url;
			fi.enclosureType = item.enclosure.type;
			fi.enclosureSize = item.enclosure.length;

			f.items ~= fi;
		}
		return f;
	}
}

/// The `<image>` child of an RSS channel.
struct RssImage {
	string title; /// img alt
	string url; /// like the img src
	string link; /// like a href
	string width;
	string height;
	string description; /// img title
}

/// One `<item>` of an RSS channel.
struct RssItem {
	string title;
	string link;
	string description; // may have html!

	string author;
	string category;
	string comments; // a link

	string pubDate;
	string guid;

	RssSource source;
	RssEnclosure enclosure;
}

/// Attributes of an RSS `<enclosure>` element, kept as strings.
struct RssEnclosure {
	string url;
	string length;
	string type;
}

/// An RSS `<source>` element: its text is the title, its `url` attribute the url.
struct RssSource {
	string title;
	string url;
}


/++
	Parses RSS into structs. Requires the element to be RSS; if you are unsure
	of the type and want a generic response, use parseFeed instead.

	Params:
		element = the `<rss>` root element; it must have a direct `<channel>` child.
+/
RssChannel parseRss(Element element) {
	assert(element !is null && element.tagName == "rss");
	RssChannel c;
	element = element.requireSelector(" > channel");
	// Walk the members of RssChannel at compile time; each plain field is
	// read from the child element that has the same name as the field.
	foreach(memberName; __traits(allMembers, RssChannel)) {
		static if(memberName == "image") {
			if(auto image = element.querySelector(" > image")) {
				RssImage i;
				foreach(mn; __traits(allMembers, RssImage)) {
					__traits(getMember, i, mn) = image.optionSelector(" > " ~ mn).innerText;
				}
				c.image = i;
			}
		} else static if(memberName == "items") {
			foreach(item; element.querySelectorAll(" > item")) {
				RssItem i;
				foreach(mn; __traits(allMembers, RssItem)) {
					static if(mn == "source") {
						// <source url="...">title</source>
						if(auto s = item.querySelector(" > source")) {
							i.source.title = s.innerText;
							i.source.url = s.attrs.url;
						}
					} else static if(mn == "enclosure") {
						// the enclosure carries all of its data in attributes
						if(auto s = item.querySelector(" > enclosure")) {
							i.enclosure.url = s.attrs.url;
							i.enclosure.type = s.attrs.type;
							i.enclosure.length = s.attrs.length;
						}
					} else {
						__traits(getMember, i, mn) = item.optionSelector(" > " ~ mn).innerText;
					}
				}
				c.items ~= i;
			}
		} else static if(is(typeof( __traits(getMember, c, memberName).offsetof))) {
			// only data fields have .offsetof; this skips member functions such as toGenericFeed
			__traits(getMember, c, memberName) = element.optionSelector(" > " ~ memberName).innerText;
		}
	}

	return c;
}

/// Parses an RSS document given as XML text; the document root must be `<rss>`.
RssChannel parseRss(string s) {
	// NOTE(review): the two `true` arguments are presumably caseSensitive and strict - confirm against arsd.dom
	auto document = new Document(s, true, true);
	return parseRss(document.root);
}

/*
struct SyndicationInfo {
	string updatePeriod; // sy:updatePeriod
	string updateFrequency;
	string updateBase;

	string skipHours; // stored as <hour> elements
	string skipDays; // stored as <day> elements
}
*/


// /////////////////// atom ////////////////////

// application/atom+xml

/+ rss vs atom
	date format is different
	atom:xxx links

	root node is <feed>, organization has no <channel>, and <entry>
	instead of <item>
+/

/++
	The contents of an Atom `<feed>` element, as filled in by [parseAtom].
+/
struct AtomFeed {
	string title; /// has a type attribute - text or html
	string subtitle; /// has a type attribute

	string updated; /// timestamp string, in a format like 2005-07-31T12:29:29Z

	string id; ///
	string link; /// i want the text/html type really, certainly not rel=self
	string rights; ///
	string generator; ///

	AtomEntry[] entries; ///

	/// Converts this feed to the format-independent [Feed] representation.
	/// Dates are copied as-is; no normalization is done.
	Feed toGenericFeed() {
		Feed feed;

		feed.title = this.title;
		feed.description = this.subtitle;
		feed.lastUpdated = this.updated; // FIXME: normalize the format is 2005-07-31T12:29:29Z

		foreach(entry; this.entries) {
			Feed.Item item;

			item.title = entry.title;
			item.link = entry.link;
			item.description = entry.summary.html.length ? entry.summary.html : entry.summary.text; // FIXME
			// An Atom person must have a name but the email is optional (RFC 4287, 3.2),
			// so fall back to the name rather than reporting no author at all.
			// FIXME normalize; RSS does "email (name)"
			item.author = entry.author.email.length ? entry.author.email : entry.author.name;
			item.publicationDate = entry.published; // FIXME the format is 2005-07-31T12:29:29Z
			item.lastUpdatedDate = entry.updated;
			item.guid = entry.id;

			item.enclosureUri = entry.enclosure.url;
			item.enclosureType = entry.enclosure.type;
			item.enclosureSize = entry.enclosure.length;

			feed.items ~= item;
		}

		return feed;
	}
}

/// One Atom `<entry>`.
struct AtomEntry {
	string title; ///
	string link; /// the alternate
	AtomEnclosure enclosure; ///
	string id; ///
	string updated; ///
	string published; ///

	AtomPerson author; ///
	AtomPerson[] contributors; ///
	AtomContent content; /// should check type. may also have a src element for a link. type of html is escaped, type of xhtml is embedded.
	AtomContent summary; ///
}

/// Data of a `<link rel="enclosure">` element, kept as strings.
struct AtomEnclosure {
	string url; /// the href attribute
	string length; ///
	string type; ///
}


/// An Atom text construct, split by how it was encoded in the source.
struct AtomContent {
	string text; /// set for type=text or when there is no type attribute
	string html; /// set for type=html (unescaped text) or type=xhtml (inner markup)
}

/// An Atom person construct (author or contributor).
struct AtomPerson {
	string name; ///
	string uri; ///
	string email; ///
}

/++
	Parses an Atom feed into structs. If you are unsure of the feed type and
	want a generic response, use parseFeed instead.

	Params:
		ele = the `<feed>` root element.

	NOTE(review): feed-level simple elements also accept an `atom:` prefix, but
	entries, links, persons, content and summary are only matched unprefixed.
+/
AtomFeed parseAtom(Element ele) {
	AtomFeed af;
	af.title = ele.optionSelector(` > title, > atom\:title`).innerText;
	af.subtitle = ele.optionSelector(` > subtitle, > atom\:subtitle`).innerText;
	af.id = ele.optionSelector(` > id, > atom\:id`).innerText;
	af.updated = ele.optionSelector(` > updated, > atom\:updated`).innerText;
	af.rights = ele.optionSelector(` > rights, > atom\:rights`).innerText;
	af.generator = ele.optionSelector(` > generator, > atom\:generator`).innerText;
	// a link without rel; this avoids picking up rel=self
	af.link = ele.optionSelector(` > link:not([rel])`).getAttribute("href");

	foreach(entry; ele.querySelectorAll(` > entry`)) {
		AtomEntry ae;

		ae.title = entry.optionSelector(` > title, > atom\:title`).innerText;
		ae.updated = entry.optionSelector(` > updated, > atom\:updated`).innerText;
		ae.published = entry.optionSelector(` > published, > atom\:published`).innerText;
		ae.id = entry.optionSelector(` > id, > atom\:id`).innerText;

		// the media type of an html page is text/html
		ae.link = entry.optionSelector(` > link:not([rel]), > link[rel=alternate], > link[type="text/html"]`).getAttribute("href");

		if(auto enclosure = entry.querySelector(` > link[rel=enclosure]`)) {
			ae.enclosure.url = enclosure.attrs.href;
			ae.enclosure.length = enclosure.attrs.length;
			ae.enclosure.type = enclosure.attrs.type;
		}

		if(auto author = entry.querySelector(` > author`)) {
			ae.author.name = author.optionSelector(` > name`).innerText;
			ae.author.uri = author.optionSelector(` > uri`).innerText;
			ae.author.email = author.optionSelector(` > email`).innerText;
		}

		foreach(contributor; entry.querySelectorAll(` > contributor`)) {
			AtomPerson c;
			c.name = contributor.optionSelector(` > name`).innerText;
			c.uri = contributor.optionSelector(` > uri`).innerText;
			c.email = contributor.optionSelector(` > email`).innerText;
			ae.contributors ~= c;
		}

		// Only direct children are considered: xhtml content may itself contain
		// elements named <summary> or <content>, which must not be mistaken
		// for the entry's own.
		if(auto e = entry.querySelector(` > content[type=xhtml]`))
			ae.content.html = e.innerHTML; // xhtml is embedded as markup
		if(auto e = entry.querySelector(` > content[type=html]`))
			ae.content.html = e.innerText; // html is escaped, so the text is the html
		if(auto e = entry.querySelector(` > content[type=text], > content:not([type])`))
			ae.content.text = e.innerText;

		if(auto e = entry.querySelector(` > summary[type=xhtml]`))
			ae.summary.html = e.innerHTML;
		if(auto e = entry.querySelector(` > summary[type=html]`))
			ae.summary.html = e.innerText;
		if(auto e = entry.querySelector(` > summary[type=text], > summary:not([type])`))
			ae.summary.text = e.innerText;

		af.entries ~= ae;
	}

	return af;
}

/// Parses an Atom document given as XML text; the document root must be the feed element.
AtomFeed parseAtom(string s) {
	// NOTE(review): the two `true` arguments are presumably caseSensitive and strict - confirm against arsd.dom
	auto document = new Document(s, true, true);
	return parseAtom(document.root);
}

unittest {

	// NOTE: html embedded in the <description> elements of these fixtures is
	// entity-escaped (&lt; / &gt;) so that every fixture is well-formed XML;
	// some of that html has unclosed tags such as <p> and <img>.

	auto test1 = `<?xml version="1.0" encoding="ISO-8859-1"?>
<rss version="0.91">
	<channel>
		<title>WriteTheWeb</title>
		<link>http://writetheweb.com</link>
		<description>News for web users that write back</description>
		<language>en-us</language>
		<copyright>Copyright 2000, WriteTheWeb team.</copyright>
		<managingEditor>editor@writetheweb.com</managingEditor>
		<webMaster>webmaster@writetheweb.com</webMaster>
		<image>
			<title>WriteTheWeb</title>
			<url>http://writetheweb.com/images/mynetscape88.gif</url>
			<link>http://writetheweb.com</link>
			<width>88</width>
			<height>31</height>
			<description>News for web users that write back</description>
			</image>
		<item>
			<title>Giving the world a pluggable Gnutella</title>
			<link>http://writetheweb.com/read.php?item=24</link>
			<description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description>
			</item>
		<item>
			<title>Syndication discussions hot up</title>
			<link>http://writetheweb.com/read.php?item=23</link>
			<description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description>
			</item>
		<item>
			<title>Personal web server integrates file sharing and messaging</title>
			<link>http://writetheweb.com/read.php?item=22</link>
			<description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description>
			</item>
		<item>
			<title>Syndication and Metadata</title>
			<link>http://writetheweb.com/read.php?item=21</link>
			<description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description>
			</item>
		<item>
			<title>UK bloggers get organised</title>
			<link>http://writetheweb.com/read.php?item=20</link>
			<description>Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description>
			</item>
		<item>
			<title>Yournamehere.com more important than anything</title>
			<link>http://writetheweb.com/read.php?item=19</link>
			<description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description>
			</item>
		</channel>
	</rss>`;


	{
		auto e = parseRss(test1);
		// compare, don't assign: `=` here would resize the array and always pass
		assert(e.items.length == 6);
		assert(e.items[$-1].title == "Yournamehere.com more important than anything", e.items[$-1].title);
		assert(e.items[0].title == "Giving the world a pluggable Gnutella");
		assert(e.image.url == "http://writetheweb.com/images/mynetscape88.gif");
	}

	auto test2 = `<?xml version="1.0"?>
<!-- RSS generation done by 'Radio UserLand' on Fri, 13 Apr 2001 19:23:02 GMT -->
<rss version="0.92">
	<channel>
		<title>Dave Winer: Grateful Dead</title>
		<link>http://www.scripting.com/blog/categories/gratefulDead.html</link>
		<description>A high-fidelity Grateful Dead song every day. This is where we're experimenting with enclosures on RSS news items that download when you're not using your computer. If it works (it will) it will be the end of the Click-And-Wait multimedia experience on the Internet. </description>
		<lastBuildDate>Fri, 13 Apr 2001 19:23:02 GMT</lastBuildDate>
		<docs>http://backend.userland.com/rss092</docs>
		<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
		<webMaster>dave@userland.com (Dave Winer)</webMaster>
		<cloud domain="data.ourfavoritesongs.com" port="80" path="/RPC2" registerProcedure="ourFavoriteSongs.rssPleaseNotify" protocol="xml-rpc"/>
		<item>
			<description>It's been a few days since I added a song to the Grateful Dead channel. Now that there are all these new Radio users, many of whom are tuned into this channel (it's #16 on the hotlist of upstreaming Radio users, there's no way of knowing how many non-upstreaming users are subscribing, have to do something about this..). Anyway, tonight's song is a live version of Weather Report Suite from Dick's Picks Volume 7. It's wistful music. Of course a beautiful song, oft-quoted here on Scripting News. &lt;i&gt;A little change, the wind and rain.&lt;/i&gt;
			</description>
			<enclosure url="http://www.scripting.com/mp3s/weatherReportDicksPicsVol7.mp3" length="6182912" type="audio/mpeg"/>
			</item>
		<item>
			<description>Kevin Drennan started a &lt;a href="http://deadend.editthispage.com/"&gt;Grateful Dead Weblog&lt;/a&gt;. Hey it's cool, he even has a &lt;a href="http://deadend.editthispage.com/directory/61"&gt;directory&lt;/a&gt;. &lt;i&gt;A Frontier 7 feature.&lt;/i&gt;</description>
			<source url="http://scriptingnews.userland.com/xml/scriptingNews2.xml">Scripting News</source>
			</item>
		<item>
			<description>&lt;a href="http://arts.ucsc.edu/GDead/AGDL/other1.html"&gt;The Other One&lt;/a&gt;, live instrumental, One From The Vault. Very rhythmic very spacy, you can listen to it many times, and enjoy something new every time.</description>
			<enclosure url="http://www.scripting.com/mp3s/theOtherOne.mp3" length="6666097" type="audio/mpeg"/>
			</item>
		<item>
			<description>This is a test of a change I just made. Still diggin..</description>
			</item>
		<item>
			<description>The HTML rendering almost &lt;a href="http://validator.w3.org/check/referer"&gt;validates&lt;/a&gt;. Close. Hey I wonder if anyone has ever published a style guide for ALT attributes on images? What are you supposed to say in the ALT attribute? I sure don't know. If you're blind send me an email if u cn rd ths. </description>
			</item>
		<item>
			<description>&lt;a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/Franklin's_Tower.txt"&gt;Franklin's Tower&lt;/a&gt;, a live version from One From The Vault.</description>
			<enclosure url="http://www.scripting.com/mp3s/franklinsTower.mp3" length="6701402" type="audio/mpeg"/>
			</item>
		<item>
			<description>Moshe Weitzman says Shakedown Street is what I'm lookin for for tonight. I'm listening right now. It's one of my favorites. "Don't tell me this town ain't got no heart." Too bright. I like the jazziness of Weather Report Suite. Dreamy and soft. How about The Other One? "Spanish lady come to me.."</description>
			<source url="http://scriptingnews.userland.com/xml/scriptingNews2.xml">Scripting News</source>
			</item>
		<item>
			<description>&lt;a href="http://www.scripting.com/mp3s/youWinAgain.mp3"&gt;The news is out&lt;/a&gt;, all over town..&lt;p&gt;
You've been seen, out runnin round. &lt;p&gt;
The lyrics are &lt;a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/You_Win_Again.txt"&gt;here&lt;/a&gt;, short and sweet. &lt;p&gt;
&lt;i&gt;You win again!&lt;/i&gt;
			</description>
			<enclosure url="http://www.scripting.com/mp3s/youWinAgain.mp3" length="3874816" type="audio/mpeg"/>
			</item>
		<item>
			<description>&lt;a href="http://www.getlyrics.com/lyrics/grateful-dead/wake-of-the-flood/07.htm"&gt;Weather Report Suite&lt;/a&gt;: "Winter rain, now tell me why, summers fade, and roses die? The answer came. The wind and rain. Golden hills, now veiled in grey, summer leaves have blown away. Now what remains? The wind and rain."</description>
			<enclosure url="http://www.scripting.com/mp3s/weatherReportSuite.mp3" length="12216320" type="audio/mpeg"/>
			</item>
		<item>
			<description>&lt;a href="http://arts.ucsc.edu/gdead/agdl/darkstar.html"&gt;Dark Star&lt;/a&gt; crashes, pouring its light into ashes.</description>
			<enclosure url="http://www.scripting.com/mp3s/darkStar.mp3" length="10889216" type="audio/mpeg"/>
			</item>
		<item>
			<description>DaveNet: &lt;a href="http://davenet.userland.com/2001/01/21/theUsBlues"&gt;The U.S. Blues&lt;/a&gt;.</description>
			</item>
		<item>
			<description>Still listening to the US Blues. &lt;i&gt;"Wave that flag, wave it wide and high.."&lt;/i&gt; Mistake made in the 60s. We gave our country to the assholes. Ah ah. Let's take it back. Hey I'm still a hippie. &lt;i&gt;"You could call this song The United States Blues."&lt;/i&gt;</description>
			</item>
		<item>
			<description>&lt;a href="http://www.sixties.com/html/garcia_stack_0.html"&gt;&lt;img src="http://www.scripting.com/images/captainTripsSmall.gif" height="51" width="42" border="0" hspace="10" vspace="10" align="right"&gt;&lt;/a&gt;In celebration of today's inauguration, after hearing all those great patriotic songs, America the Beautiful, even The Star Spangled Banner made my eyes mist up. It made my choice of Grateful Dead song of the night realllly easy. Here are the &lt;a href="http://searchlyrics2.homestead.com/gd_usblues.html"&gt;lyrics&lt;/a&gt;. Click on the audio icon to the left to give it a listen. "Red and white, blue suede shoes, I'm Uncle Sam, how do you do?" It's a different kind of patriotic music, but man I love my country and I love Jerry and the band. &lt;i&gt;I truly do!&lt;/i&gt;</description>
			<enclosure url="http://www.scripting.com/mp3s/usBlues.mp3" length="5272510" type="audio/mpeg"/>
			</item>
		<item>
			<description>Grateful Dead: "Tennessee, Tennessee, ain't no place I'd rather be."</description>
			<enclosure url="http://www.scripting.com/mp3s/tennesseeJed.mp3" length="3442648" type="audio/mpeg"/>
			</item>
		<item>
			<description>Ed Cone: "Had a nice Deadhead experience with my wife, who never was one but gets the vibe and knows and likes a lot of the music. Somehow she made it to the age of 40 without ever hearing Wharf Rat. We drove to Jersey and back over Christmas with the live album commonly known as Skull and Roses in the CD player much of the way, and it was cool to see her discover one the band's finest moments. That song is unique and underappreciated. Fun to hear that disc again after a few years off -- you get Jerry as blues-guitar hero on Big Railroad Blues and a nice version of Bertha."</description>
			<enclosure url="http://www.scripting.com/mp3s/darkStarWharfRat.mp3" length="27503386" type="audio/mpeg"/>
			</item>
		<item>
			<description>&lt;a href="http://arts.ucsc.edu/GDead/AGDL/fotd.html"&gt;Tonight's Song&lt;/a&gt;: "If I get home before daylight I just might get some sleep tonight." </description>
			<enclosure url="http://www.scripting.com/mp3s/friendOfTheDevil.mp3" length="3219742" type="audio/mpeg"/>
			</item>
		<item>
			<description>&lt;a href="http://arts.ucsc.edu/GDead/AGDL/uncle.html"&gt;Tonight's song&lt;/a&gt;: "Come hear Uncle John's Band by the river side. Got some things to talk about here beside the rising tide."</description>
			<enclosure url="http://www.scripting.com/mp3s/uncleJohnsBand.mp3" length="4587102" type="audio/mpeg"/>
			</item>
		<item>
			<description>&lt;a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/Me_and_My_Uncle.txt"&gt;Me and My Uncle&lt;/a&gt;: "I loved my uncle, God rest his soul, taught me good, Lord, taught me all I know. Taught me so well, I grabbed that gold and I left his dead ass there by the side of the road."
			</description>
			<enclosure url="http://www.scripting.com/mp3s/meAndMyUncle.mp3" length="2949248" type="audio/mpeg"/>
			</item>
		<item>
			<description>Truckin, like the doo-dah man, once told me gotta play your hand. Sometimes the cards ain't worth a dime, if you don't lay em down.</description>
			<enclosure url="http://www.scripting.com/mp3s/truckin.mp3" length="4847908" type="audio/mpeg"/>
			</item>
		<item>
			<description>Two-Way-Web: &lt;a href="http://www.thetwowayweb.com/payloadsForRss"&gt;Payloads for RSS&lt;/a&gt;. "When I started talking with Adam late last year, he wanted me to think about high quality video on the Internet, and I totally didn't want to hear about it."</description>
			</item>
		<item>
			<description>A touch of gray, kinda suits you anyway..</description>
			<enclosure url="http://www.scripting.com/mp3s/touchOfGrey.mp3" length="5588242" type="audio/mpeg"/>
			</item>
		<item>
			<description>&lt;a href="http://www.sixties.com/html/garcia_stack_0.html"&gt;&lt;img src="http://www.scripting.com/images/captainTripsSmall.gif" height="51" width="42" border="0" hspace="10" vspace="10" align="right"&gt;&lt;/a&gt;In celebration of today's inauguration, after hearing all those great patriotic songs, America the Beautiful, even The Star Spangled Banner made my eyes mist up. It made my choice of Grateful Dead song of the night realllly easy. Here are the &lt;a href="http://searchlyrics2.homestead.com/gd_usblues.html"&gt;lyrics&lt;/a&gt;. Click on the audio icon to the left to give it a listen. "Red and white, blue suede shoes, I'm Uncle Sam, how do you do?" It's a different kind of patriotic music, but man I love my country and I love Jerry and the band. &lt;i&gt;I truly do!&lt;/i&gt;</description>
			<enclosure url="http://www.scripting.com/mp3s/usBlues.mp3" length="5272510" type="audio/mpeg"/>
			</item>
		</channel>
	</rss><?xml version="1.0"?>`;

	{
		auto e = parseRss(test2);
		assert(e.items[$-1].enclosure.url == "http://www.scripting.com/mp3s/usBlues.mp3");
	}

	auto test3 = `<rss version="2.0">
	<channel>
		<title>Liftoff News</title>
		<link>http://liftoff.msfc.nasa.gov/</link>
		<description>Liftoff to Space Exploration.</description>
		<language>en-us</language>
		<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
		<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
		<docs>http://blogs.law.harvard.edu/tech/rss</docs>
		<generator>Weblog Editor 2.0</generator>
		<managingEditor>editor@example.com</managingEditor>
		<webMaster>webmaster@example.com</webMaster>
		<item>
			<title>Star City</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
			<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
			<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
		</item>
		<item>
			<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
			<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
		</item>
		<item>
			<title>The Engine That Does More</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
			<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
			<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
		</item>
		<item>
			<title>Astronauts' Dirty Laundry</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
			<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
			<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
		</item>
	</channel>
</rss>`;

	{
		// this fixture used to be declared but never parsed
		auto e = parseRss(test3);
		assert(e.items.length == 4);
		assert(e.items[0].title == "Star City", e.items[0].title);
		assert(e.items[0].guid == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573", e.items[0].guid);
		assert(e.items[1].title.length == 0); // the second item has no title
		assert(e.items[$-1].link == "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp", e.items[$-1].link);

		auto f = e.toGenericFeed();
		assert(f.title == "Liftoff News");
		assert(f.items.length == 4);
		assert(f.items[0].guid == e.items[0].guid);
	}


	auto testAtom1 = `<?xml version="1.0" encoding="utf-8"?>

<feed xmlns="http://www.w3.org/2005/Atom">

	<title>Example Feed</title>
	<subtitle>A subtitle.</subtitle>
	<link href="http://example.org/feed/" rel="self" />
	<link href="http://example.org/" />
	<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
	<updated>2003-12-13T18:30:02Z</updated>


	<entry>
		<title>Atom-Powered Robots Run Amok</title>
		<link href="http://example.org/2003/12/13/atom03" />
		<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
		<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
		<updated>2003-12-13T18:30:02Z</updated>
		<summary>Some text.</summary>
		<content type="xhtml">
			<div xmlns="http://www.w3.org/1999/xhtml">
				<p>This is the entry content.</p>
			</div>
		</content>
		<author>
			<name>John Doe</name>
			<email>johndoe@example.com</email>
		</author>
	</entry>

</feed>`;

	{
		auto e = parseAtom(testAtom1);

		assert(e.entries.length == 1);
		assert(e.link == "http://example.org/");
		assert(e.title == "Example Feed");
		assert(e.entries[0].title == "Atom-Powered Robots Run Amok");
		assert(e.entries[0].link == "http://example.org/2003/12/13/atom03", e.entries[0].link);
		assert(e.entries[0].summary.text == "Some text.", e.entries[0].summary.text);
		assert(e.entries[0].summary.html.length == 0);
		assert(e.entries[0].content.text.length == 0);
		assert(e.entries[0].content.html.length > 10);
	}

}