1 // FIXME: add classList 2 // FIXME: xml namespace support??? 3 // FIXME: add matchesSelector - standard name is `matches`. also `closest` walks up to find the parent that matches 4 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 5 // FIXME: appendChild should not fail if the thing already has a parent; it should just automatically remove it per standard. 6 7 // FIXME: the scriptable list is quite arbitrary 8 9 10 // xml entity references?! 11 12 /++ 13 This is an html DOM implementation, started with cloning 14 what the browser offers in Javascript, but going well beyond 15 it in convenience. 16 17 If you can do it in Javascript, you can probably do it with 18 this module, and much more. 19 20 --- 21 import arsd.dom; 22 23 void main() { 24 auto document = new Document("<html><p>paragraph</p></html>"); 25 writeln(document.querySelector("p")); 26 document.root.innerHTML = "<p>hey</p>"; 27 writeln(document); 28 } 29 --- 30 31 BTW: this file optionally depends on `arsd.characterencodings`, to 32 help it correctly read files from the internet. You should be able to 33 get characterencodings.d from the same place you got this file. 34 35 If you want it to stand alone, just always use the `Document.parseUtf8` 36 function or the constructor that takes a string. 37 +/ 38 module arsd.dom; 39 40 // FIXME: support the css standard namespace thing in the selectors too 41 42 version(with_arsd_jsvar) 43 import arsd.jsvar; 44 else { 45 enum scriptable = "arsd_jsvar_compatible"; 46 } 47 48 // this is only meant to be used at compile time, as a filter for opDispatch 49 // lists the attributes we want to allow without the use of .attr 50 bool isConvenientAttribute(string name) { 51 static immutable list = [ 52 "name", "id", "href", "value", 53 "checked", "selected", "type", 54 "src", "content", "pattern", 55 "placeholder", "required", "alt", 56 "rel", 57 "method", "action", "enctype" 58 ]; 59 foreach(l; list) 60 if(name == l) return true; 61 return false; 62 } 63 64 65 // FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode 66 // FIXME: failing to close a paragraph sometimes messes things up too 67 68 // FIXME: it would be kinda cool to have some support for internal DTDs 69 // and maybe XPath as well, to some extent 70 /* 71 we could do 72 meh this sux 73 74 auto xpath = XPath(element); 75 76 // get the first p 77 xpath.p[0].a["href"] 78 */ 79 80 81 /// The main document interface, including a html parser. 82 class Document : FileResource { 83 /// Convenience method for web scraping. Requires [arsd.http2] to be 84 /// included in the build as well as [arsd.characterencodings]. 85 static Document fromUrl()(string url) { 86 import arsd.http2; 87 auto client = new HttpClient(); 88 89 auto req = client.navigateTo(Uri(url), HttpVerb.GET); 90 auto res = req.waitForCompletion(); 91 92 auto document = new Document(); 93 document.parseGarbage(cast(string) res.content); 94 95 return document; 96 } 97 98 ///. 99 this(string data, bool caseSensitive = false, bool strict = false) { 100 parseUtf8(data, caseSensitive, strict); 101 } 102 103 /** 104 Creates an empty document. It has *nothing* in it at all. 105 */ 106 this() { 107 108 } 109 110 /// This is just something I'm toying with. Right now, you use opIndex to put in css selectors. 111 /// It returns a struct that forwards calls to all elements it holds, and returns itself so you 112 /// can chain it. 113 /// 114 /// Example: document["p"].innerText("hello").addClass("modified"); 115 /// 116 /// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClas("modified"); } 117 /// 118 /// Note: always use function calls (not property syntax) and don't use toString in there for best results. 119 /// 120 /// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe 121 /// you could put in some kind of custom filter function tho. 122 ElementCollection opIndex(string selector) { 123 auto e = ElementCollection(this.root); 124 return e[selector]; 125 } 126 127 string _contentType = "text/html; charset=utf-8"; 128 129 /// If you're using this for some other kind of XML, you can 130 /// set the content type here. 131 /// 132 /// Note: this has no impact on the function of this class. 133 /// It is only used if the document is sent via a protocol like HTTP. 134 /// 135 /// This may be called by parse() if it recognizes the data. Otherwise, 136 /// if you don't set it, it assumes text/html; charset=utf-8. 137 @property string contentType(string mimeType) { 138 _contentType = mimeType; 139 return _contentType; 140 } 141 142 /// implementing the FileResource interface, useful for sending via 143 /// http automatically. 144 override @property string contentType() const { 145 return _contentType; 146 } 147 148 /// implementing the FileResource interface; it calls toString. 149 override immutable(ubyte)[] getData() const { 150 return cast(immutable(ubyte)[]) this.toString(); 151 } 152 153 154 /// Concatenates any consecutive text nodes 155 /* 156 void normalize() { 157 158 } 159 */ 160 161 /// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them. 162 /// Call this before calling parse(). 163 164 /// Note this will also preserve the prolog and doctype from the original file, if there was one. 165 void enableAddingSpecialTagsToDom() { 166 parseSawComment = (string) => true; 167 parseSawAspCode = (string) => true; 168 parseSawPhpCode = (string) => true; 169 parseSawQuestionInstruction = (string) => true; 170 parseSawBangInstruction = (string) => true; 171 } 172 173 /// If the parser sees a html comment, it will call this callback 174 /// <!-- comment --> will call parseSawComment(" comment ") 175 /// Return true if you want the node appended to the document. 176 bool delegate(string) parseSawComment; 177 178 /// If the parser sees <% asp code... %>, it will call this callback. 179 /// It will be passed "% asp code... %" or "%= asp code .. %" 180 /// Return true if you want the node appended to the document. 181 bool delegate(string) parseSawAspCode; 182 183 /// If the parser sees <?php php code... ?>, it will call this callback. 184 /// It will be passed "?php php code... ?" or "?= asp code .. ?" 185 /// Note: dom.d cannot identify the other php <? code ?> short format. 186 /// Return true if you want the node appended to the document. 187 bool delegate(string) parseSawPhpCode; 188 189 /// if it sees a <?xxx> that is not php or asp 190 /// it calls this function with the contents. 191 /// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo") 192 /// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>. 193 /// Return true if you want the node appended to the document. 194 bool delegate(string) parseSawQuestionInstruction; 195 196 /// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment), 197 /// it calls this function with the contents. 198 /// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo") 199 /// Return true if you want the node appended to the document. 200 bool delegate(string) parseSawBangInstruction; 201 202 /// Given the kind of garbage you find on the Internet, try to make sense of it. 203 /// Equivalent to document.parse(data, false, false, null); 204 /// (Case-insensitive, non-strict, determine character encoding from the data.) 205 206 /// NOTE: this makes no attempt at added security. 207 /// 208 /// It is a template so it lazily imports characterencodings. 209 void parseGarbage()(string data) { 210 parse(data, false, false, null); 211 } 212 213 /// Parses well-formed UTF-8, case-sensitive, XML or XHTML 214 /// Will throw exceptions on things like unclosed tags. 215 void parseStrict(string data) { 216 parseStream(toUtf8Stream(data), true, true); 217 } 218 219 /// Parses well-formed UTF-8 in loose mode (by default). Tries to correct 220 /// tag soup, but does NOT try to correct bad character encodings. 221 /// 222 /// They will still throw an exception. 223 void parseUtf8(string data, bool caseSensitive = false, bool strict = false) { 224 parseStream(toUtf8Stream(data), caseSensitive, strict); 225 } 226 227 // this is a template so we get lazy import behavior 228 Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) { 229 import arsd.characterencodings; 230 // gotta determine the data encoding. If you know it, pass it in above to skip all this. 231 if(dataEncoding is null) { 232 dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata); 233 // it can't tell... probably a random 8 bit encoding. Let's check the document itself. 234 // Now, XML and HTML can both list encoding in the document, but we can't really parse 235 // it here without changing a lot of code until we know the encoding. So I'm going to 236 // do some hackish string checking. 237 if(dataEncoding is null) { 238 auto dataAsBytes = cast(immutable(ubyte)[]) rawdata; 239 // first, look for an XML prolog 240 auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\""); 241 if(idx != -1) { 242 idx += "encoding=\"".length; 243 // we're probably past the prolog if it's this far in; we might be looking at 244 // content. Forget about it. 245 if(idx > 100) 246 idx = -1; 247 } 248 // if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5).. 249 if(idx == -1) { 250 idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset="); 251 if(idx != -1) { 252 idx += "charset=".length; 253 if(dataAsBytes[idx] == '"') 254 idx++; 255 } 256 } 257 258 // found something in either branch... 259 if(idx != -1) { 260 // read till a quote or about 12 chars, whichever comes first... 261 auto end = idx; 262 while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12) 263 end++; 264 265 dataEncoding = cast(string) dataAsBytes[idx .. end]; 266 } 267 // otherwise, we just don't know. 268 } 269 } 270 271 if(dataEncoding is null) { 272 if(strict) 273 throw new MarkupException("I couldn't figure out the encoding of this document."); 274 else 275 // if we really don't know by here, it means we already tried UTF-8, 276 // looked for utf 16 and 32 byte order marks, and looked for xml or meta 277 // tags... let's assume it's Windows-1252, since that's probably the most 278 // common aside from utf that wouldn't be labeled. 279 280 dataEncoding = "Windows 1252"; 281 } 282 283 // and now, go ahead and convert it. 284 285 string data; 286 287 if(!strict) { 288 // if we're in non-strict mode, we need to check 289 // the document for mislabeling too; sometimes 290 // web documents will say they are utf-8, but aren't 291 // actually properly encoded. If it fails to validate, 292 // we'll assume it's actually Windows encoding - the most 293 // likely candidate for mislabeled garbage. 294 dataEncoding = dataEncoding.toLower(); 295 dataEncoding = dataEncoding.replace(" ", ""); 296 dataEncoding = dataEncoding.replace("-", ""); 297 dataEncoding = dataEncoding.replace("_", ""); 298 if(dataEncoding == "utf8") { 299 try { 300 validate(rawdata); 301 } catch(UTFException e) { 302 dataEncoding = "Windows 1252"; 303 } 304 } 305 } 306 307 if(dataEncoding != "UTF-8") { 308 if(strict) 309 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 310 else { 311 try { 312 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 313 } catch(Exception e) { 314 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252"); 315 } 316 } 317 } else 318 data = rawdata; 319 320 return toUtf8Stream(data); 321 } 322 323 private 324 Utf8Stream toUtf8Stream(in string rawdata) { 325 string data = rawdata; 326 static if(is(Utf8Stream == string)) 327 return data; 328 else 329 return new Utf8Stream(data); 330 } 331 332 /** 333 Take XMLish data and try to make the DOM tree out of it. 334 335 The goal isn't to be perfect, but to just be good enough to 336 approximate Javascript's behavior. 337 338 If strict, it throws on something that doesn't make sense. 339 (Examples: mismatched tags. It doesn't validate!) 340 If not strict, it tries to recover anyway, and only throws 341 when something is REALLY unworkable. 342 343 If strict is false, it uses a magic list of tags that needn't 344 be closed. If you are writing a document specifically for this, 345 try to avoid such - use self closed tags at least. Easier to parse. 346 347 The dataEncoding argument can be used to pass a specific 348 charset encoding for automatic conversion. If null (which is NOT 349 the default!), it tries to determine from the data itself, 350 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 351 352 If this assumption is wrong, it can throw on non-ascii 353 characters! 354 355 356 Note that it previously assumed the data was encoded as UTF-8, which 357 is why the dataEncoding argument defaults to that. 358 359 So it shouldn't break backward compatibility. 360 361 But, if you want the best behavior on wild data - figuring it out from the document 362 instead of assuming - you'll probably want to change that argument to null. 363 364 This is a template so it lazily imports arsd.characterencodings, which is required 365 to fix up data encodings. 366 367 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 368 dependency. If it is data from the Internet though, a random website, the encoding 369 is often a lie. This function, if dataEncoding == null, can correct for that, or 370 you can try parseGarbage. In those cases, arsd.characterencodings is required to 371 compile. 372 */ 373 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 374 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 375 parseStream(data, caseSensitive, strict); 376 } 377 378 // note: this work best in strict mode, unless data is just a simple string wrapper 379 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 380 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 381 // of my big app. 382 383 assert(data !is null); 384 385 // go through character by character. 386 // if you see a <, consider it a tag. 387 // name goes until the first non tagname character 388 // then see if it self closes or has an attribute 389 390 // if not in a tag, anything not a tag is a big text 391 // node child. It ends as soon as it sees a < 392 393 // Whitespace in text or attributes is preserved, but not between attributes 394 395 // & and friends are converted when I know them, left the same otherwise 396 397 398 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 399 //validate(data); // it *must* be UTF-8 for this to work correctly 400 401 sizediff_t pos = 0; 402 403 clear(); 404 405 loose = !caseSensitive; 406 407 bool sawImproperNesting = false; 408 bool paragraphHackfixRequired = false; 409 410 int getLineNumber(sizediff_t p) { 411 int line = 1; 412 foreach(c; data[0..p]) 413 if(c == '\n') 414 line++; 415 return line; 416 } 417 418 void parseError(string message) { 419 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 420 } 421 422 bool eatWhitespace() { 423 bool ateAny = false; 424 while(pos < data.length && data[pos].isSimpleWhite) { 425 pos++; 426 ateAny = true; 427 } 428 return ateAny; 429 } 430 431 string readTagName() { 432 // remember to include : for namespaces 433 // basically just keep going until >, /, or whitespace 434 auto start = pos; 435 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 436 { 437 pos++; 438 if(pos == data.length) { 439 if(strict) 440 throw new Exception("tag name incomplete when file ended"); 441 else 442 break; 443 } 444 } 445 446 if(!caseSensitive) 447 return toLower(data[start..pos]); 448 else 449 return data[start..pos]; 450 } 451 452 string readAttributeName() { 453 // remember to include : for namespaces 454 // basically just keep going until >, /, or whitespace 455 auto start = pos; 456 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 457 { 458 if(data[pos] == '<') { 459 if(strict) 460 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 461 else 462 break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 463 } 464 pos++; 465 if(pos == data.length) { 466 if(strict) 467 throw new Exception("unterminated attribute name"); 468 else 469 break; 470 } 471 } 472 473 if(!caseSensitive) 474 return toLower(data[start..pos]); 475 else 476 return data[start..pos]; 477 } 478 479 string readAttributeValue() { 480 if(pos >= data.length) { 481 if(strict) 482 throw new Exception("no attribute value before end of file"); 483 else 484 return null; 485 } 486 switch(data[pos]) { 487 case '\'': 488 case '"': 489 auto started = pos; 490 char end = data[pos]; 491 pos++; 492 auto start = pos; 493 while(pos < data.length && data[pos] != end) 494 pos++; 495 if(strict && pos == data.length) 496 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 497 string v = htmlEntitiesDecode(data[start..pos], strict); 498 pos++; // skip over the end 499 return v; 500 default: 501 if(strict) 502 parseError("Attributes must be quoted"); 503 // read until whitespace or terminator (/> or >) 504 auto start = pos; 505 while( 506 pos < data.length && 507 data[pos] != '>' && 508 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 509 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 510 !data[pos].isSimpleWhite) 511 pos++; 512 513 string v = htmlEntitiesDecode(data[start..pos], strict); 514 // don't skip the end - we'll need it later 515 return v; 516 } 517 } 518 519 TextNode readTextNode() { 520 auto start = pos; 521 while(pos < data.length && data[pos] != '<') { 522 pos++; 523 } 524 525 return TextNode.fromUndecodedString(this, data[start..pos]); 526 } 527 528 // this is obsolete! 529 RawSource readCDataNode() { 530 auto start = pos; 531 while(pos < data.length && data[pos] != '<') { 532 pos++; 533 } 534 535 return new RawSource(this, data[start..pos]); 536 } 537 538 539 struct Ele { 540 int type; // element or closing tag or nothing 541 /* 542 type == 0 means regular node, self-closed (element is valid) 543 type == 1 means closing tag (payload is the tag name, element may be valid) 544 type == 2 means you should ignore it completely 545 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 546 type == 4 means the document was totally empty 547 */ 548 Element element; // for type == 0 or type == 3 549 string payload; // for type == 1 550 } 551 // recursively read a tag 552 Ele readElement(string[] parentChain = null) { 553 // FIXME: this is the slowest function in this module, by far, even in strict mode. 554 // Loose mode should perform decently, but strict mode is the important one. 555 if(!strict && parentChain is null) 556 parentChain = []; 557 558 static string[] recentAutoClosedTags; 559 560 if(pos >= data.length) 561 { 562 if(strict) { 563 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 564 } else { 565 if(parentChain.length) 566 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 567 else 568 return Ele(4); // signal emptiness upstream 569 } 570 } 571 572 if(data[pos] != '<') { 573 return Ele(0, readTextNode(), null); 574 } 575 576 enforce(data[pos] == '<'); 577 pos++; 578 if(pos == data.length) { 579 if(strict) 580 throw new MarkupException("Found trailing < at end of file"); 581 // if not strict, we'll just skip the switch 582 } else 583 switch(data[pos]) { 584 // I don't care about these, so I just want to skip them 585 case '!': // might be a comment, a doctype, or a special instruction 586 pos++; 587 588 // FIXME: we should store these in the tree too 589 // though I like having it stripped out tbh. 590 591 if(pos == data.length) { 592 if(strict) 593 throw new MarkupException("<! opened at end of file"); 594 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 595 // comment 596 pos += 2; 597 598 // FIXME: technically, a comment is anything 599 // between -- and -- inside a <!> block. 600 // so in <!-- test -- lol> , the " lol" is NOT a comment 601 // and should probably be handled differently in here, but for now 602 // I'll just keep running until --> since that's the common way 603 604 auto commentStart = pos; 605 while(pos+3 < data.length && data[pos..pos+3] != "-->") 606 pos++; 607 608 auto end = commentStart; 609 610 if(pos + 3 >= data.length) { 611 if(strict) 612 throw new MarkupException("unclosed comment"); 613 end = data.length; 614 pos = data.length; 615 } else { 616 end = pos; 617 assert(data[pos] == '-'); 618 pos++; 619 assert(data[pos] == '-'); 620 pos++; 621 assert(data[pos] == '>'); 622 pos++; 623 } 624 625 if(parseSawComment !is null) 626 if(parseSawComment(data[commentStart .. end])) { 627 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 628 } 629 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 630 pos += 7; 631 632 auto cdataStart = pos; 633 634 ptrdiff_t end = -1; 635 typeof(end) cdataEnd; 636 637 if(pos < data.length) { 638 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 639 end = data[pos .. $].indexOf("]]>"); 640 } 641 642 if(end == -1) { 643 if(strict) 644 throw new MarkupException("Unclosed CDATA section"); 645 end = pos; 646 cdataEnd = pos; 647 } else { 648 cdataEnd = pos + end; 649 pos = cdataEnd + 3; 650 } 651 652 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 653 } else { 654 auto start = pos; 655 while(pos < data.length && data[pos] != '>') 656 pos++; 657 658 auto bangEnds = pos; 659 if(pos == data.length) { 660 if(strict) 661 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 662 } else pos++; // skipping the > 663 664 if(parseSawBangInstruction !is null) 665 if(parseSawBangInstruction(data[start .. bangEnds])) { 666 // FIXME: these should be able to modify the parser state, 667 // doing things like adding entities, somehow. 668 669 return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null); 670 } 671 } 672 673 /* 674 if(pos < data.length && data[pos] == '>') 675 pos++; // skip the > 676 else 677 assert(!strict); 678 */ 679 break; 680 case '%': 681 case '?': 682 /* 683 Here's what we want to support: 684 685 <% asp code %> 686 <%= asp code %> 687 <?php php code ?> 688 <?= php code ?> 689 690 The contents don't really matter, just if it opens with 691 one of the above for, it ends on the two char terminator. 692 693 <?something> 694 this is NOT php code 695 because I've seen this in the wild: <?EM-dummyText> 696 697 This could be php with shorttags which would be cut off 698 prematurely because if(a >) - that > counts as the close 699 of the tag, but since dom.d can't tell the difference 700 between that and the <?EM> real world example, it will 701 not try to look for the ?> ending. 702 703 The difference between this and the asp/php stuff is that it 704 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 705 on >. 706 */ 707 708 char end = data[pos]; 709 auto started = pos; 710 bool isAsp = end == '%'; 711 int currentIndex = 0; 712 bool isPhp = false; 713 bool isEqualTag = false; 714 int phpCount = 0; 715 716 more: 717 pos++; // skip the start 718 if(pos == data.length) { 719 if(strict) 720 throw new MarkupException("Unclosed <"~end~" by end of file"); 721 } else { 722 currentIndex++; 723 if(currentIndex == 1 && data[pos] == '=') { 724 if(!isAsp) 725 isPhp = true; 726 isEqualTag = true; 727 goto more; 728 } 729 if(currentIndex == 1 && data[pos] == 'p') 730 phpCount++; 731 if(currentIndex == 2 && data[pos] == 'h') 732 phpCount++; 733 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 734 isPhp = true; 735 736 if(data[pos] == '>') { 737 if((isAsp || isPhp) && data[pos - 1] != end) 738 goto more; 739 // otherwise we're done 740 } else 741 goto more; 742 } 743 744 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 745 auto code = data[started .. pos]; 746 747 748 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 749 if(pos < data.length) 750 pos++; // get past the > 751 752 if(isAsp && parseSawAspCode !is null) { 753 if(parseSawAspCode(code)) { 754 return Ele(3, new AspCode(this, code), null); 755 } 756 } else if(isPhp && parseSawPhpCode !is null) { 757 if(parseSawPhpCode(code)) { 758 return Ele(3, new PhpCode(this, code), null); 759 } 760 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 761 if(parseSawQuestionInstruction(code)) { 762 return Ele(3, new QuestionInstruction(this, code), null); 763 } 764 } 765 break; 766 case '/': // closing an element 767 pos++; // skip the start 768 auto p = pos; 769 while(pos < data.length && data[pos] != '>') 770 pos++; 771 //writefln("</%s>", data[p..pos]); 772 if(pos == data.length && data[pos-1] != '>') { 773 if(strict) 774 throw new MarkupException("File ended before closing tag had a required >"); 775 else 776 data ~= ">"; // just hack it in 777 } 778 pos++; // skip the '>' 779 780 string tname = data[p..pos-1]; 781 if(!caseSensitive) 782 tname = tname.toLower(); 783 784 return Ele(1, null, tname); // closing tag reports itself here 785 case ' ': // assume it isn't a real element... 786 if(strict) 787 parseError("bad markup - improperly placed <"); 788 else 789 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 790 break; 791 default: 792 793 if(!strict) { 794 // what about something that kinda looks like a tag, but isn't? 795 auto nextTag = data[pos .. $].indexOf("<"); 796 auto closeTag = data[pos .. $].indexOf(">"); 797 if(closeTag != -1 && nextTag != -1) 798 if(nextTag < closeTag) { 799 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 800 801 auto equal = data[pos .. $].indexOf("=\""); 802 if(equal != -1 && equal < closeTag) { 803 // this MIGHT be ok, soldier on 804 } else { 805 // definitely no good, this must be a (horribly distorted) text node 806 pos++; // skip the < we're on - don't want text node to end prematurely 807 auto node = readTextNode(); 808 node.contents = "<" ~ node.contents; // put this back 809 return Ele(0, node, null); 810 } 811 } 812 } 813 814 string tagName = readTagName(); 815 string[string] attributes; 816 817 Ele addTag(bool selfClosed) { 818 if(selfClosed) 819 pos++; 820 else { 821 if(!strict) 822 if(tagName.isInArray(selfClosedElements)) 823 // these are de-facto self closed 824 selfClosed = true; 825 } 826 827 if(strict) 828 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[pos - 100 .. pos + 100])); 829 else { 830 // if we got here, it's probably because a slash was in an 831 // unquoted attribute - don't trust the selfClosed value 832 if(!selfClosed) 833 selfClosed = tagName.isInArray(selfClosedElements); 834 835 while(pos < data.length && data[pos] != '>') 836 pos++; 837 838 if(pos >= data.length) { 839 // the tag never closed 840 assert(data.length != 0); 841 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 842 } 843 } 844 845 auto whereThisTagStarted = pos; // for better error messages 846 847 pos++; 848 849 auto e = createElement(tagName); 850 e.attributes = attributes; 851 version(dom_node_indexes) { 852 if(e.dataset.nodeIndex.length == 0) 853 e.dataset.nodeIndex = to!string(&(e.attributes)); 854 } 855 e.selfClosed = selfClosed; 856 e.parseAttributes(); 857 858 859 // HACK to handle script and style as a raw data section as it is in HTML browsers 860 if(tagName == "script" || tagName == "style") { 861 if(!selfClosed) { 862 string closer = "</" ~ tagName ~ ">"; 863 ptrdiff_t ending; 864 if(pos >= data.length) 865 ending = -1; 866 else 867 ending = indexOf(data[pos..$], closer); 868 869 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 870 /* 871 if(loose && ending == -1 && pos < data.length) 872 ending = indexOf(data[pos..$], closer.toUpper()); 873 */ 874 if(ending == -1) { 875 if(strict) 876 throw new Exception("tag " ~ tagName ~ " never closed"); 877 else { 878 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 879 if(pos < data.length) { 880 e = new TextNode(this, data[pos .. $]); 881 pos = data.length; 882 } 883 } 884 } else { 885 ending += pos; 886 e.innerRawSource = data[pos..ending]; 887 pos = ending + closer.length; 888 } 889 } 890 return Ele(0, e, null); 891 } 892 893 bool closed = selfClosed; 894 895 void considerHtmlParagraphHack(Element n) { 896 assert(!strict); 897 if(e.tagName == "p" && e.tagName == n.tagName) { 898 // html lets you write <p> para 1 <p> para 1 899 // but in the dom tree, they should be siblings, not children. 900 paragraphHackfixRequired = true; 901 } 902 } 903 904 //writef("<%s>", tagName); 905 while(!closed) { 906 Ele n; 907 if(strict) 908 n = readElement(); 909 else 910 n = readElement(parentChain ~ tagName); 911 912 if(n.type == 4) return n; // the document is empty 913 914 if(n.type == 3 && n.element !is null) { 915 // special node, append if possible 916 if(e !is null) 917 e.appendChild(n.element); 918 else 919 piecesBeforeRoot ~= n.element; 920 } else if(n.type == 0) { 921 if(!strict) 922 considerHtmlParagraphHack(n.element); 923 e.appendChild(n.element); 924 } else if(n.type == 1) { 925 bool found = false; 926 if(n.payload != tagName) { 927 if(strict) 928 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 929 else { 930 sawImproperNesting = true; 931 // this is so we don't drop several levels of awful markup 932 if(n.element) { 933 if(!strict) 934 considerHtmlParagraphHack(n.element); 935 e.appendChild(n.element); 936 n.element = null; 937 } 938 939 // is the element open somewhere up the chain? 940 foreach(i, parent; parentChain) 941 if(parent == n.payload) { 942 recentAutoClosedTags ~= tagName; 943 // just rotating it so we don't inadvertently break stuff with vile crap 944 if(recentAutoClosedTags.length > 4) 945 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 946 947 n.element = e; 948 return n; 949 } 950 951 // if not, this is a text node; we can't fix it up... 952 953 // If it's already in the tree somewhere, assume it is closed by algorithm 954 // and we shouldn't output it - odds are the user just flipped a couple tags 955 foreach(ele; e.tree) { 956 if(ele.tagName == n.payload) { 957 found = true; 958 break; 959 } 960 } 961 962 foreach(ele; recentAutoClosedTags) { 963 if(ele == n.payload) { 964 found = true; 965 break; 966 } 967 } 968 969 if(!found) // if not found in the tree though, it's probably just text 970 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 971 } 972 } else { 973 if(n.element) { 974 if(!strict) 975 considerHtmlParagraphHack(n.element); 976 e.appendChild(n.element); 977 } 978 } 979 980 if(n.payload == tagName) // in strict mode, this is always true 981 closed = true; 982 } else { /*throw new Exception("wtf " ~ tagName);*/ } 983 } 984 //writef("</%s>\n", tagName); 985 return Ele(0, e, null); 986 } 987 988 // if a tag was opened but not closed by end of file, we can arrive here 989 if(!strict && pos >= data.length) 990 return addTag(false); 991 //else if(strict) assert(0); // should be caught before 992 993 switch(data[pos]) { 994 default: assert(0); 995 case '/': // self closing tag 996 return addTag(true); 997 case '>': 998 return addTag(false); 999 case ' ': 1000 case '\t': 1001 case '\n': 1002 case '\r': 1003 // there might be attributes... 1004 moreAttributes: 1005 eatWhitespace(); 1006 1007 // same deal as above the switch.... 1008 if(!strict && pos >= data.length) 1009 return addTag(false); 1010 1011 if(strict && pos >= data.length) 1012 throw new MarkupException("tag open, didn't find > before end of file"); 1013 1014 switch(data[pos]) { 1015 case '/': // self closing tag 1016 return addTag(true); 1017 case '>': // closed tag; open -- we now read the contents 1018 return addTag(false); 1019 default: // it is an attribute 1020 string attrName = readAttributeName(); 1021 string attrValue = attrName; 1022 1023 bool ateAny = eatWhitespace(); 1024 if(strict && ateAny) 1025 throw new MarkupException("inappropriate whitespace after attribute name"); 1026 1027 if(pos >= data.length) { 1028 if(strict) 1029 assert(0, "this should have thrown in readAttributeName"); 1030 else { 1031 data ~= ">"; 1032 goto blankValue; 1033 } 1034 } 1035 if(data[pos] == '=') { 1036 pos++; 1037 1038 ateAny = eatWhitespace(); 1039 if(strict && ateAny) 1040 throw new MarkupException("inappropriate whitespace after attribute equals"); 1041 1042 attrValue = readAttributeValue(); 1043 1044 eatWhitespace(); 1045 } 1046 1047 blankValue: 1048 1049 if(strict && attrName in attributes) 1050 throw new MarkupException("Repeated attribute: " ~ attrName); 1051 1052 if(attrName.strip().length) 1053 attributes[attrName] = attrValue; 1054 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1055 1056 if(!strict && pos < data.length && data[pos] == '<') { 1057 // this is the broken tag that doesn't have a > at the end 1058 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1059 // let's insert one as a hack 1060 goto case '>'; 1061 } 1062 1063 goto moreAttributes; 1064 } 1065 } 1066 } 1067 1068 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1069 //assert(0); 1070 } 1071 1072 eatWhitespace(); 1073 Ele r; 1074 do { 1075 r = readElement(); // there SHOULD only be one element... 1076 1077 if(r.type == 3 && r.element !is null) 1078 piecesBeforeRoot ~= r.element; 1079 1080 if(r.type == 4) 1081 break; // the document is completely empty... 1082 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1083 1084 root = r.element; 1085 1086 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1087 while(r.type != 4) { 1088 r = readElement(); 1089 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1090 if(r.element !is null) 1091 piecesAfterRoot ~= r.element; 1092 } 1093 } 1094 1095 if(root is null) 1096 { 1097 if(strict) 1098 assert(0, "empty document should be impossible in strict mode"); 1099 else 1100 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1101 } 1102 1103 if(paragraphHackfixRequired) { 1104 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1105 1106 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1107 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1108 1109 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1110 // Kind of inefficient because we can't detect when we recurse back out of a node. 1111 Element[Element] insertLocations; 1112 auto iterator = root.tree; 1113 foreach(ele; iterator) { 1114 if(ele.parentNode is null) 1115 continue; 1116 1117 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1118 auto shouldBePreviousSibling = ele.parentNode; 1119 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 1120 if (auto p = holder in insertLocations) { 1121 shouldBePreviousSibling = *p; 1122 assert(shouldBePreviousSibling.parentNode is holder); 1123 } 1124 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1125 insertLocations[holder] = ele; 1126 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1127 } 1128 } 1129 } 1130 } 1131 1132 /* end massive parse function */ 1133 1134 /// Gets the <title> element's innerText, if one exists 1135 @property string title() { 1136 bool doesItMatch(Element e) { 1137 return (e.tagName == "title"); 1138 } 1139 1140 auto e = findFirst(&doesItMatch); 1141 if(e) 1142 return e.innerText(); 1143 return ""; 1144 } 1145 1146 /// Sets the title of the page, creating a <title> element if needed. 1147 @property void title(string t) { 1148 bool doesItMatch(Element e) { 1149 return (e.tagName == "title"); 1150 } 1151 1152 auto e = findFirst(&doesItMatch); 1153 1154 if(!e) { 1155 e = createElement("title"); 1156 auto heads = getElementsByTagName("head"); 1157 if(heads.length) 1158 heads[0].appendChild(e); 1159 } 1160 1161 if(e) 1162 e.innerText = t; 1163 } 1164 1165 // FIXME: would it work to alias root this; ???? might be a good idea 1166 /// These functions all forward to the root element. See the documentation in the Element class. 1167 Element getElementById(string id) { 1168 return root.getElementById(id); 1169 } 1170 1171 /// ditto 1172 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1173 if( is(SomeElementType : Element)) 1174 out(ret) { assert(ret !is null); } 1175 body { 1176 return root.requireElementById!(SomeElementType)(id, file, line); 1177 } 1178 1179 /// ditto 1180 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1181 if( is(SomeElementType : Element)) 1182 out(ret) { assert(ret !is null); } 1183 body { 1184 return root.requireSelector!(SomeElementType)(selector, file, line); 1185 } 1186 1187 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1188 if(is(SomeElementType : Element)) 1189 { 1190 return root.optionSelector!(SomeElementType)(selector, file, line); 1191 } 1192 1193 1194 /// ditto 1195 Element querySelector(string selector) { 1196 return root.querySelector(selector); 1197 } 1198 1199 /// ditto 1200 Element[] querySelectorAll(string selector) { 1201 return root.querySelectorAll(selector); 1202 } 1203 1204 /// ditto 1205 Element[] getElementsBySelector(string selector) { 1206 return root.getElementsBySelector(selector); 1207 } 1208 1209 /// ditto 1210 Element[] getElementsByTagName(string tag) { 1211 return root.getElementsByTagName(tag); 1212 } 1213 1214 /// ditto 1215 Element[] getElementsByClassName(string tag) { 1216 return root.getElementsByClassName(tag); 1217 } 1218 1219 /** FIXME: btw, this could just be a lazy range...... */ 1220 Element getFirstElementByTagName(string tag) { 1221 if(loose) 1222 tag = tag.toLower(); 1223 bool doesItMatch(Element e) { 1224 return e.tagName == tag; 1225 } 1226 return findFirst(&doesItMatch); 1227 } 1228 1229 /// This returns the <body> element, if there is one. (It different than Javascript, where it is called 'body', because body is a keyword in D.) 1230 Element mainBody() { 1231 return getFirstElementByTagName("body"); 1232 } 1233 1234 /// this uses a weird thing... it's [name=] if no colon and 1235 /// [property=] if colon 1236 string getMeta(string name) { 1237 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1238 auto e = querySelector("head meta["~thing~"="~name~"]"); 1239 if(e is null) 1240 return null; 1241 return e.content; 1242 } 1243 1244 /// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/ 1245 void setMeta(string name, string value) { 1246 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1247 auto e = querySelector("head meta["~thing~"="~name~"]"); 1248 if(e is null) { 1249 e = requireSelector("head").addChild("meta"); 1250 e.setAttribute(thing, name); 1251 } 1252 1253 e.content = value; 1254 } 1255 1256 ///. 1257 Form[] forms() { 1258 return cast(Form[]) getElementsByTagName("form"); 1259 } 1260 1261 ///. 1262 Form createForm() 1263 out(ret) { 1264 assert(ret !is null); 1265 } 1266 body { 1267 return cast(Form) createElement("form"); 1268 } 1269 1270 ///. 1271 Element createElement(string name) { 1272 if(loose) 1273 name = name.toLower(); 1274 1275 auto e = Element.make(name); 1276 e.parentDocument = this; 1277 1278 return e; 1279 1280 // return new Element(this, name, null, selfClosed); 1281 } 1282 1283 ///. 1284 Element createFragment() { 1285 return new DocumentFragment(this); 1286 } 1287 1288 ///. 1289 Element createTextNode(string content) { 1290 return new TextNode(this, content); 1291 } 1292 1293 1294 ///. 1295 Element findFirst(bool delegate(Element) doesItMatch) { 1296 Element result; 1297 1298 bool goThroughElement(Element e) { 1299 if(doesItMatch(e)) { 1300 result = e; 1301 return true; 1302 } 1303 1304 foreach(child; e.children) { 1305 if(goThroughElement(child)) 1306 return true; 1307 } 1308 1309 return false; 1310 } 1311 1312 goThroughElement(root); 1313 1314 return result; 1315 } 1316 1317 ///. 1318 void clear() { 1319 root = null; 1320 loose = false; 1321 } 1322 1323 ///. 1324 void setProlog(string d) { 1325 _prolog = d; 1326 prologWasSet = true; 1327 } 1328 1329 ///. 1330 private string _prolog = "<!DOCTYPE html>\n"; 1331 private bool prologWasSet = false; // set to true if the user changed it 1332 1333 @property string prolog() const { 1334 // if the user explicitly changed it, do what they want 1335 // or if we didn't keep/find stuff from the document itself, 1336 // we'll use the builtin one as a default. 1337 if(prologWasSet || piecesBeforeRoot.length == 0) 1338 return _prolog; 1339 1340 string p; 1341 foreach(e; piecesBeforeRoot) 1342 p ~= e.toString() ~ "\n"; 1343 return p; 1344 } 1345 1346 ///. 1347 override string toString() const { 1348 return prolog ~ root.toString(); 1349 } 1350 1351 /++ 1352 Writes it out with whitespace for easier eyeball debugging 1353 1354 Do NOT use for anything other than eyeball debugging, 1355 because whitespace may be significant content in XML. 1356 +/ 1357 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 1358 string s = prolog; 1359 1360 if(insertComments) s ~= "<!--"; 1361 s ~= "\n"; 1362 if(insertComments) s ~= "-->"; 1363 1364 s ~= root.toPrettyString(insertComments, indentationLevel, indentWith); 1365 foreach(a; piecesAfterRoot) 1366 s ~= a.toPrettyString(insertComments, indentationLevel, indentWith); 1367 return s; 1368 } 1369 1370 ///. 1371 Element root; 1372 1373 /// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s 1374 Element[] piecesBeforeRoot; 1375 1376 /// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it 1377 Element[] piecesAfterRoot; 1378 1379 ///. 1380 bool loose; 1381 1382 1383 1384 // what follows are for mutation events that you can observe 1385 void delegate(DomMutationEvent)[] eventObservers; 1386 1387 void dispatchMutationEvent(DomMutationEvent e) { 1388 foreach(o; eventObservers) 1389 o(e); 1390 } 1391 } 1392 1393 /// This represents almost everything in the DOM. 1394 class Element { 1395 /// Returns a collection of elements by selector. 1396 /// See: [Document.opIndex] 1397 ElementCollection opIndex(string selector) { 1398 auto e = ElementCollection(this); 1399 return e[selector]; 1400 } 1401 1402 /++ 1403 Returns the child node with the particular index. 1404 1405 Be aware that child nodes include text nodes, including 1406 whitespace-only nodes. 1407 +/ 1408 Element opIndex(size_t index) { 1409 if(index >= children.length) 1410 return null; 1411 return this.children[index]; 1412 } 1413 1414 /// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 1415 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1416 if( 1417 is(SomeElementType : Element) 1418 ) 1419 out(ret) { 1420 assert(ret !is null); 1421 } 1422 body { 1423 auto e = cast(SomeElementType) getElementById(id); 1424 if(e is null) 1425 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line); 1426 return e; 1427 } 1428 1429 /// ditto but with selectors instead of ids 1430 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1431 if( 1432 is(SomeElementType : Element) 1433 ) 1434 out(ret) { 1435 assert(ret !is null); 1436 } 1437 body { 1438 auto e = cast(SomeElementType) querySelector(selector); 1439 if(e is null) 1440 throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line); 1441 return e; 1442 } 1443 1444 1445 /++ 1446 If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods. 1447 +/ 1448 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1449 if(is(SomeElementType : Element)) 1450 { 1451 auto e = cast(SomeElementType) querySelector(selector); 1452 return MaybeNullElement!SomeElementType(e); 1453 } 1454 1455 1456 1457 /// get all the classes on this element 1458 @property string[] classes() { 1459 return split(className, " "); 1460 } 1461 1462 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 1463 @scriptable 1464 Element addClass(string c) { 1465 if(hasClass(c)) 1466 return this; // don't add it twice 1467 1468 string cn = getAttribute("class"); 1469 if(cn.length == 0) { 1470 setAttribute("class", c); 1471 return this; 1472 } else { 1473 setAttribute("class", cn ~ " " ~ c); 1474 } 1475 1476 return this; 1477 } 1478 1479 /// Removes a particular class name. 1480 @scriptable 1481 Element removeClass(string c) { 1482 if(!hasClass(c)) 1483 return this; 1484 string n; 1485 foreach(name; classes) { 1486 if(c == name) 1487 continue; // cut it out 1488 if(n.length) 1489 n ~= " "; 1490 n ~= name; 1491 } 1492 1493 className = n.strip(); 1494 1495 return this; 1496 } 1497 1498 /// Returns whether the given class appears in this element. 1499 bool hasClass(string c) { 1500 string cn = className; 1501 1502 auto idx = cn.indexOf(c); 1503 if(idx == -1) 1504 return false; 1505 1506 foreach(cla; cn.split(" ")) 1507 if(cla == c) 1508 return true; 1509 return false; 1510 1511 /* 1512 int rightSide = idx + c.length; 1513 1514 bool checkRight() { 1515 if(rightSide == cn.length) 1516 return true; // it's the only class 1517 else if(iswhite(cn[rightSide])) 1518 return true; 1519 return false; // this is a substring of something else.. 1520 } 1521 1522 if(idx == 0) { 1523 return checkRight(); 1524 } else { 1525 if(!iswhite(cn[idx - 1])) 1526 return false; // substring 1527 return checkRight(); 1528 } 1529 1530 assert(0); 1531 */ 1532 } 1533 1534 1535 /* ******************************* 1536 DOM Mutation 1537 *********************************/ 1538 /// convenience function to quickly add a tag with some text or 1539 /// other relevant info (for example, it's a src for an <img> element 1540 /// instead of inner text) 1541 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 1542 in { 1543 assert(tagName !is null); 1544 } 1545 out(e) { 1546 assert(e.parentNode is this); 1547 assert(e.parentDocument is this.parentDocument); 1548 } 1549 body { 1550 auto e = Element.make(tagName, childInfo, childInfo2); 1551 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 1552 // return the parent. That will break existing code though. 1553 return appendChild(e); 1554 } 1555 1556 /// Another convenience function. Adds a child directly after the current one, returning 1557 /// the new child. 1558 /// 1559 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 1560 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 1561 in { 1562 assert(tagName !is null); 1563 assert(parentNode !is null); 1564 } 1565 out(e) { 1566 assert(e.parentNode is this.parentNode); 1567 assert(e.parentDocument is this.parentDocument); 1568 } 1569 body { 1570 auto e = Element.make(tagName, childInfo, childInfo2); 1571 return parentNode.insertAfter(this, e); 1572 } 1573 1574 /// 1575 Element addSibling(Element e) { 1576 return parentNode.insertAfter(this, e); 1577 } 1578 1579 /// 1580 Element addChild(Element e) { 1581 return this.appendChild(e); 1582 } 1583 1584 /// Convenience function to append text intermixed with other children. 1585 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 1586 /// or div.addChildren("Hello, ", user.name, "!"); 1587 1588 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 1589 void addChildren(T...)(T t) { 1590 foreach(item; t) { 1591 static if(is(item : Element)) 1592 appendChild(item); 1593 else static if (is(isSomeString!(item))) 1594 appendText(to!string(item)); 1595 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 1596 } 1597 } 1598 1599 ///. 1600 Element addChild(string tagName, Element firstChild, string info2 = null) 1601 in { 1602 assert(firstChild !is null); 1603 } 1604 out(ret) { 1605 assert(ret !is null); 1606 assert(ret.parentNode is this); 1607 assert(firstChild.parentNode is ret); 1608 1609 assert(ret.parentDocument is this.parentDocument); 1610 //assert(firstChild.parentDocument is this.parentDocument); 1611 } 1612 body { 1613 auto e = Element.make(tagName, "", info2); 1614 e.appendChild(firstChild); 1615 this.appendChild(e); 1616 return e; 1617 } 1618 1619 /// 1620 Element addChild(string tagName, in Html innerHtml, string info2 = null) 1621 in { 1622 } 1623 out(ret) { 1624 assert(ret !is null); 1625 assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null"); 1626 assert(ret.parentDocument is this.parentDocument); 1627 } 1628 body { 1629 auto e = Element.make(tagName, "", info2); 1630 this.appendChild(e); 1631 e.innerHTML = innerHtml.source; 1632 return e; 1633 } 1634 1635 1636 /// . 1637 void appendChildren(Element[] children) { 1638 foreach(ele; children) 1639 appendChild(ele); 1640 } 1641 1642 ///. 1643 void reparent(Element newParent) 1644 in { 1645 assert(newParent !is null); 1646 assert(parentNode !is null); 1647 } 1648 out { 1649 assert(this.parentNode is newParent); 1650 //assert(isInArray(this, newParent.children)); 1651 } 1652 body { 1653 parentNode.removeChild(this); 1654 newParent.appendChild(this); 1655 } 1656 1657 /** 1658 Strips this tag out of the document, putting its inner html 1659 as children of the parent. 1660 1661 For example, given: `<p>hello <b>there</b></p>`, if you 1662 call `stripOut` on the `b` element, you'll be left with 1663 `<p>hello there<p>`. 1664 1665 The idea here is to make it easy to get rid of garbage 1666 markup you aren't interested in. 1667 */ 1668 void stripOut() 1669 in { 1670 assert(parentNode !is null); 1671 } 1672 out { 1673 assert(parentNode is null); 1674 assert(children.length == 0); 1675 } 1676 body { 1677 foreach(c; children) 1678 c.parentNode = null; // remove the parent 1679 if(children.length) 1680 parentNode.replaceChild(this, this.children); 1681 else 1682 parentNode.removeChild(this); 1683 this.children.length = 0; // we reparented them all above 1684 } 1685 1686 /// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check 1687 /// if the element already isn't in a tree, it does nothing. 1688 Element removeFromTree() 1689 in { 1690 1691 } 1692 out(var) { 1693 assert(this.parentNode is null); 1694 assert(var is this); 1695 } 1696 body { 1697 if(this.parentNode is null) 1698 return this; 1699 1700 this.parentNode.removeChild(this); 1701 1702 return this; 1703 } 1704 1705 /++ 1706 Wraps this element inside the given element. 1707 It's like `this.replaceWith(what); what.appendchild(this);` 1708 1709 Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));` 1710 you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`. 1711 +/ 1712 Element wrapIn(Element what) 1713 in { 1714 assert(what !is null); 1715 } 1716 out(ret) { 1717 assert(this.parentNode is what); 1718 assert(ret is what); 1719 } 1720 body { 1721 this.replaceWith(what); 1722 what.appendChild(this); 1723 1724 return what; 1725 } 1726 1727 /// Replaces this element with something else in the tree. 1728 Element replaceWith(Element e) 1729 in { 1730 assert(this.parentNode !is null); 1731 } 1732 body { 1733 e.removeFromTree(); 1734 this.parentNode.replaceChild(this, e); 1735 return e; 1736 } 1737 1738 /** 1739 Splits the className into an array of each class given 1740 */ 1741 string[] classNames() const { 1742 return className().split(" "); 1743 } 1744 1745 /** 1746 Fetches the first consecutive text nodes concatenated together. 1747 1748 1749 `firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered. 1750 1751 See_also: [directText], [innerText] 1752 */ 1753 string firstInnerText() const { 1754 string s; 1755 foreach(child; children) { 1756 if(child.nodeType != NodeType.Text) 1757 break; 1758 1759 s ~= child.nodeValue(); 1760 } 1761 return s; 1762 } 1763 1764 1765 /** 1766 Returns the text directly under this element. 1767 1768 1769 Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues 1770 past child tags. So, `<example>some <b>bold</b> text</example>` 1771 will return `some text` because it only gets the text, skipping non-text children. 1772 1773 See_also: [firstInnerText], [innerText] 1774 */ 1775 @property string directText() { 1776 string ret; 1777 foreach(e; children) { 1778 if(e.nodeType == NodeType.Text) 1779 ret ~= e.nodeValue(); 1780 } 1781 1782 return ret; 1783 } 1784 1785 /** 1786 Sets the direct text, without modifying other child nodes. 1787 1788 1789 Unlike [innerText], this does *not* remove existing elements in the element. 1790 1791 It only replaces the first text node it sees. 1792 1793 If there are no text nodes, it calls [appendText]. 1794 1795 So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`. 1796 */ 1797 @property void directText(string text) { 1798 foreach(e; children) { 1799 if(e.nodeType == NodeType.Text) { 1800 auto it = cast(TextNode) e; 1801 it.contents = text; 1802 return; 1803 } 1804 } 1805 1806 appendText(text); 1807 } 1808 1809 // do nothing, this is primarily a virtual hook 1810 // for links and forms 1811 void setValue(string field, string value) { } 1812 1813 1814 // this is a thing so i can remove observer support if it gets slow 1815 // I have not implemented all these yet 1816 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 1817 if(parentDocument is null) return; 1818 DomMutationEvent me; 1819 me.operation = operation; 1820 me.target = this; 1821 me.relatedString = s1; 1822 me.relatedString2 = s2; 1823 me.related = r; 1824 me.related2 = r2; 1825 parentDocument.dispatchMutationEvent(me); 1826 } 1827 1828 // putting all the members up front 1829 1830 // this ought to be private. don't use it directly. 1831 Element[] children; 1832 1833 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 1834 string tagName; 1835 1836 /// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 1837 string[string] attributes; 1838 1839 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 1840 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 1841 private bool selfClosed; 1842 1843 /// Get the parent Document object that contains this element. 1844 /// It may be null, so remember to check for that. 1845 Document parentDocument; 1846 1847 ///. 1848 inout(Element) parentNode() inout { 1849 auto p = _parentNode; 1850 1851 if(cast(DocumentFragment) p) 1852 return p._parentNode; 1853 1854 return p; 1855 } 1856 1857 //protected 1858 Element parentNode(Element e) { 1859 return _parentNode = e; 1860 } 1861 1862 private Element _parentNode; 1863 1864 // the next few methods are for implementing interactive kind of things 1865 private CssStyle _computedStyle; 1866 1867 // these are here for event handlers. Don't forget that this library never fires events. 1868 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 1869 EventHandler[][string] bubblingEventHandlers; 1870 EventHandler[][string] capturingEventHandlers; 1871 EventHandler[string] defaultEventHandlers; 1872 1873 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 1874 if(event.length > 2 && event[0..2] == "on") 1875 event = event[2 .. $]; 1876 1877 if(useCapture) 1878 capturingEventHandlers[event] ~= handler; 1879 else 1880 bubblingEventHandlers[event] ~= handler; 1881 } 1882 1883 1884 // and now methods 1885 1886 /// Convenience function to try to do the right thing for HTML. This is the main 1887 /// way I create elements. 1888 static Element make(string tagName, string childInfo = null, string childInfo2 = null) { 1889 bool selfClosed = tagName.isInArray(selfClosedElements); 1890 1891 Element e; 1892 // want to create the right kind of object for the given tag... 1893 switch(tagName) { 1894 case "#text": 1895 e = new TextNode(null, childInfo); 1896 return e; 1897 // break; 1898 case "table": 1899 e = new Table(null); 1900 break; 1901 case "a": 1902 e = new Link(null); 1903 break; 1904 case "form": 1905 e = new Form(null); 1906 break; 1907 case "tr": 1908 e = new TableRow(null); 1909 break; 1910 case "td", "th": 1911 e = new TableCell(null, tagName); 1912 break; 1913 default: 1914 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 1915 } 1916 1917 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 1918 e.tagName = tagName; 1919 e.selfClosed = selfClosed; 1920 1921 if(childInfo !is null) 1922 switch(tagName) { 1923 /* html5 convenience tags */ 1924 case "audio": 1925 if(childInfo.length) 1926 e.addChild("source", childInfo); 1927 if(childInfo2 !is null) 1928 e.appendText(childInfo2); 1929 break; 1930 case "source": 1931 e.src = childInfo; 1932 if(childInfo2 !is null) 1933 e.type = childInfo2; 1934 break; 1935 /* regular html 4 stuff */ 1936 case "img": 1937 e.src = childInfo; 1938 if(childInfo2 !is null) 1939 e.alt = childInfo2; 1940 break; 1941 case "link": 1942 e.href = childInfo; 1943 if(childInfo2 !is null) 1944 e.rel = childInfo2; 1945 break; 1946 case "option": 1947 e.innerText = childInfo; 1948 if(childInfo2 !is null) 1949 e.value = childInfo2; 1950 break; 1951 case "input": 1952 e.type = "hidden"; 1953 e.name = childInfo; 1954 if(childInfo2 !is null) 1955 e.value = childInfo2; 1956 break; 1957 case "button": 1958 e.innerText = childInfo; 1959 if(childInfo2 !is null) 1960 e.type = childInfo2; 1961 break; 1962 case "a": 1963 e.innerText = childInfo; 1964 if(childInfo2 !is null) 1965 e.href = childInfo2; 1966 break; 1967 case "script": 1968 case "style": 1969 e.innerRawSource = childInfo; 1970 break; 1971 case "meta": 1972 e.name = childInfo; 1973 if(childInfo2 !is null) 1974 e.content = childInfo2; 1975 break; 1976 /* generically, assume we were passed text and perhaps class */ 1977 default: 1978 e.innerText = childInfo; 1979 if(childInfo2.length) 1980 e.className = childInfo2; 1981 } 1982 1983 return e; 1984 } 1985 1986 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 1987 // FIXME: childInfo2 is ignored when info1 is null 1988 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 1989 m.innerHTML = innerHtml.source; 1990 return m; 1991 } 1992 1993 static Element make(string tagName, Element child, string childInfo2 = null) { 1994 auto m = Element.make(tagName, cast(string) null, childInfo2); 1995 m.appendChild(child); 1996 return m; 1997 } 1998 1999 2000 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2001 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2002 parentDocument = _parentDocument; 2003 tagName = _tagName; 2004 if(_attributes !is null) 2005 attributes = _attributes; 2006 selfClosed = _selfClosed; 2007 2008 version(dom_node_indexes) 2009 this.dataset.nodeIndex = to!string(&(this.attributes)); 2010 2011 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2012 } 2013 2014 /// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2015 /// Note also that without a parent document, elements are always in strict, case-sensitive mode. 2016 this(string _tagName, string[string] _attributes = null) { 2017 tagName = _tagName; 2018 if(_attributes !is null) 2019 attributes = _attributes; 2020 selfClosed = tagName.isInArray(selfClosedElements); 2021 2022 // this is meant to reserve some memory. It makes a small, but consistent improvement. 2023 //children.length = 8; 2024 //children.length = 0; 2025 2026 version(dom_node_indexes) 2027 this.dataset.nodeIndex = to!string(&(this.attributes)); 2028 } 2029 2030 private this(Document _parentDocument) { 2031 parentDocument = _parentDocument; 2032 2033 version(dom_node_indexes) 2034 this.dataset.nodeIndex = to!string(&(this.attributes)); 2035 } 2036 2037 2038 /* ******************************* 2039 Navigating the DOM 2040 *********************************/ 2041 2042 /// Returns the first child of this element. If it has no children, returns null. 2043 /// Remember, text nodes are children too. 2044 @property Element firstChild() { 2045 return children.length ? children[0] : null; 2046 } 2047 2048 /// 2049 @property Element lastChild() { 2050 return children.length ? children[$ - 1] : null; 2051 } 2052 2053 /// UNTESTED 2054 /// the next element you would encounter if you were reading it in the source 2055 Element nextInSource() { 2056 auto n = firstChild; 2057 if(n is null) 2058 n = nextSibling(); 2059 if(n is null) { 2060 auto p = this.parentNode; 2061 while(p !is null && n is null) { 2062 n = p.nextSibling; 2063 } 2064 } 2065 2066 return n; 2067 } 2068 2069 /// UNTESTED 2070 /// ditto 2071 Element previousInSource() { 2072 auto p = previousSibling; 2073 if(p is null) { 2074 auto par = parentNode; 2075 if(par) 2076 p = par.lastChild; 2077 if(p is null) 2078 p = par; 2079 } 2080 return p; 2081 } 2082 2083 ///. 2084 @property Element previousSibling(string tagName = null) { 2085 if(this.parentNode is null) 2086 return null; 2087 Element ps = null; 2088 foreach(e; this.parentNode.childNodes) { 2089 if(e is this) 2090 break; 2091 if(tagName == "*" && e.nodeType != NodeType.Text) { 2092 ps = e; 2093 break; 2094 } 2095 if(tagName is null || e.tagName == tagName) 2096 ps = e; 2097 } 2098 2099 return ps; 2100 } 2101 2102 ///. 2103 @property Element nextSibling(string tagName = null) { 2104 if(this.parentNode is null) 2105 return null; 2106 Element ns = null; 2107 bool mightBe = false; 2108 foreach(e; this.parentNode.childNodes) { 2109 if(e is this) { 2110 mightBe = true; 2111 continue; 2112 } 2113 if(mightBe) { 2114 if(tagName == "*" && e.nodeType != NodeType.Text) { 2115 ns = e; 2116 break; 2117 } 2118 if(tagName is null || e.tagName == tagName) { 2119 ns = e; 2120 break; 2121 } 2122 } 2123 } 2124 2125 return ns; 2126 } 2127 2128 2129 /// Gets the nearest node, going up the chain, with the given tagName 2130 /// May return null or throw. 2131 T getParent(T = Element)(string tagName = null) if(is(T : Element)) { 2132 if(tagName is null) { 2133 static if(is(T == Form)) 2134 tagName = "form"; 2135 else static if(is(T == Table)) 2136 tagName = "table"; 2137 else static if(is(T == Link)) 2138 tagName == "a"; 2139 } 2140 2141 auto par = this.parentNode; 2142 while(par !is null) { 2143 if(tagName is null || par.tagName == tagName) 2144 break; 2145 par = par.parentNode; 2146 } 2147 2148 static if(!is(T == Element)) { 2149 auto t = cast(T) par; 2150 if(t is null) 2151 throw new ElementNotFoundException("", tagName ~ " parent not found", this); 2152 } else 2153 auto t = par; 2154 2155 return t; 2156 } 2157 2158 ///. 2159 Element getElementById(string id) { 2160 // FIXME: I use this function a lot, and it's kinda slow 2161 // not terribly slow, but not great. 2162 foreach(e; tree) 2163 if(e.id == id) 2164 return e; 2165 return null; 2166 } 2167 2168 /// Note: you can give multiple selectors, separated by commas. 2169 /// It will return the first match it finds. 2170 @scriptable 2171 Element querySelector(string selector) { 2172 // FIXME: inefficient; it gets all results just to discard most of them 2173 auto list = getElementsBySelector(selector); 2174 if(list.length == 0) 2175 return null; 2176 return list[0]; 2177 } 2178 2179 /// a more standards-compliant alias for getElementsBySelector 2180 Element[] querySelectorAll(string selector) { 2181 return getElementsBySelector(selector); 2182 } 2183 2184 /** 2185 Returns elements that match the given CSS selector 2186 2187 * -- all, default if nothing else is there 2188 2189 tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector 2190 2191 It is all additive 2192 2193 OP 2194 2195 space = descendant 2196 > = direct descendant 2197 + = sibling (E+F Matches any F element immediately preceded by a sibling element E) 2198 2199 [foo] Foo is present as an attribute 2200 [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". 2201 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 2202 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 2203 2204 [item$=sdas] ends with 2205 [item^-sdsad] begins with 2206 2207 Quotes are optional here. 2208 2209 Pseudos: 2210 :first-child 2211 :last-child 2212 :link (same as a[href] for our purposes here) 2213 2214 2215 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 2216 2217 2218 2219 This ONLY cares about elements. text, etc, are ignored 2220 2221 2222 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 2223 */ 2224 Element[] getElementsBySelector(string selector) { 2225 // FIXME: this function could probably use some performance attention 2226 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2227 2228 2229 bool caseSensitiveTags = true; 2230 if(parentDocument && parentDocument.loose) 2231 caseSensitiveTags = false; 2232 2233 Element[] ret; 2234 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2235 ret ~= sel.getElements(this); 2236 return ret; 2237 } 2238 2239 /// . 2240 Element[] getElementsByClassName(string cn) { 2241 // is this correct? 2242 return getElementsBySelector("." ~ cn); 2243 } 2244 2245 ///. 2246 Element[] getElementsByTagName(string tag) { 2247 if(parentDocument && parentDocument.loose) 2248 tag = tag.toLower(); 2249 Element[] ret; 2250 foreach(e; tree) 2251 if(e.tagName == tag) 2252 ret ~= e; 2253 return ret; 2254 } 2255 2256 2257 /* ******************************* 2258 Attributes 2259 *********************************/ 2260 2261 /** 2262 Gets the given attribute value, or null if the 2263 attribute is not set. 2264 2265 Note that the returned string is decoded, so it no longer contains any xml entities. 2266 */ 2267 @scriptable 2268 string getAttribute(string name) const { 2269 if(parentDocument && parentDocument.loose) 2270 name = name.toLower(); 2271 auto e = name in attributes; 2272 if(e) 2273 return *e; 2274 else 2275 return null; 2276 } 2277 2278 /** 2279 Sets an attribute. Returns this for easy chaining 2280 */ 2281 @scriptable 2282 Element setAttribute(string name, string value) { 2283 if(parentDocument && parentDocument.loose) 2284 name = name.toLower(); 2285 2286 // I never use this shit legitimately and neither should you 2287 auto it = name.toLower(); 2288 if(it == "href" || it == "src") { 2289 auto v = value.strip().toLower(); 2290 if(v.startsWith("vbscript:")) 2291 value = value[9..$]; 2292 if(v.startsWith("javascript:")) 2293 value = value[11..$]; 2294 } 2295 2296 attributes[name] = value; 2297 2298 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 2299 2300 return this; 2301 } 2302 2303 /** 2304 Returns if the attribute exists. 2305 */ 2306 @scriptable 2307 bool hasAttribute(string name) { 2308 if(parentDocument && parentDocument.loose) 2309 name = name.toLower(); 2310 2311 if(name in attributes) 2312 return true; 2313 else 2314 return false; 2315 } 2316 2317 /** 2318 Removes the given attribute from the element. 2319 */ 2320 @scriptable 2321 Element removeAttribute(string name) 2322 out(ret) { 2323 assert(ret is this); 2324 } 2325 body { 2326 if(parentDocument && parentDocument.loose) 2327 name = name.toLower(); 2328 if(name in attributes) 2329 attributes.remove(name); 2330 2331 sendObserverEvent(DomMutationOperations.removeAttribute, name); 2332 return this; 2333 } 2334 2335 /** 2336 Gets the class attribute's contents. Returns 2337 an empty string if it has no class. 2338 */ 2339 @property string className() const { 2340 auto c = getAttribute("class"); 2341 if(c is null) 2342 return ""; 2343 return c; 2344 } 2345 2346 ///. 2347 @property Element className(string c) { 2348 setAttribute("class", c); 2349 return this; 2350 } 2351 2352 /** 2353 Provides easy access to common HTML attributes, object style. 2354 2355 --- 2356 auto element = Element.make("a"); 2357 a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html"); 2358 string where = a.href; // same as a.getAttribute("href"); 2359 --- 2360 2361 */ 2362 @property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) { 2363 if(v !is null) 2364 setAttribute(name, v); 2365 return getAttribute(name); 2366 } 2367 2368 /** 2369 Old access to attributes. Use [attrs] instead. 2370 2371 DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions. 2372 so I want to remove it. A small whitelist of attributes is still allowed, but others are not. 2373 2374 Instead, use element.attrs.attribute, element.attrs["attribute"], 2375 or element.getAttribute("attribute")/element.setAttribute("attribute"). 2376 */ 2377 @property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) { 2378 static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName"); 2379 } 2380 2381 /* 2382 // this would be nice for convenience, but it broke the getter above. 2383 @property void opDispatch(string name)(bool boolean) if(name != "popFront") { 2384 if(boolean) 2385 setAttribute(name, name); 2386 else 2387 removeAttribute(name); 2388 } 2389 */ 2390 2391 /** 2392 Returns the element's children. 2393 */ 2394 @property const(Element[]) childNodes() const { 2395 return children; 2396 } 2397 2398 /// Mutable version of the same 2399 @property Element[] childNodes() { // FIXME: the above should be inout 2400 return children; 2401 } 2402 2403 /++ 2404 HTML5's dataset property. It is an alternate view into attributes with the data- prefix. 2405 Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");` 2406 +/ 2407 @property DataSet dataset() { 2408 return DataSet(this); 2409 } 2410 2411 /++ 2412 Gives dot/opIndex access to attributes 2413 --- 2414 ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo") 2415 --- 2416 +/ 2417 @property AttributeSet attrs() { 2418 return AttributeSet(this); 2419 } 2420 2421 /++ 2422 Provides both string and object style (like in Javascript) access to the style attribute. 2423 2424 --- 2425 element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute 2426 --- 2427 +/ 2428 @property ElementStyle style() { 2429 return ElementStyle(this); 2430 } 2431 2432 /++ 2433 This sets the style attribute with a string. 2434 +/ 2435 @property ElementStyle style(string s) { 2436 this.setAttribute("style", s); 2437 return this.style; 2438 } 2439 2440 private void parseAttributes(string[] whichOnes = null) { 2441 /+ 2442 if(whichOnes is null) 2443 whichOnes = attributes.keys; 2444 foreach(attr; whichOnes) { 2445 switch(attr) { 2446 case "id": 2447 2448 break; 2449 case "class": 2450 2451 break; 2452 case "style": 2453 2454 break; 2455 default: 2456 // we don't care about it 2457 } 2458 } 2459 +/ 2460 } 2461 2462 2463 // if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there. 2464 /// Don't use this. 2465 @property CssStyle computedStyle() { 2466 if(_computedStyle is null) { 2467 auto style = this.getAttribute("style"); 2468 /* we'll treat shitty old html attributes as css here */ 2469 if(this.hasAttribute("width")) 2470 style ~= "; width: " ~ this.attrs.width; 2471 if(this.hasAttribute("height")) 2472 style ~= "; height: " ~ this.attrs.height; 2473 if(this.hasAttribute("bgcolor")) 2474 style ~= "; background-color: " ~ this.attrs.bgcolor; 2475 if(this.tagName == "body" && this.hasAttribute("text")) 2476 style ~= "; color: " ~ this.attrs.text; 2477 if(this.hasAttribute("color")) 2478 style ~= "; color: " ~ this.attrs.color; 2479 /* done */ 2480 2481 2482 _computedStyle = new CssStyle(null, style); // gives at least something to work with 2483 } 2484 return _computedStyle; 2485 } 2486 2487 /// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good 2488 version(browser) { 2489 void* expansionHook; ///ditto 2490 int offsetWidth; ///ditto 2491 int offsetHeight; ///ditto 2492 int offsetLeft; ///ditto 2493 int offsetTop; ///ditto 2494 Element offsetParent; ///ditto 2495 bool hasLayout; ///ditto 2496 int zIndex; ///ditto 2497 2498 ///ditto 2499 int absoluteLeft() { 2500 int a = offsetLeft; 2501 auto p = offsetParent; 2502 while(p) { 2503 a += p.offsetLeft; 2504 p = p.offsetParent; 2505 } 2506 2507 return a; 2508 } 2509 2510 ///ditto 2511 int absoluteTop() { 2512 int a = offsetTop; 2513 auto p = offsetParent; 2514 while(p) { 2515 a += p.offsetTop; 2516 p = p.offsetParent; 2517 } 2518 2519 return a; 2520 } 2521 } 2522 2523 // Back to the regular dom functions 2524 2525 public: 2526 2527 2528 /* ******************************* 2529 DOM Mutation 2530 *********************************/ 2531 2532 /// Removes all inner content from the tag; all child text and elements are gone. 2533 void removeAllChildren() 2534 out { 2535 assert(this.children.length == 0); 2536 } 2537 body { 2538 children = null; 2539 } 2540 2541 2542 /// Appends the given element to this one. The given element must not have a parent already. 2543 Element appendChild(Element e) 2544 in { 2545 assert(e !is null); 2546 assert(e.parentNode is null, e.parentNode.toString); 2547 } 2548 out (ret) { 2549 assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null"); 2550 assert(e.parentDocument is this.parentDocument); 2551 assert(e is ret); 2552 } 2553 body { 2554 selfClosed = false; 2555 e.parentNode = this; 2556 e.parentDocument = this.parentDocument; 2557 if(auto frag = cast(DocumentFragment) e) 2558 children ~= frag.children; 2559 else 2560 children ~= e; 2561 2562 sendObserverEvent(DomMutationOperations.appendChild, null, null, e); 2563 2564 return e; 2565 } 2566 2567 /// Inserts the second element to this node, right before the first param 2568 Element insertBefore(in Element where, Element what) 2569 in { 2570 assert(where !is null); 2571 assert(where.parentNode is this); 2572 assert(what !is null); 2573 assert(what.parentNode is null); 2574 } 2575 out (ret) { 2576 assert(where.parentNode is this); 2577 assert(what.parentNode is this); 2578 2579 assert(what.parentDocument is this.parentDocument); 2580 assert(ret is what); 2581 } 2582 body { 2583 foreach(i, e; children) { 2584 if(e is where) { 2585 if(auto frag = cast(DocumentFragment) what) 2586 children = children[0..i] ~ frag.children ~ children[i..$]; 2587 else 2588 children = children[0..i] ~ what ~ children[i..$]; 2589 what.parentDocument = this.parentDocument; 2590 what.parentNode = this; 2591 return what; 2592 } 2593 } 2594 2595 return what; 2596 2597 assert(0); 2598 } 2599 2600 /++ 2601 Inserts the given element `what` as a sibling of the `this` element, after the element `where` in the parent node. 2602 +/ 2603 Element insertAfter(in Element where, Element what) 2604 in { 2605 assert(where !is null); 2606 assert(where.parentNode is this); 2607 assert(what !is null); 2608 assert(what.parentNode is null); 2609 } 2610 out (ret) { 2611 assert(where.parentNode is this); 2612 assert(what.parentNode is this); 2613 assert(what.parentDocument is this.parentDocument); 2614 assert(ret is what); 2615 } 2616 body { 2617 foreach(i, e; children) { 2618 if(e is where) { 2619 if(auto frag = cast(DocumentFragment) what) 2620 children = children[0 .. i + 1] ~ what.children ~ children[i + 1 .. $]; 2621 else 2622 children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $]; 2623 what.parentNode = this; 2624 what.parentDocument = this.parentDocument; 2625 return what; 2626 } 2627 } 2628 2629 return what; 2630 2631 assert(0); 2632 } 2633 2634 /// swaps one child for a new thing. Returns the old child which is now parentless. 2635 Element swapNode(Element child, Element replacement) 2636 in { 2637 assert(child !is null); 2638 assert(replacement !is null); 2639 assert(child.parentNode is this); 2640 } 2641 out(ret) { 2642 assert(ret is child); 2643 assert(ret.parentNode is null); 2644 assert(replacement.parentNode is this); 2645 assert(replacement.parentDocument is this.parentDocument); 2646 } 2647 body { 2648 foreach(ref c; this.children) 2649 if(c is child) { 2650 c.parentNode = null; 2651 c = replacement; 2652 c.parentNode = this; 2653 c.parentDocument = this.parentDocument; 2654 return child; 2655 } 2656 assert(0); 2657 } 2658 2659 2660 /++ 2661 Appends the given to the node. 2662 2663 2664 Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>` 2665 yields `<example>text <b>bold</b> hi</example>`. 2666 2667 See_Also: 2668 [firstInnerText], [directText], [innerText], [appendChild] 2669 +/ 2670 @scriptable 2671 Element appendText(string text) { 2672 Element e = new TextNode(parentDocument, text); 2673 appendChild(e); 2674 return this; 2675 } 2676 2677 /++ 2678 Returns child elements which are of a tag type (excludes text, comments, etc.). 2679 2680 2681 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 2682 2683 Params: 2684 tagName = filter results to only the child elements with the given tag name. 2685 +/ 2686 @property Element[] childElements(string tagName = null) { 2687 Element[] ret; 2688 foreach(c; children) 2689 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 2690 ret ~= c; 2691 return ret; 2692 } 2693 2694 /++ 2695 Appends the given html to the element, returning the elements appended 2696 2697 2698 This is similar to `element.innerHTML += "html string";` in Javascript. 2699 +/ 2700 @scriptable 2701 Element[] appendHtml(string html) { 2702 Document d = new Document("<root>" ~ html ~ "</root>"); 2703 return stealChildren(d.root); 2704 } 2705 2706 2707 ///. 2708 void insertChildAfter(Element child, Element where) 2709 in { 2710 assert(child !is null); 2711 assert(where !is null); 2712 assert(where.parentNode is this); 2713 assert(!selfClosed); 2714 //assert(isInArray(where, children)); 2715 } 2716 out { 2717 assert(child.parentNode is this); 2718 assert(where.parentNode is this); 2719 //assert(isInArray(where, children)); 2720 //assert(isInArray(child, children)); 2721 } 2722 body { 2723 foreach(ref i, c; children) { 2724 if(c is where) { 2725 i++; 2726 if(auto frag = cast(DocumentFragment) child) 2727 children = children[0..i] ~ child.children ~ children[i..$]; 2728 else 2729 children = children[0..i] ~ child ~ children[i..$]; 2730 child.parentNode = this; 2731 child.parentDocument = this.parentDocument; 2732 break; 2733 } 2734 } 2735 } 2736 2737 /++ 2738 Reparents all the child elements of `e` to `this`, leaving `e` childless. 2739 2740 Params: 2741 e = the element whose children you want to steal 2742 position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children. 2743 +/ 2744 Element[] stealChildren(Element e, Element position = null) 2745 in { 2746 assert(!selfClosed); 2747 assert(e !is null); 2748 //if(position !is null) 2749 //assert(isInArray(position, children)); 2750 } 2751 out (ret) { 2752 assert(e.children.length == 0); 2753 // all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible... 2754 version(none) 2755 debug foreach(child; ret) { 2756 assert(child.parentNode is this); 2757 assert(child.parentDocument is this.parentDocument); 2758 } 2759 } 2760 body { 2761 foreach(c; e.children) { 2762 c.parentNode = this; 2763 c.parentDocument = this.parentDocument; 2764 } 2765 if(position is null) 2766 children ~= e.children; 2767 else { 2768 foreach(i, child; children) { 2769 if(child is position) { 2770 children = children[0..i] ~ 2771 e.children ~ 2772 children[i..$]; 2773 break; 2774 } 2775 } 2776 } 2777 2778 auto ret = e.children[]; 2779 e.children.length = 0; 2780 2781 return ret; 2782 } 2783 2784 /// Puts the current element first in our children list. The given element must not have a parent already. 2785 Element prependChild(Element e) 2786 in { 2787 assert(e.parentNode is null); 2788 assert(!selfClosed); 2789 } 2790 out { 2791 assert(e.parentNode is this); 2792 assert(e.parentDocument is this.parentDocument); 2793 assert(children[0] is e); 2794 } 2795 body { 2796 e.parentNode = this; 2797 e.parentDocument = this.parentDocument; 2798 if(auto frag = cast(DocumentFragment) e) 2799 children = e.children ~ children; 2800 else 2801 children = e ~ children; 2802 return e; 2803 } 2804 2805 2806 /** 2807 Returns a string containing all child elements, formatted such that it could be pasted into 2808 an XML file. 2809 */ 2810 @property string innerHTML(Appender!string where = appender!string()) const { 2811 if(children is null) 2812 return ""; 2813 2814 auto start = where.data.length; 2815 2816 foreach(child; children) { 2817 assert(child !is null); 2818 2819 child.writeToAppender(where); 2820 } 2821 2822 return where.data[start .. $]; 2823 } 2824 2825 /** 2826 Takes some html and replaces the element's children with the tree made from the string. 2827 */ 2828 @property Element innerHTML(string html, bool strict = false) { 2829 if(html.length) 2830 selfClosed = false; 2831 2832 if(html.length == 0) { 2833 // I often say innerHTML = ""; as a shortcut to clear it out, 2834 // so let's optimize that slightly. 2835 removeAllChildren(); 2836 return this; 2837 } 2838 2839 auto doc = new Document(); 2840 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document 2841 2842 children = doc.root.children; 2843 foreach(c; children) { 2844 c.parentNode = this; 2845 c.parentDocument = this.parentDocument; 2846 } 2847 2848 reparentTreeDocuments(); 2849 2850 doc.root.children = null; 2851 2852 return this; 2853 } 2854 2855 /// ditto 2856 @property Element innerHTML(Html html) { 2857 return this.innerHTML = html.source; 2858 } 2859 2860 private void reparentTreeDocuments() { 2861 foreach(c; this.tree) 2862 c.parentDocument = this.parentDocument; 2863 } 2864 2865 /** 2866 Replaces this node with the given html string, which is parsed 2867 2868 Note: this invalidates the this reference, since it is removed 2869 from the tree. 2870 2871 Returns the new children that replace this. 2872 */ 2873 @property Element[] outerHTML(string html) { 2874 auto doc = new Document(); 2875 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness 2876 2877 children = doc.root.children; 2878 foreach(c; children) { 2879 c.parentNode = this; 2880 c.parentDocument = this.parentDocument; 2881 } 2882 2883 2884 reparentTreeDocuments(); 2885 2886 2887 stripOut(); 2888 2889 return doc.root.children; 2890 } 2891 2892 /++ 2893 Returns all the html for this element, including the tag itself. 2894 2895 This is equivalent to calling toString(). 2896 +/ 2897 @property string outerHTML() { 2898 return this.toString(); 2899 } 2900 2901 /// This sets the inner content of the element *without* trying to parse it. 2902 /// You can inject any code in there; this serves as an escape hatch from the dom. 2903 /// 2904 /// The only times you might actually need it are for < style > and < script > tags in html. 2905 /// Other than that, innerHTML and/or innerText should do the job. 2906 @property void innerRawSource(string rawSource) { 2907 children.length = 0; 2908 auto rs = new RawSource(parentDocument, rawSource); 2909 rs.parentNode = this; 2910 2911 children ~= rs; 2912 } 2913 2914 ///. 2915 Element replaceChild(Element find, Element replace) 2916 in { 2917 assert(find !is null); 2918 assert(replace !is null); 2919 assert(replace.parentNode is null); 2920 } 2921 out(ret) { 2922 assert(ret is replace); 2923 assert(replace.parentNode is this); 2924 assert(replace.parentDocument is this.parentDocument); 2925 assert(find.parentNode is null); 2926 } 2927 body { 2928 // FIXME 2929 //if(auto frag = cast(DocumentFragment) replace) 2930 //return this.replaceChild(frag, replace.children); 2931 for(int i = 0; i < children.length; i++) { 2932 if(children[i] is find) { 2933 replace.parentNode = this; 2934 children[i].parentNode = null; 2935 children[i] = replace; 2936 replace.parentDocument = this.parentDocument; 2937 return replace; 2938 } 2939 } 2940 2941 throw new Exception("no such child"); 2942 } 2943 2944 /** 2945 Replaces the given element with a whole group. 2946 */ 2947 void replaceChild(Element find, Element[] replace) 2948 in { 2949 assert(find !is null); 2950 assert(replace !is null); 2951 assert(find.parentNode is this); 2952 debug foreach(r; replace) 2953 assert(r.parentNode is null); 2954 } 2955 out { 2956 assert(find.parentNode is null); 2957 assert(children.length >= replace.length); 2958 debug foreach(child; children) 2959 assert(child !is find); 2960 debug foreach(r; replace) 2961 assert(r.parentNode is this); 2962 } 2963 body { 2964 if(replace.length == 0) { 2965 removeChild(find); 2966 return; 2967 } 2968 assert(replace.length); 2969 for(int i = 0; i < children.length; i++) { 2970 if(children[i] is find) { 2971 children[i].parentNode = null; // this element should now be dead 2972 children[i] = replace[0]; 2973 foreach(e; replace) { 2974 e.parentNode = this; 2975 e.parentDocument = this.parentDocument; 2976 } 2977 2978 children = .insertAfter(children, i, replace[1..$]); 2979 2980 return; 2981 } 2982 } 2983 2984 throw new Exception("no such child"); 2985 } 2986 2987 2988 /** 2989 Removes the given child from this list. 2990 2991 Returns the removed element. 2992 */ 2993 Element removeChild(Element c) 2994 in { 2995 assert(c !is null); 2996 assert(c.parentNode is this); 2997 } 2998 out { 2999 debug foreach(child; children) 3000 assert(child !is c); 3001 assert(c.parentNode is null); 3002 } 3003 body { 3004 foreach(i, e; children) { 3005 if(e is c) { 3006 children = children[0..i] ~ children [i+1..$]; 3007 c.parentNode = null; 3008 return c; 3009 } 3010 } 3011 3012 throw new Exception("no such child"); 3013 } 3014 3015 /// This removes all the children from this element, returning the old list. 3016 Element[] removeChildren() 3017 out (ret) { 3018 assert(children.length == 0); 3019 debug foreach(r; ret) 3020 assert(r.parentNode is null); 3021 } 3022 body { 3023 Element[] oldChildren = children.dup; 3024 foreach(c; oldChildren) 3025 c.parentNode = null; 3026 3027 children.length = 0; 3028 3029 return oldChildren; 3030 } 3031 3032 /** 3033 Fetch the inside text, with all tags stripped out. 3034 3035 <p>cool <b>api</b> & code dude<p> 3036 innerText of that is "cool api & code dude". 3037 3038 This does not match what real innerText does! 3039 http://perfectionkills.com/the-poor-misunderstood-innerText/ 3040 3041 It is more like textContent. 3042 */ 3043 @scriptable 3044 @property string innerText() const { 3045 string s; 3046 foreach(child; children) { 3047 if(child.nodeType != NodeType.Text) 3048 s ~= child.innerText; 3049 else 3050 s ~= child.nodeValue(); 3051 } 3052 return s; 3053 } 3054 3055 /// 3056 alias textContent = innerText; 3057 3058 /** 3059 Sets the inside text, replacing all children. You don't 3060 have to worry about entity encoding. 3061 */ 3062 @scriptable 3063 @property void innerText(string text) { 3064 selfClosed = false; 3065 Element e = new TextNode(parentDocument, text); 3066 e.parentNode = this; 3067 children = [e]; 3068 } 3069 3070 /** 3071 Strips this node out of the document, replacing it with the given text 3072 */ 3073 @property void outerText(string text) { 3074 parentNode.replaceChild(this, new TextNode(parentDocument, text)); 3075 } 3076 3077 /** 3078 Same result as innerText; the tag with all inner tags stripped out 3079 */ 3080 @property string outerText() const { 3081 return innerText; 3082 } 3083 3084 3085 /* ******************************* 3086 Miscellaneous 3087 *********************************/ 3088 3089 /// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it. 3090 @property Element cloned() 3091 /+ 3092 out(ret) { 3093 // FIXME: not sure why these fail... 3094 assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length)); 3095 assert(ret.tagName == this.tagName); 3096 } 3097 body { 3098 +/ 3099 { 3100 return this.cloneNode(true); 3101 } 3102 3103 /// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents. 3104 Element cloneNode(bool deepClone) { 3105 auto e = Element.make(this.tagName); 3106 e.parentDocument = this.parentDocument; 3107 e.attributes = this.attributes.aadup; 3108 e.selfClosed = this.selfClosed; 3109 3110 if(deepClone) { 3111 foreach(child; children) { 3112 e.appendChild(child.cloneNode(true)); 3113 } 3114 } 3115 3116 3117 return e; 3118 } 3119 3120 /// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class. 3121 string nodeValue() const { 3122 return ""; 3123 } 3124 3125 // should return int 3126 ///. 3127 @property int nodeType() const { 3128 return 1; 3129 } 3130 3131 3132 invariant () { 3133 assert(tagName.indexOf(" ") == -1); 3134 3135 if(children !is null) 3136 debug foreach(child; children) { 3137 // assert(parentNode !is null); 3138 assert(child !is null); 3139 // assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName)); 3140 assert(child !is this); 3141 //assert(child !is parentNode); 3142 } 3143 3144 /+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out 3145 if(parentNode !is null) { 3146 // if you have a parent, you should share the same parentDocument; this is appendChild()'s job 3147 auto lol = cast(TextNode) this; 3148 assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents); 3149 } 3150 +/ 3151 //assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required 3152 // reason is so you can create these without needing a reference to the document 3153 } 3154 3155 /** 3156 Turns the whole element, including tag, attributes, and children, into a string which could be pasted into 3157 an XML file. 3158 */ 3159 override string toString() const { 3160 return writeToAppender(); 3161 } 3162 3163 protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const { 3164 if(indentWith is null) 3165 return null; 3166 string s; 3167 3168 if(insertComments) s ~= "<!--"; 3169 s ~= "\n"; 3170 foreach(indent; 0 .. indentationLevel) 3171 s ~= indentWith; 3172 if(insertComments) s ~= "-->"; 3173 3174 return s; 3175 } 3176 3177 /++ 3178 Writes out with formatting. Be warned: formatting changes the contents. Use ONLY 3179 for eyeball debugging. 3180 +/ 3181 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 3182 3183 // first step is to concatenate any consecutive text nodes to simplify 3184 // the white space analysis. this changes the tree! but i'm allowed since 3185 // the comment always says it changes the comments 3186 // 3187 // actually i'm not allowed cuz it is const so i will cheat and lie 3188 /+ 3189 TextNode lastTextChild = null; 3190 for(int a = 0; a < this.children.length; a++) { 3191 auto child = this.children[a]; 3192 if(auto tn = cast(TextNode) child) { 3193 if(lastTextChild) { 3194 lastTextChild.contents ~= tn.contents; 3195 for(int b = a; b < this.children.length - 1; b++) 3196 this.children[b] = this.children[b + 1]; 3197 this.children = this.children[0 .. $-1]; 3198 } else { 3199 lastTextChild = tn; 3200 } 3201 } else { 3202 lastTextChild = null; 3203 } 3204 } 3205 +/ 3206 3207 const(Element)[] children; 3208 3209 TextNode lastTextChild = null; 3210 for(int a = 0; a < this.children.length; a++) { 3211 auto child = this.children[a]; 3212 if(auto tn = cast(const(TextNode)) child) { 3213 if(lastTextChild !is null) { 3214 lastTextChild.contents ~= tn.contents; 3215 } else { 3216 lastTextChild = new TextNode(""); 3217 lastTextChild.parentNode = cast(Element) this; 3218 lastTextChild.contents ~= tn.contents; 3219 children ~= lastTextChild; 3220 } 3221 } else { 3222 lastTextChild = null; 3223 children ~= child; 3224 } 3225 } 3226 3227 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3228 3229 s ~= "<"; 3230 s ~= tagName; 3231 3232 // i sort these for consistent output. might be more legible 3233 // but especially it keeps it the same for diff purposes. 3234 import std.algorithm : sort; 3235 auto keys = sort(attributes.keys); 3236 foreach(n; keys) { 3237 auto v = attributes[n]; 3238 s ~= " "; 3239 s ~= n; 3240 s ~= "=\""; 3241 s ~= htmlEntitiesEncode(v); 3242 s ~= "\""; 3243 } 3244 3245 if(selfClosed){ 3246 s ~= " />"; 3247 return s; 3248 } 3249 3250 s ~= ">"; 3251 3252 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 3253 // just keep them on the same line 3254 if(tagName.isInArray(inlineElements) || allAreInlineHtml(children)) { 3255 foreach(child; children) { 3256 s ~= child.toString();//toPrettyString(false, 0, null); 3257 } 3258 } else { 3259 foreach(child; children) { 3260 assert(child !is null); 3261 3262 s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith); 3263 } 3264 3265 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3266 } 3267 3268 s ~= "</"; 3269 s ~= tagName; 3270 s ~= ">"; 3271 3272 return s; 3273 } 3274 3275 /+ 3276 /// Writes out the opening tag only, if applicable. 3277 string writeTagOnly(Appender!string where = appender!string()) const { 3278 +/ 3279 3280 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3281 /// Note: the ordering of attributes in the string is undefined. 3282 /// Returns the string it creates. 3283 string writeToAppender(Appender!string where = appender!string()) const { 3284 assert(tagName !is null); 3285 3286 where.reserve((this.children.length + 1) * 512); 3287 3288 auto start = where.data.length; 3289 3290 where.put("<"); 3291 where.put(tagName); 3292 3293 import std.algorithm : sort; 3294 auto keys = sort(attributes.keys); 3295 foreach(n; keys) { 3296 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 3297 //assert(v !is null); 3298 where.put(" "); 3299 where.put(n); 3300 where.put("=\""); 3301 htmlEntitiesEncode(v, where); 3302 where.put("\""); 3303 } 3304 3305 if(selfClosed){ 3306 where.put(" />"); 3307 return where.data[start .. $]; 3308 } 3309 3310 where.put('>'); 3311 3312 innerHTML(where); 3313 3314 where.put("</"); 3315 where.put(tagName); 3316 where.put('>'); 3317 3318 return where.data[start .. $]; 3319 } 3320 3321 /** 3322 Returns a lazy range of all its children, recursively. 3323 */ 3324 @property ElementStream tree() { 3325 return new ElementStream(this); 3326 } 3327 3328 // I moved these from Form because they are generally useful. 3329 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3330 /// Tags: HTML, HTML5 3331 // FIXME: add overloads for other label types... 3332 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3333 auto fs = this; 3334 auto i = fs.addChild("label"); 3335 3336 if(!(type == "checkbox" || type == "radio")) 3337 i.addChild("span", label); 3338 3339 Element input; 3340 if(type == "textarea") 3341 input = i.addChild("textarea"). 3342 setAttribute("name", name). 3343 setAttribute("rows", "6"); 3344 else 3345 input = i.addChild("input"). 3346 setAttribute("name", name). 3347 setAttribute("type", type); 3348 3349 if(type == "checkbox" || type == "radio") 3350 i.addChild("span", label); 3351 3352 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3353 fieldOptions.applyToElement(input); 3354 return i; 3355 } 3356 3357 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3358 auto fs = this; 3359 auto i = fs.addChild("label"); 3360 i.addChild(label); 3361 Element input; 3362 if(type == "textarea") 3363 input = i.addChild("textarea"). 3364 setAttribute("name", name). 3365 setAttribute("rows", "6"); 3366 else 3367 input = i.addChild("input"). 3368 setAttribute("name", name). 3369 setAttribute("type", type); 3370 3371 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3372 fieldOptions.applyToElement(input); 3373 return i; 3374 } 3375 3376 Element addField(string label, string name, FormFieldOptions fieldOptions) { 3377 return addField(label, name, "text", fieldOptions); 3378 } 3379 3380 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 3381 auto fs = this; 3382 auto i = fs.addChild("label"); 3383 i.addChild("span", label); 3384 auto sel = i.addChild("select").setAttribute("name", name); 3385 3386 foreach(k, opt; options) 3387 sel.addChild("option", opt, k); 3388 3389 // FIXME: implement requirements somehow 3390 3391 return i; 3392 } 3393 3394 Element addSubmitButton(string label = null) { 3395 auto t = this; 3396 auto holder = t.addChild("div"); 3397 holder.addClass("submit-holder"); 3398 auto i = holder.addChild("input"); 3399 i.type = "submit"; 3400 if(label.length) 3401 i.value = label; 3402 return holder; 3403 } 3404 3405 } 3406 3407 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 3408 /// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header) 3409 class XmlDocument : Document { 3410 this(string data) { 3411 contentType = "text/xml; charset=utf-8"; 3412 _prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n"; 3413 3414 parseStrict(data); 3415 } 3416 } 3417 3418 3419 3420 3421 import std.string; 3422 3423 /* domconvenience follows { */ 3424 3425 /// finds comments that match the given txt. Case insensitive, strips whitespace. 3426 Element[] findComments(Document document, string txt) { 3427 return findComments(document.root, txt); 3428 } 3429 3430 /// ditto 3431 Element[] findComments(Element element, string txt) { 3432 txt = txt.strip().toLower(); 3433 Element[] ret; 3434 3435 foreach(comment; element.getElementsByTagName("#comment")) { 3436 string t = comment.nodeValue().strip().toLower(); 3437 if(t == txt) 3438 ret ~= comment; 3439 } 3440 3441 return ret; 3442 } 3443 3444 /// An option type that propagates null. See: [Element.optionSelector] 3445 struct MaybeNullElement(SomeElementType) { 3446 this(SomeElementType ele) { 3447 this.element = ele; 3448 } 3449 SomeElementType element; 3450 3451 /// Forwards to the element, wit a null check inserted that propagates null. 3452 auto opDispatch(string method, T...)(T args) { 3453 alias type = typeof(__traits(getMember, element, method)(args)); 3454 static if(is(type : Element)) { 3455 if(element is null) 3456 return MaybeNullElement!type(null); 3457 return __traits(getMember, element, method)(args); 3458 } else static if(is(type == string)) { 3459 if(element is null) 3460 return cast(string) null; 3461 return __traits(getMember, element, method)(args); 3462 } else static if(is(type == void)) { 3463 if(element is null) 3464 return; 3465 __traits(getMember, element, method)(args); 3466 } else { 3467 static assert(0); 3468 } 3469 } 3470 3471 /// Allows implicit casting to the wrapped element. 3472 alias element this; 3473 } 3474 3475 /++ 3476 A collection of elements which forwards methods to the children. 3477 +/ 3478 struct ElementCollection { 3479 /// 3480 this(Element e) { 3481 elements = [e]; 3482 } 3483 3484 /// 3485 this(Element e, string selector) { 3486 elements = e.querySelectorAll(selector); 3487 } 3488 3489 /// 3490 this(Element[] e) { 3491 elements = e; 3492 } 3493 3494 Element[] elements; 3495 //alias elements this; // let it implicitly convert to the underlying array 3496 3497 /// 3498 ElementCollection opIndex(string selector) { 3499 ElementCollection ec; 3500 foreach(e; elements) 3501 ec.elements ~= e.getElementsBySelector(selector); 3502 return ec; 3503 } 3504 3505 /// 3506 Element opIndex(int i) { 3507 return elements[i]; 3508 } 3509 3510 /// if you slice it, give the underlying array for easy forwarding of the 3511 /// collection to range expecting algorithms or looping over. 3512 Element[] opSlice() { 3513 return elements; 3514 } 3515 3516 /// And input range primitives so we can foreach over this 3517 void popFront() { 3518 elements = elements[1..$]; 3519 } 3520 3521 /// ditto 3522 Element front() { 3523 return elements[0]; 3524 } 3525 3526 /// ditto 3527 bool empty() { 3528 return !elements.length; 3529 } 3530 3531 /++ 3532 Collects strings from the collection, concatenating them together 3533 Kinda like running reduce and ~= on it. 3534 3535 --- 3536 document["p"].collect!"innerText"; 3537 --- 3538 +/ 3539 string collect(string method)(string separator = "") { 3540 string text; 3541 foreach(e; elements) { 3542 text ~= mixin("e." ~ method); 3543 text ~= separator; 3544 } 3545 return text; 3546 } 3547 3548 /// Forward method calls to each individual [Element|element] of the collection 3549 /// returns this so it can be chained. 3550 ElementCollection opDispatch(string name, T...)(T t) { 3551 foreach(e; elements) { 3552 mixin("e." ~ name)(t); 3553 } 3554 return this; 3555 } 3556 3557 /++ 3558 Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one. 3559 +/ 3560 ElementCollection wrapIn(Element what) { 3561 foreach(e; elements) { 3562 e.wrapIn(what.cloneNode(false)); 3563 } 3564 3565 return this; 3566 } 3567 3568 /// Concatenates two ElementCollection together. 3569 ElementCollection opBinary(string op : "~")(ElementCollection rhs) { 3570 return ElementCollection(this.elements ~ rhs.elements); 3571 } 3572 } 3573 3574 3575 /// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions. 3576 mixin template JavascriptStyleDispatch() { 3577 /// 3578 string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want. 3579 if(v !is null) 3580 return set(name, v); 3581 return get(name); 3582 } 3583 3584 /// 3585 string opIndex(string key) const { 3586 return get(key); 3587 } 3588 3589 /// 3590 string opIndexAssign(string value, string field) { 3591 return set(field, value); 3592 } 3593 3594 // FIXME: doesn't seem to work 3595 string* opBinary(string op)(string key) if(op == "in") { 3596 return key in fields; 3597 } 3598 } 3599 3600 /// A proxy object to do the Element class' dataset property. See Element.dataset for more info. 3601 /// 3602 /// Do not create this object directly. 3603 struct DataSet { 3604 /// 3605 this(Element e) { 3606 this._element = e; 3607 } 3608 3609 private Element _element; 3610 /// 3611 string set(string name, string value) { 3612 _element.setAttribute("data-" ~ unCamelCase(name), value); 3613 return value; 3614 } 3615 3616 /// 3617 string get(string name) const { 3618 return _element.getAttribute("data-" ~ unCamelCase(name)); 3619 } 3620 3621 /// 3622 mixin JavascriptStyleDispatch!(); 3623 } 3624 3625 /// Proxy object for attributes which will replace the main opDispatch eventually 3626 struct AttributeSet { 3627 /// 3628 this(Element e) { 3629 this._element = e; 3630 } 3631 3632 private Element _element; 3633 /// 3634 string set(string name, string value) { 3635 _element.setAttribute(name, value); 3636 return value; 3637 } 3638 3639 /// 3640 string get(string name) const { 3641 return _element.getAttribute(name); 3642 } 3643 3644 /// 3645 mixin JavascriptStyleDispatch!(); 3646 } 3647 3648 3649 3650 /// for style, i want to be able to set it with a string like a plain attribute, 3651 /// but also be able to do properties Javascript style. 3652 3653 struct ElementStyle { 3654 this(Element parent) { 3655 _element = parent; 3656 } 3657 3658 Element _element; 3659 3660 @property ref inout(string) _attribute() inout { 3661 auto s = "style" in _element.attributes; 3662 if(s is null) { 3663 auto e = cast() _element; // const_cast 3664 e.attributes["style"] = ""; // we need something to reference 3665 s = cast(inout) ("style" in e.attributes); 3666 } 3667 3668 assert(s !is null); 3669 return *s; 3670 } 3671 3672 alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work. 3673 3674 string set(string name, string value) { 3675 if(name.length == 0) 3676 return value; 3677 if(name == "cssFloat") 3678 name = "float"; 3679 else 3680 name = unCamelCase(name); 3681 auto r = rules(); 3682 r[name] = value; 3683 3684 _attribute = ""; 3685 foreach(k, v; r) { 3686 if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */ 3687 continue; 3688 if(_attribute.length) 3689 _attribute ~= " "; 3690 _attribute ~= k ~ ": " ~ v ~ ";"; 3691 } 3692 3693 _element.setAttribute("style", _attribute); // this is to trigger the observer call 3694 3695 return value; 3696 } 3697 string get(string name) const { 3698 if(name == "cssFloat") 3699 name = "float"; 3700 else 3701 name = unCamelCase(name); 3702 auto r = rules(); 3703 if(name in r) 3704 return r[name]; 3705 return null; 3706 } 3707 3708 string[string] rules() const { 3709 string[string] ret; 3710 foreach(rule; _attribute.split(";")) { 3711 rule = rule.strip(); 3712 if(rule.length == 0) 3713 continue; 3714 auto idx = rule.indexOf(":"); 3715 if(idx == -1) 3716 ret[rule] = ""; 3717 else { 3718 auto name = rule[0 .. idx].strip(); 3719 auto value = rule[idx + 1 .. $].strip(); 3720 3721 ret[name] = value; 3722 } 3723 } 3724 3725 return ret; 3726 } 3727 3728 mixin JavascriptStyleDispatch!(); 3729 } 3730 3731 /// Converts a camel cased propertyName to a css style dashed property-name 3732 string unCamelCase(string a) { 3733 string ret; 3734 foreach(c; a) 3735 if((c >= 'A' && c <= 'Z')) 3736 ret ~= "-" ~ toLower("" ~ c)[0]; 3737 else 3738 ret ~= c; 3739 return ret; 3740 } 3741 3742 /// Translates a css style property-name to a camel cased propertyName 3743 string camelCase(string a) { 3744 string ret; 3745 bool justSawDash = false; 3746 foreach(c; a) 3747 if(c == '-') { 3748 justSawDash = true; 3749 } else { 3750 if(justSawDash) { 3751 justSawDash = false; 3752 ret ~= toUpper("" ~ c); 3753 } else 3754 ret ~= c; 3755 } 3756 return ret; 3757 } 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 // domconvenience ends } 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 // @safe: 3780 3781 // NOTE: do *NOT* override toString on Element subclasses. It won't work. 3782 // Instead, override writeToAppender(); 3783 3784 // FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too. 3785 3786 // Stripping them is useful for reading php as html.... but adding them 3787 // is good for building php. 3788 3789 // I need to maintain compatibility with the way it is now too. 3790 3791 import std.string; 3792 import std.exception; 3793 import std.uri; 3794 import std.array; 3795 import std.range; 3796 3797 //import std.stdio; 3798 3799 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh 3800 // that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's 3801 // most likely a typo so I say kill kill kill. 3802 3803 3804 /++ 3805 This might belong in another module, but it represents a file with a mime type and some data. 3806 Document implements this interface with type = text/html (see Document.contentType for more info) 3807 and data = document.toString, so you can return Documents anywhere web.d expects FileResources. 3808 +/ 3809 interface FileResource { 3810 /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png" 3811 @property string contentType() const; 3812 /// the data 3813 immutable(ubyte)[] getData() const; 3814 } 3815 3816 3817 3818 3819 ///. 3820 enum NodeType { Text = 3 } 3821 3822 3823 /// You can use this to do an easy null check or a dynamic cast+null check on any element. 3824 T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element)) 3825 in {} 3826 out(ret) { assert(ret !is null); } 3827 body { 3828 auto ret = cast(T) e; 3829 if(ret is null) 3830 throw new ElementNotFoundException(T.stringof, "passed value", e, file, line); 3831 return ret; 3832 } 3833 3834 3835 ///. 3836 class DocumentFragment : Element { 3837 ///. 3838 this(Document _parentDocument) { 3839 tagName = "#fragment"; 3840 super(_parentDocument); 3841 } 3842 3843 /++ 3844 Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it. 3845 3846 Since: March 29, 2018 (or git tagged v2.1.0) 3847 +/ 3848 this(Html html) { 3849 this(null); 3850 3851 this.innerHTML = html.source; 3852 } 3853 3854 ///. 3855 override string writeToAppender(Appender!string where = appender!string()) const { 3856 return this.innerHTML(where); 3857 } 3858 3859 override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const { 3860 string s; 3861 foreach(child; children) 3862 s ~= child.toPrettyString(insertComments, indentationLevel, indentWith); 3863 return s; 3864 } 3865 3866 /// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes 3867 /* 3868 override inout(Element) parentNode() inout { 3869 return children.length ? children[0].parentNode : null; 3870 } 3871 */ 3872 override Element parentNode(Element p) { 3873 this._parentNode = p; 3874 foreach(child; children) 3875 child.parentNode = p; 3876 return p; 3877 } 3878 } 3879 3880 /// Given text, encode all html entities on it - &, <, >, and ". This function also 3881 /// encodes all 8 bit characters as entities, thus ensuring the resultant text will work 3882 /// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false 3883 /// 3884 /// The output parameter can be given to append to an existing buffer. You don't have to 3885 /// pass one; regardless, the return value will be usable for you, with just the data encoded. 3886 string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) { 3887 // if there's no entities, we can save a lot of time by not bothering with the 3888 // decoding loop. This check cuts the net toString time by better than half in my test. 3889 // let me know if it made your tests worse though, since if you use an entity in just about 3890 // every location, the check will add time... but I suspect the average experience is like mine 3891 // since the check gives up as soon as it can anyway. 3892 3893 bool shortcut = true; 3894 foreach(char c; data) { 3895 // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it. 3896 if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) { 3897 shortcut = false; // there's actual work to be done 3898 break; 3899 } 3900 } 3901 3902 if(shortcut) { 3903 output.put(data); 3904 return data; 3905 } 3906 3907 auto start = output.data.length; 3908 3909 output.reserve(data.length + 64); // grab some extra space for the encoded entities 3910 3911 foreach(dchar d; data) { 3912 if(d == '&') 3913 output.put("&"); 3914 else if (d == '<') 3915 output.put("<"); 3916 else if (d == '>') 3917 output.put(">"); 3918 else if (d == '\"') 3919 output.put("""); 3920 // else if (d == '\'') 3921 // output.put("'"); // if you are in an attribute, it might be important to encode for the same reason as double quotes 3922 // FIXME: should I encode apostrophes too? as '... I could also do space but if your html is so bad that it doesn't 3923 // quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh 3924 // idk about apostrophes though. Might be worth it, might not. 3925 else if (!encodeNonAscii || (d < 128 && d > 0)) 3926 output.put(d); 3927 else 3928 output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";"); 3929 } 3930 3931 //assert(output !is null); // this fails on empty attributes..... 3932 return output.data[start .. $]; 3933 3934 // data = data.replace("\u00a0", " "); 3935 } 3936 3937 /// An alias for htmlEntitiesEncode; it works for xml too 3938 string xmlEntitiesEncode(string data) { 3939 return htmlEntitiesEncode(data); 3940 } 3941 3942 /// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters. 3943 dchar parseEntity(in dchar[] entity) { 3944 switch(entity[1..$-1]) { 3945 case "quot": 3946 return '"'; 3947 case "apos": 3948 return '\''; 3949 case "lt": 3950 return '<'; 3951 case "gt": 3952 return '>'; 3953 case "amp": 3954 return '&'; 3955 // the next are html rather than xml 3956 3957 case "Agrave": return '\u00C0'; 3958 case "Aacute": return '\u00C1'; 3959 case "Acirc": return '\u00C2'; 3960 case "Atilde": return '\u00C3'; 3961 case "Auml": return '\u00C4'; 3962 case "Aring": return '\u00C5'; 3963 case "AElig": return '\u00C6'; 3964 case "Ccedil": return '\u00C7'; 3965 case "Egrave": return '\u00C8'; 3966 case "Eacute": return '\u00C9'; 3967 case "Ecirc": return '\u00CA'; 3968 case "Euml": return '\u00CB'; 3969 case "Igrave": return '\u00CC'; 3970 case "Iacute": return '\u00CD'; 3971 case "Icirc": return '\u00CE'; 3972 case "Iuml": return '\u00CF'; 3973 case "ETH": return '\u00D0'; 3974 case "Ntilde": return '\u00D1'; 3975 case "Ograve": return '\u00D2'; 3976 case "Oacute": return '\u00D3'; 3977 case "Ocirc": return '\u00D4'; 3978 case "Otilde": return '\u00D5'; 3979 case "Ouml": return '\u00D6'; 3980 case "Oslash": return '\u00D8'; 3981 case "Ugrave": return '\u00D9'; 3982 case "Uacute": return '\u00DA'; 3983 case "Ucirc": return '\u00DB'; 3984 case "Uuml": return '\u00DC'; 3985 case "Yacute": return '\u00DD'; 3986 case "THORN": return '\u00DE'; 3987 case "szlig": return '\u00DF'; 3988 case "agrave": return '\u00E0'; 3989 case "aacute": return '\u00E1'; 3990 case "acirc": return '\u00E2'; 3991 case "atilde": return '\u00E3'; 3992 case "auml": return '\u00E4'; 3993 case "aring": return '\u00E5'; 3994 case "aelig": return '\u00E6'; 3995 case "ccedil": return '\u00E7'; 3996 case "egrave": return '\u00E8'; 3997 case "eacute": return '\u00E9'; 3998 case "ecirc": return '\u00EA'; 3999 case "euml": return '\u00EB'; 4000 case "igrave": return '\u00EC'; 4001 case "iacute": return '\u00ED'; 4002 case "icirc": return '\u00EE'; 4003 case "iuml": return '\u00EF'; 4004 case "eth": return '\u00F0'; 4005 case "ntilde": return '\u00F1'; 4006 case "ograve": return '\u00F2'; 4007 case "oacute": return '\u00F3'; 4008 case "ocirc": return '\u00F4'; 4009 case "otilde": return '\u00F5'; 4010 case "ouml": return '\u00F6'; 4011 case "oslash": return '\u00F8'; 4012 case "ugrave": return '\u00F9'; 4013 case "uacute": return '\u00FA'; 4014 case "ucirc": return '\u00FB'; 4015 case "uuml": return '\u00FC'; 4016 case "yacute": return '\u00FD'; 4017 case "thorn": return '\u00FE'; 4018 case "yuml": return '\u00FF'; 4019 case "nbsp": return '\u00A0'; 4020 case "iexcl": return '\u00A1'; 4021 case "cent": return '\u00A2'; 4022 case "pound": return '\u00A3'; 4023 case "curren": return '\u00A4'; 4024 case "yen": return '\u00A5'; 4025 case "brvbar": return '\u00A6'; 4026 case "sect": return '\u00A7'; 4027 case "uml": return '\u00A8'; 4028 case "copy": return '\u00A9'; 4029 case "ordf": return '\u00AA'; 4030 case "laquo": return '\u00AB'; 4031 case "not": return '\u00AC'; 4032 case "shy": return '\u00AD'; 4033 case "reg": return '\u00AE'; 4034 case "ldquo": return '\u201c'; 4035 case "rdquo": return '\u201d'; 4036 case "macr": return '\u00AF'; 4037 case "deg": return '\u00B0'; 4038 case "plusmn": return '\u00B1'; 4039 case "sup2": return '\u00B2'; 4040 case "sup3": return '\u00B3'; 4041 case "acute": return '\u00B4'; 4042 case "micro": return '\u00B5'; 4043 case "para": return '\u00B6'; 4044 case "middot": return '\u00B7'; 4045 case "cedil": return '\u00B8'; 4046 case "sup1": return '\u00B9'; 4047 case "ordm": return '\u00BA'; 4048 case "raquo": return '\u00BB'; 4049 case "frac14": return '\u00BC'; 4050 case "frac12": return '\u00BD'; 4051 case "frac34": return '\u00BE'; 4052 case "iquest": return '\u00BF'; 4053 case "times": return '\u00D7'; 4054 case "divide": return '\u00F7'; 4055 case "OElig": return '\u0152'; 4056 case "oelig": return '\u0153'; 4057 case "Scaron": return '\u0160'; 4058 case "scaron": return '\u0161'; 4059 case "Yuml": return '\u0178'; 4060 case "fnof": return '\u0192'; 4061 case "circ": return '\u02C6'; 4062 case "tilde": return '\u02DC'; 4063 case "trade": return '\u2122'; 4064 case "euro": return '\u20AC'; 4065 4066 case "hellip": return '\u2026'; 4067 case "ndash": return '\u2013'; 4068 case "mdash": return '\u2014'; 4069 case "lsquo": return '\u2018'; 4070 case "rsquo": return '\u2019'; 4071 4072 case "Omicron": return '\u039f'; 4073 case "omicron": return '\u03bf'; 4074 4075 // and handling numeric entities 4076 default: 4077 if(entity[1] == '#') { 4078 if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) { 4079 auto hex = entity[3..$-1]; 4080 4081 auto p = intFromHex(to!string(hex).toLower()); 4082 return cast(dchar) p; 4083 } else { 4084 auto decimal = entity[2..$-1]; 4085 4086 // dealing with broken html entities 4087 while(decimal.length && (decimal[0] < '0' || decimal[0] > '9')) 4088 decimal = decimal[1 .. $]; 4089 4090 if(decimal.length == 0) 4091 return ' '; // this is really broken html 4092 // done with dealing with broken stuff 4093 4094 auto p = std.conv.to!int(decimal); 4095 return cast(dchar) p; 4096 } 4097 } else 4098 return '\ufffd'; // replacement character diamond thing 4099 } 4100 4101 assert(0); 4102 } 4103 4104 import std.utf; 4105 import std.stdio; 4106 4107 /// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string. 4108 /// By default, it uses loose mode - it will try to return a useful string from garbage input too. 4109 /// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input. 4110 string htmlEntitiesDecode(string data, bool strict = false) { 4111 // this check makes a *big* difference; about a 50% improvement of parse speed on my test. 4112 if(data.indexOf("&") == -1) // all html entities begin with & 4113 return data; // if there are no entities in here, we can return the original slice and save some time 4114 4115 char[] a; // this seems to do a *better* job than appender! 4116 4117 char[4] buffer; 4118 4119 bool tryingEntity = false; 4120 dchar[16] entityBeingTried; 4121 int entityBeingTriedLength = 0; 4122 int entityAttemptIndex = 0; 4123 4124 foreach(dchar ch; data) { 4125 if(tryingEntity) { 4126 entityAttemptIndex++; 4127 entityBeingTried[entityBeingTriedLength++] = ch; 4128 4129 // I saw some crappy html in the wild that looked like &0ї this tries to handle that. 4130 if(ch == '&') { 4131 if(strict) 4132 throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 4133 4134 // if not strict, let's try to parse both. 4135 4136 if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") 4137 a ~= "&"; // double amp means keep the first one, still try to parse the next one 4138 else 4139 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))]; 4140 4141 // tryingEntity is still true 4142 entityBeingTriedLength = 1; 4143 entityAttemptIndex = 0; // restarting o this 4144 } else 4145 if(ch == ';') { 4146 tryingEntity = false; 4147 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))]; 4148 } else if(ch == ' ') { 4149 // e.g. you & i 4150 if(strict) 4151 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 4152 else { 4153 tryingEntity = false; 4154 a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]); 4155 } 4156 } else { 4157 if(entityAttemptIndex >= 9) { 4158 if(strict) 4159 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 4160 else { 4161 tryingEntity = false; 4162 a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]); 4163 } 4164 } 4165 } 4166 } else { 4167 if(ch == '&') { 4168 tryingEntity = true; 4169 entityBeingTriedLength = 0; 4170 entityBeingTried[entityBeingTriedLength++] = ch; 4171 entityAttemptIndex = 0; 4172 } else { 4173 a ~= buffer[0 .. std.utf.encode(buffer, ch)]; 4174 } 4175 } 4176 } 4177 4178 if(tryingEntity) { 4179 if(strict) 4180 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 4181 4182 // otherwise, let's try to recover, at least so we don't drop any data 4183 a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]); 4184 // FIXME: what if we have "cool &"? should we try to parse it? 4185 } 4186 4187 return cast(string) a; // assumeUnique is actually kinda slow, lol 4188 } 4189 4190 abstract class SpecialElement : Element { 4191 this(Document _parentDocument) { 4192 super(_parentDocument); 4193 } 4194 4195 ///. 4196 override Element appendChild(Element e) { 4197 assert(0, "Cannot append to a special node"); 4198 } 4199 4200 ///. 4201 @property override int nodeType() const { 4202 return 100; 4203 } 4204 } 4205 4206 ///. 4207 class RawSource : SpecialElement { 4208 ///. 4209 this(Document _parentDocument, string s) { 4210 super(_parentDocument); 4211 source = s; 4212 tagName = "#raw"; 4213 } 4214 4215 ///. 4216 override string nodeValue() const { 4217 return this.toString(); 4218 } 4219 4220 ///. 4221 override string writeToAppender(Appender!string where = appender!string()) const { 4222 where.put(source); 4223 return source; 4224 } 4225 4226 override string toPrettyString(bool, int, string) const { 4227 return source; 4228 } 4229 4230 4231 override RawSource cloneNode(bool deep) { 4232 return new RawSource(parentDocument, source); 4233 } 4234 4235 ///. 4236 string source; 4237 } 4238 4239 abstract class ServerSideCode : SpecialElement { 4240 this(Document _parentDocument, string type) { 4241 super(_parentDocument); 4242 tagName = "#" ~ type; 4243 } 4244 4245 ///. 4246 override string nodeValue() const { 4247 return this.source; 4248 } 4249 4250 ///. 4251 override string writeToAppender(Appender!string where = appender!string()) const { 4252 auto start = where.data.length; 4253 where.put("<"); 4254 where.put(source); 4255 where.put(">"); 4256 return where.data[start .. $]; 4257 } 4258 4259 override string toPrettyString(bool, int, string) const { 4260 return "<" ~ source ~ ">"; 4261 } 4262 4263 ///. 4264 string source; 4265 } 4266 4267 ///. 4268 class PhpCode : ServerSideCode { 4269 ///. 4270 this(Document _parentDocument, string s) { 4271 super(_parentDocument, "php"); 4272 source = s; 4273 } 4274 4275 override PhpCode cloneNode(bool deep) { 4276 return new PhpCode(parentDocument, source); 4277 } 4278 } 4279 4280 ///. 4281 class AspCode : ServerSideCode { 4282 ///. 4283 this(Document _parentDocument, string s) { 4284 super(_parentDocument, "asp"); 4285 source = s; 4286 } 4287 4288 override AspCode cloneNode(bool deep) { 4289 return new AspCode(parentDocument, source); 4290 } 4291 } 4292 4293 ///. 4294 class BangInstruction : SpecialElement { 4295 ///. 4296 this(Document _parentDocument, string s) { 4297 super(_parentDocument); 4298 source = s; 4299 tagName = "#bpi"; 4300 } 4301 4302 ///. 4303 override string nodeValue() const { 4304 return this.source; 4305 } 4306 4307 override BangInstruction cloneNode(bool deep) { 4308 return new BangInstruction(parentDocument, source); 4309 } 4310 4311 ///. 4312 override string writeToAppender(Appender!string where = appender!string()) const { 4313 auto start = where.data.length; 4314 where.put("<!"); 4315 where.put(source); 4316 where.put(">"); 4317 return where.data[start .. $]; 4318 } 4319 4320 override string toPrettyString(bool, int, string) const { 4321 string s; 4322 s ~= "<!"; 4323 s ~= source; 4324 s ~= ">"; 4325 return s; 4326 } 4327 4328 ///. 4329 string source; 4330 } 4331 4332 ///. 4333 class QuestionInstruction : SpecialElement { 4334 ///. 4335 this(Document _parentDocument, string s) { 4336 super(_parentDocument); 4337 source = s; 4338 tagName = "#qpi"; 4339 } 4340 4341 override QuestionInstruction cloneNode(bool deep) { 4342 return new QuestionInstruction(parentDocument, source); 4343 } 4344 4345 ///. 4346 override string nodeValue() const { 4347 return this.source; 4348 } 4349 4350 ///. 4351 override string writeToAppender(Appender!string where = appender!string()) const { 4352 auto start = where.data.length; 4353 where.put("<"); 4354 where.put(source); 4355 where.put(">"); 4356 return where.data[start .. $]; 4357 } 4358 4359 override string toPrettyString(bool, int, string) const { 4360 string s; 4361 s ~= "<"; 4362 s ~= source; 4363 s ~= ">"; 4364 return s; 4365 } 4366 4367 4368 ///. 4369 string source; 4370 } 4371 4372 ///. 4373 class HtmlComment : SpecialElement { 4374 ///. 4375 this(Document _parentDocument, string s) { 4376 super(_parentDocument); 4377 source = s; 4378 tagName = "#comment"; 4379 } 4380 4381 override HtmlComment cloneNode(bool deep) { 4382 return new HtmlComment(parentDocument, source); 4383 } 4384 4385 ///. 4386 override string nodeValue() const { 4387 return this.source; 4388 } 4389 4390 ///. 4391 override string writeToAppender(Appender!string where = appender!string()) const { 4392 auto start = where.data.length; 4393 where.put("<!--"); 4394 where.put(source); 4395 where.put("-->"); 4396 return where.data[start .. $]; 4397 } 4398 4399 override string toPrettyString(bool, int, string) const { 4400 string s; 4401 s ~= "<!--"; 4402 s ~= source; 4403 s ~= "-->"; 4404 return s; 4405 } 4406 4407 4408 ///. 4409 string source; 4410 } 4411 4412 4413 4414 4415 ///. 4416 class TextNode : Element { 4417 public: 4418 ///. 4419 this(Document _parentDocument, string e) { 4420 super(_parentDocument); 4421 contents = e; 4422 tagName = "#text"; 4423 } 4424 4425 /// 4426 this(string e) { 4427 this(null, e); 4428 } 4429 4430 string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes 4431 4432 ///. 4433 static TextNode fromUndecodedString(Document _parentDocument, string html) { 4434 auto e = new TextNode(_parentDocument, ""); 4435 e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose); 4436 return e; 4437 } 4438 4439 ///. 4440 override @property TextNode cloneNode(bool deep) { 4441 auto n = new TextNode(parentDocument, contents); 4442 return n; 4443 } 4444 4445 ///. 4446 override string nodeValue() const { 4447 return this.contents; //toString(); 4448 } 4449 4450 ///. 4451 @property override int nodeType() const { 4452 return NodeType.Text; 4453 } 4454 4455 ///. 4456 override string writeToAppender(Appender!string where = appender!string()) const { 4457 string s; 4458 if(contents.length) 4459 s = htmlEntitiesEncode(contents, where); 4460 else 4461 s = ""; 4462 4463 assert(s !is null); 4464 return s; 4465 } 4466 4467 override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 4468 string s; 4469 4470 string contents = this.contents; 4471 // we will first collapse the whitespace per html 4472 // sort of. note this can break stuff yo!!!! 4473 if(this.parentNode is null || this.parentNode.tagName != "pre") { 4474 string n = ""; 4475 bool lastWasWhitespace = indentationLevel > 0; 4476 foreach(char c; contents) { 4477 if(c.isSimpleWhite) { 4478 if(!lastWasWhitespace) 4479 n ~= ' '; 4480 lastWasWhitespace = true; 4481 } else { 4482 n ~= c; 4483 lastWasWhitespace = false; 4484 } 4485 } 4486 4487 contents = n; 4488 } 4489 4490 if(this.parentNode !is null && this.parentNode.tagName != "p") { 4491 contents = contents.strip; 4492 } 4493 4494 auto e = htmlEntitiesEncode(contents); 4495 import std.algorithm.iteration : splitter; 4496 bool first = true; 4497 foreach(line; splitter(e, "\n")) { 4498 if(first) { 4499 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 4500 first = false; 4501 } else { 4502 s ~= "\n"; 4503 if(insertComments) 4504 s ~= "<!--"; 4505 foreach(i; 0 .. indentationLevel) 4506 s ~= "\t"; 4507 if(insertComments) 4508 s ~= "-->"; 4509 } 4510 s ~= line.stripRight; 4511 } 4512 return s; 4513 } 4514 4515 ///. 4516 override Element appendChild(Element e) { 4517 assert(0, "Cannot append to a text node"); 4518 } 4519 4520 ///. 4521 string contents; 4522 // alias contents content; // I just mistype this a lot, 4523 } 4524 4525 /** 4526 There are subclasses of Element offering improved helper 4527 functions for the element in HTML. 4528 */ 4529 4530 ///. 4531 class Link : Element { 4532 4533 ///. 4534 this(Document _parentDocument) { 4535 super(_parentDocument); 4536 this.tagName = "a"; 4537 } 4538 4539 4540 ///. 4541 this(string href, string text) { 4542 super("a"); 4543 setAttribute("href", href); 4544 innerText = text; 4545 } 4546 /+ 4547 /// Returns everything in the href EXCEPT the query string 4548 @property string targetSansQuery() { 4549 4550 } 4551 4552 ///. 4553 @property string domainName() { 4554 4555 } 4556 4557 ///. 4558 @property string path 4559 +/ 4560 /// This gets a variable from the URL's query string. 4561 string getValue(string name) { 4562 auto vars = variablesHash(); 4563 if(name in vars) 4564 return vars[name]; 4565 return null; 4566 } 4567 4568 private string[string] variablesHash() { 4569 string href = getAttribute("href"); 4570 if(href is null) 4571 return null; 4572 4573 auto ques = href.indexOf("?"); 4574 string str = ""; 4575 if(ques != -1) { 4576 str = href[ques+1..$]; 4577 4578 auto fragment = str.indexOf("#"); 4579 if(fragment != -1) 4580 str = str[0..fragment]; 4581 } 4582 4583 string[] variables = str.split("&"); 4584 4585 string[string] hash; 4586 4587 foreach(var; variables) { 4588 auto index = var.indexOf("="); 4589 if(index == -1) 4590 hash[var] = ""; 4591 else { 4592 hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]); 4593 } 4594 } 4595 4596 return hash; 4597 } 4598 4599 ///. 4600 /*private*/ void updateQueryString(string[string] vars) { 4601 string href = getAttribute("href"); 4602 4603 auto question = href.indexOf("?"); 4604 if(question != -1) 4605 href = href[0..question]; 4606 4607 string frag = ""; 4608 auto fragment = href.indexOf("#"); 4609 if(fragment != -1) { 4610 frag = href[fragment..$]; 4611 href = href[0..fragment]; 4612 } 4613 4614 string query = "?"; 4615 bool first = true; 4616 foreach(name, value; vars) { 4617 if(!first) 4618 query ~= "&"; 4619 else 4620 first = false; 4621 4622 query ~= encodeComponent(name); 4623 if(value.length) 4624 query ~= "=" ~ encodeComponent(value); 4625 } 4626 4627 if(query != "?") 4628 href ~= query; 4629 4630 href ~= frag; 4631 4632 setAttribute("href", href); 4633 } 4634 4635 /// Sets or adds the variable with the given name to the given value 4636 /// It automatically URI encodes the values and takes care of the ? and &. 4637 override void setValue(string name, string variable) { 4638 auto vars = variablesHash(); 4639 vars[name] = variable; 4640 4641 updateQueryString(vars); 4642 } 4643 4644 /// Removes the given variable from the query string 4645 void removeValue(string name) { 4646 auto vars = variablesHash(); 4647 vars.remove(name); 4648 4649 updateQueryString(vars); 4650 } 4651 4652 /* 4653 ///. 4654 override string toString() { 4655 4656 } 4657 4658 ///. 4659 override string getAttribute(string name) { 4660 if(name == "href") { 4661 4662 } else 4663 return super.getAttribute(name); 4664 } 4665 */ 4666 } 4667 4668 ///. 4669 class Form : Element { 4670 4671 ///. 4672 this(Document _parentDocument) { 4673 super(_parentDocument); 4674 tagName = "form"; 4675 } 4676 4677 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 4678 auto t = this.querySelector("fieldset div"); 4679 if(t is null) 4680 return super.addField(label, name, type, fieldOptions); 4681 else 4682 return t.addField(label, name, type, fieldOptions); 4683 } 4684 4685 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 4686 auto type = "text"; 4687 auto t = this.querySelector("fieldset div"); 4688 if(t is null) 4689 return super.addField(label, name, type, fieldOptions); 4690 else 4691 return t.addField(label, name, type, fieldOptions); 4692 } 4693 4694 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 4695 auto t = this.querySelector("fieldset div"); 4696 if(t is null) 4697 return super.addField(label, name, options, fieldOptions); 4698 else 4699 return t.addField(label, name, options, fieldOptions); 4700 } 4701 4702 override void setValue(string field, string value) { 4703 setValue(field, value, true); 4704 } 4705 4706 // FIXME: doesn't handle arrays; multiple fields can have the same name 4707 4708 /// Set's the form field's value. For input boxes, this sets the value attribute. For 4709 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 4710 /// the checked/selected attribute from all, and adds it to the one matching the value. 4711 /// For checkboxes, if the value is non-null and not empty, it checks the box. 4712 4713 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 4714 /// Otherwise, it makes a new input with type=hidden to keep the value. 4715 void setValue(string field, string value, bool makeNew) { 4716 auto eles = getField(field); 4717 if(eles.length == 0) { 4718 if(makeNew) { 4719 addInput(field, value); 4720 return; 4721 } else 4722 throw new Exception("form field does not exist"); 4723 } 4724 4725 if(eles.length == 1) { 4726 auto e = eles[0]; 4727 switch(e.tagName) { 4728 default: assert(0); 4729 case "textarea": 4730 e.innerText = value; 4731 break; 4732 case "input": 4733 string type = e.getAttribute("type"); 4734 if(type is null) { 4735 e.value = value; 4736 return; 4737 } 4738 switch(type) { 4739 case "checkbox": 4740 case "radio": 4741 if(value.length && value != "false") 4742 e.setAttribute("checked", "checked"); 4743 else 4744 e.removeAttribute("checked"); 4745 break; 4746 default: 4747 e.value = value; 4748 return; 4749 } 4750 break; 4751 case "select": 4752 bool found = false; 4753 foreach(child; e.tree) { 4754 if(child.tagName != "option") 4755 continue; 4756 string val = child.getAttribute("value"); 4757 if(val is null) 4758 val = child.innerText; 4759 if(val == value) { 4760 child.setAttribute("selected", "selected"); 4761 found = true; 4762 } else 4763 child.removeAttribute("selected"); 4764 } 4765 4766 if(!found) { 4767 e.addChild("option", value) 4768 .setAttribute("selected", "selected"); 4769 } 4770 break; 4771 } 4772 } else { 4773 // assume radio boxes 4774 foreach(e; eles) { 4775 string val = e.getAttribute("value"); 4776 //if(val is null) 4777 // throw new Exception("don't know what to do with radio boxes with null value"); 4778 if(val == value) 4779 e.setAttribute("checked", "checked"); 4780 else 4781 e.removeAttribute("checked"); 4782 } 4783 } 4784 } 4785 4786 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 4787 /// it makes no attempt to find and modify existing elements in the form to the new values. 4788 void addValueArray(string key, string[] arrayOfValues) { 4789 foreach(arr; arrayOfValues) 4790 addChild("input", key, arr); 4791 } 4792 4793 /// Gets the value of the field; what would be given if it submitted right now. (so 4794 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 4795 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 4796 string getValue(string field) { 4797 auto eles = getField(field); 4798 if(eles.length == 0) 4799 return ""; 4800 if(eles.length == 1) { 4801 auto e = eles[0]; 4802 switch(e.tagName) { 4803 default: assert(0); 4804 case "input": 4805 if(e.type == "checkbox") { 4806 if(e.checked) 4807 return e.value.length ? e.value : "checked"; 4808 return ""; 4809 } else 4810 return e.value; 4811 case "textarea": 4812 return e.innerText; 4813 case "select": 4814 foreach(child; e.tree) { 4815 if(child.tagName != "option") 4816 continue; 4817 if(child.selected) 4818 return child.value; 4819 } 4820 break; 4821 } 4822 } else { 4823 // assuming radio 4824 foreach(e; eles) { 4825 if(e.checked) 4826 return e.value; 4827 } 4828 } 4829 4830 return ""; 4831 } 4832 4833 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 4834 ///. 4835 string getPostableData() { 4836 bool[string] namesDone; 4837 4838 string ret; 4839 bool outputted = false; 4840 4841 foreach(e; getElementsBySelector("[name]")) { 4842 if(e.name in namesDone) 4843 continue; 4844 4845 if(outputted) 4846 ret ~= "&"; 4847 else 4848 outputted = true; 4849 4850 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 4851 4852 namesDone[e.name] = true; 4853 } 4854 4855 return ret; 4856 } 4857 4858 /// Gets the actual elements with the given name 4859 Element[] getField(string name) { 4860 Element[] ret; 4861 foreach(e; tree) { 4862 if(e.name == name) 4863 ret ~= e; 4864 } 4865 return ret; 4866 } 4867 4868 /// Grabs the <label> with the given for tag, if there is one. 4869 Element getLabel(string forId) { 4870 foreach(e; tree) 4871 if(e.tagName == "label" && e.getAttribute("for") == forId) 4872 return e; 4873 return null; 4874 } 4875 4876 /// Adds a new INPUT field to the end of the form with the given attributes. 4877 Element addInput(string name, string value, string type = "hidden") { 4878 auto e = new Element(parentDocument, "input", null, true); 4879 e.name = name; 4880 e.value = value; 4881 e.type = type; 4882 4883 appendChild(e); 4884 4885 return e; 4886 } 4887 4888 /// Removes the given field from the form. It finds the element and knocks it right out. 4889 void removeField(string name) { 4890 foreach(e; getField(name)) 4891 e.parentNode.removeChild(e); 4892 } 4893 4894 /+ 4895 /// Returns all form members. 4896 @property Element[] elements() { 4897 4898 } 4899 4900 ///. 4901 string opDispatch(string name)(string v = null) 4902 // filter things that should actually be attributes on the form 4903 if( name != "method" && name != "action" && name != "enctype" 4904 && name != "style" && name != "name" && name != "id" && name != "class") 4905 { 4906 4907 } 4908 +/ 4909 /+ 4910 void submit() { 4911 // take its elements and submit them through http 4912 } 4913 +/ 4914 } 4915 4916 import std.conv; 4917 4918 ///. 4919 class Table : Element { 4920 4921 ///. 4922 this(Document _parentDocument) { 4923 super(_parentDocument); 4924 tagName = "table"; 4925 } 4926 4927 /// Creates an element with the given type and content. 4928 Element th(T)(T t) { 4929 Element e; 4930 if(parentDocument !is null) 4931 e = parentDocument.createElement("th"); 4932 else 4933 e = Element.make("th"); 4934 static if(is(T == Html)) 4935 e.innerHTML = t; 4936 else 4937 e.innerText = to!string(t); 4938 return e; 4939 } 4940 4941 /// ditto 4942 Element td(T)(T t) { 4943 Element e; 4944 if(parentDocument !is null) 4945 e = parentDocument.createElement("td"); 4946 else 4947 e = Element.make("td"); 4948 static if(is(T == Html)) 4949 e.innerHTML = t; 4950 else 4951 e.innerText = to!string(t); 4952 return e; 4953 } 4954 4955 /// . 4956 Element appendHeaderRow(T...)(T t) { 4957 return appendRowInternal("th", "thead", t); 4958 } 4959 4960 /// . 4961 Element appendFooterRow(T...)(T t) { 4962 return appendRowInternal("td", "tfoot", t); 4963 } 4964 4965 /// . 4966 Element appendRow(T...)(T t) { 4967 return appendRowInternal("td", "tbody", t); 4968 } 4969 4970 void addColumnClasses(string[] classes...) { 4971 auto grid = getGrid(); 4972 foreach(row; grid) 4973 foreach(i, cl; classes) { 4974 if(cl.length) 4975 if(i < row.length) 4976 row[i].addClass(cl); 4977 } 4978 } 4979 4980 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 4981 Element row = Element.make("tr"); 4982 4983 foreach(e; t) { 4984 static if(is(typeof(e) : Element)) { 4985 if(e.tagName == "td" || e.tagName == "th") 4986 row.appendChild(e); 4987 else { 4988 Element a = Element.make(innerType); 4989 4990 a.appendChild(e); 4991 4992 row.appendChild(a); 4993 } 4994 } else static if(is(typeof(e) == Html)) { 4995 Element a = Element.make(innerType); 4996 a.innerHTML = e.source; 4997 row.appendChild(a); 4998 } else static if(is(typeof(e) == Element[])) { 4999 Element a = Element.make(innerType); 5000 foreach(ele; e) 5001 a.appendChild(ele); 5002 row.appendChild(a); 5003 } else static if(is(typeof(e) == string[])) { 5004 foreach(ele; e) { 5005 Element a = Element.make(innerType); 5006 a.innerText = to!string(ele); 5007 row.appendChild(a); 5008 } 5009 } else { 5010 Element a = Element.make(innerType); 5011 a.innerText = to!string(e); 5012 row.appendChild(a); 5013 } 5014 } 5015 5016 foreach(e; children) { 5017 if(e.tagName == findType) { 5018 e.appendChild(row); 5019 return row; 5020 } 5021 } 5022 5023 // the type was not found if we are here... let's add it so it is well-formed 5024 auto lol = this.addChild(findType); 5025 lol.appendChild(row); 5026 5027 return row; 5028 } 5029 5030 ///. 5031 Element captionElement() { 5032 Element cap; 5033 foreach(c; children) { 5034 if(c.tagName == "caption") { 5035 cap = c; 5036 break; 5037 } 5038 } 5039 5040 if(cap is null) { 5041 cap = Element.make("caption"); 5042 appendChild(cap); 5043 } 5044 5045 return cap; 5046 } 5047 5048 ///. 5049 @property string caption() { 5050 return captionElement().innerText; 5051 } 5052 5053 ///. 5054 @property void caption(string text) { 5055 captionElement().innerText = text; 5056 } 5057 5058 /// Gets the logical layout of the table as a rectangular grid of 5059 /// cells. It considers rowspan and colspan. A cell with a large 5060 /// span is represented in the grid by being referenced several times. 5061 /// The tablePortition parameter can get just a <thead>, <tbody>, or 5062 /// <tfoot> portion if you pass one. 5063 /// 5064 /// Note: the rectangular grid might include null cells. 5065 /// 5066 /// This is kinda expensive so you should call once when you want the grid, 5067 /// then do lookups on the returned array. 5068 TableCell[][] getGrid(Element tablePortition = null) 5069 in { 5070 if(tablePortition is null) 5071 assert(tablePortition is null); 5072 else { 5073 assert(tablePortition !is null); 5074 assert(tablePortition.parentNode is this); 5075 assert( 5076 tablePortition.tagName == "tbody" 5077 || 5078 tablePortition.tagName == "tfoot" 5079 || 5080 tablePortition.tagName == "thead" 5081 ); 5082 } 5083 } 5084 body { 5085 if(tablePortition is null) 5086 tablePortition = this; 5087 5088 TableCell[][] ret; 5089 5090 // FIXME: will also return rows of sub tables! 5091 auto rows = tablePortition.getElementsByTagName("tr"); 5092 ret.length = rows.length; 5093 5094 int maxLength = 0; 5095 5096 int insertCell(int row, int position, TableCell cell) { 5097 if(row >= ret.length) 5098 return position; // not supposed to happen - a rowspan is prolly too big. 5099 5100 if(position == -1) { 5101 position++; 5102 foreach(item; ret[row]) { 5103 if(item is null) 5104 break; 5105 position++; 5106 } 5107 } 5108 5109 if(position < ret[row].length) 5110 ret[row][position] = cell; 5111 else 5112 foreach(i; ret[row].length .. position + 1) { 5113 if(i == position) 5114 ret[row] ~= cell; 5115 else 5116 ret[row] ~= null; 5117 } 5118 return position; 5119 } 5120 5121 foreach(i, rowElement; rows) { 5122 auto row = cast(TableRow) rowElement; 5123 assert(row !is null); 5124 assert(i < ret.length); 5125 5126 int position = 0; 5127 foreach(cellElement; rowElement.childNodes) { 5128 auto cell = cast(TableCell) cellElement; 5129 if(cell is null) 5130 continue; 5131 5132 // FIXME: colspan == 0 or rowspan == 0 5133 // is supposed to mean fill in the rest of 5134 // the table, not skip it 5135 foreach(int j; 0 .. cell.colspan) { 5136 foreach(int k; 0 .. cell.rowspan) 5137 // if the first row, always append. 5138 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 5139 position++; 5140 } 5141 } 5142 5143 if(ret[i].length > maxLength) 5144 maxLength = cast(int) ret[i].length; 5145 } 5146 5147 // want to ensure it's rectangular 5148 foreach(ref r; ret) { 5149 foreach(i; r.length .. maxLength) 5150 r ~= null; 5151 } 5152 5153 return ret; 5154 } 5155 } 5156 5157 /// Represents a table row element - a <tr> 5158 class TableRow : Element { 5159 ///. 5160 this(Document _parentDocument) { 5161 super(_parentDocument); 5162 tagName = "tr"; 5163 } 5164 5165 // FIXME: the standard says there should be a lot more in here, 5166 // but meh, I never use it and it's a pain to implement. 5167 } 5168 5169 /// Represents anything that can be a table cell - <td> or <th> html. 5170 class TableCell : Element { 5171 ///. 5172 this(Document _parentDocument, string _tagName) { 5173 super(_parentDocument, _tagName); 5174 } 5175 5176 @property int rowspan() const { 5177 int ret = 1; 5178 auto it = getAttribute("rowspan"); 5179 if(it.length) 5180 ret = to!int(it); 5181 return ret; 5182 } 5183 5184 @property int colspan() const { 5185 int ret = 1; 5186 auto it = getAttribute("colspan"); 5187 if(it.length) 5188 ret = to!int(it); 5189 return ret; 5190 } 5191 5192 @property int rowspan(int i) { 5193 setAttribute("rowspan", to!string(i)); 5194 return i; 5195 } 5196 5197 @property int colspan(int i) { 5198 setAttribute("colspan", to!string(i)); 5199 return i; 5200 } 5201 5202 } 5203 5204 5205 ///. 5206 class MarkupException : Exception { 5207 5208 ///. 5209 this(string message, string file = __FILE__, size_t line = __LINE__) { 5210 super(message, file, line); 5211 } 5212 } 5213 5214 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 5215 class ElementNotFoundException : Exception { 5216 5217 /// type == kind of element you were looking for and search == a selector describing the search. 5218 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 5219 this.searchContext = searchContext; 5220 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 5221 } 5222 5223 Element searchContext; 5224 } 5225 5226 /// The html struct is used to differentiate between regular text nodes and html in certain functions 5227 /// 5228 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 5229 struct Html { 5230 /// This string holds the actual html. Use it to retrieve the contents. 5231 string source; 5232 } 5233 5234 // for the observers 5235 enum DomMutationOperations { 5236 setAttribute, 5237 removeAttribute, 5238 appendChild, // tagname, attributes[], innerHTML 5239 insertBefore, 5240 truncateChildren, 5241 removeChild, 5242 appendHtml, 5243 replaceHtml, 5244 appendText, 5245 replaceText, 5246 replaceTextOnly 5247 } 5248 5249 // and for observers too 5250 struct DomMutationEvent { 5251 DomMutationOperations operation; 5252 Element target; 5253 Element related; // what this means differs with the operation 5254 Element related2; 5255 string relatedString; 5256 string relatedString2; 5257 } 5258 5259 5260 private immutable static string[] selfClosedElements = [ 5261 // html 4 5262 "img", "hr", "input", "br", "col", "link", "meta", 5263 // html 5 5264 "source" ]; 5265 5266 private immutable static string[] inlineElements = [ 5267 "span", "strong", "em", "b", "i", "a" 5268 ]; 5269 5270 5271 static import std.conv; 5272 5273 ///. 5274 int intFromHex(string hex) { 5275 int place = 1; 5276 int value = 0; 5277 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 5278 int v; 5279 char q = hex[a]; 5280 if( q >= '0' && q <= '9') 5281 v = q - '0'; 5282 else if (q >= 'a' && q <= 'f') 5283 v = q - 'a' + 10; 5284 else throw new Exception("Illegal hex character: " ~ q); 5285 5286 value += v * place; 5287 5288 place *= 16; 5289 } 5290 5291 return value; 5292 } 5293 5294 5295 // CSS selector handling 5296 5297 // EXTENSIONS 5298 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 5299 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 5300 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 5301 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 5302 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 5303 5304 5305 5306 // CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it. 5307 // That might be useful to implement, though I do have parent selectors too. 5308 5309 ///. 5310 static immutable string[] selectorTokens = [ 5311 // It is important that the 2 character possibilities go first here for accurate lexing 5312 "~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard 5313 "::", ">>", 5314 "<<", // my any-parent extension (reciprocal of whitespace) 5315 // " - ", // previous-sibling extension (whitespace required to disambiguate tag-names) 5316 ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")" 5317 ]; // other is white space or a name. 5318 5319 ///. 5320 sizediff_t idToken(string str, sizediff_t position) { 5321 sizediff_t tid = -1; 5322 char c = str[position]; 5323 foreach(a, token; selectorTokens) 5324 5325 if(c == token[0]) { 5326 if(token.length > 1) { 5327 if(position + 1 >= str.length || str[position+1] != token[1]) 5328 continue; // not this token 5329 } 5330 tid = a; 5331 break; 5332 } 5333 return tid; 5334 } 5335 5336 ///. 5337 // look, ma, no phobos! 5338 // new lexer by ketmar 5339 string[] lexSelector (string selstr) { 5340 5341 static sizediff_t idToken (string str, size_t stpos) { 5342 char c = str[stpos]; 5343 foreach (sizediff_t tidx, immutable token; selectorTokens) { 5344 if (c == token[0]) { 5345 if (token.length > 1) { 5346 assert(token.length == 2, token); // we don't have 3-char tokens yet 5347 if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue; 5348 } 5349 return tidx; 5350 } 5351 } 5352 return -1; 5353 } 5354 5355 // skip spaces and comments 5356 static string removeLeadingBlanks (string str) { 5357 size_t curpos = 0; 5358 while (curpos < str.length) { 5359 immutable char ch = str[curpos]; 5360 // this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares! 5361 if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') { 5362 // comment 5363 curpos += 2; 5364 while (curpos < str.length) { 5365 if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') { 5366 curpos += 2; 5367 break; 5368 } 5369 ++curpos; 5370 } 5371 } else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below. 5372 ++curpos; 5373 5374 // FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While 5375 // "foo bar" is clear, and can only have one meaning, consider ".foo .bar". 5376 // That is not the same as ".foo.bar". If the space is stripped, important 5377 // information is lost, despite the tokens being separatable anyway. 5378 // 5379 // The parser really needs to be aware of the presence of a space. 5380 } else { 5381 break; 5382 } 5383 } 5384 return str[curpos..$]; 5385 } 5386 5387 static bool isBlankAt() (string str, size_t pos) { 5388 // we should consider unicode spaces too, but... unicode sux anyway. 5389 return 5390 (pos < str.length && // in string 5391 (str[pos] <= 32 || // space 5392 (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment 5393 } 5394 5395 string[] tokens; 5396 // lexx it! 5397 while ((selstr = removeLeadingBlanks(selstr)).length > 0) { 5398 if(selstr[0] == '\"' || selstr[0] == '\'') { 5399 auto end = selstr[0]; 5400 auto pos = 1; 5401 bool escaping; 5402 while(pos < selstr.length && !escaping && selstr[pos] != end) { 5403 if(escaping) 5404 escaping = false; 5405 else if(selstr[pos] == '\\') 5406 escaping = true; 5407 pos++; 5408 } 5409 5410 // FIXME: do better unescaping 5411 tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`); 5412 if(pos+1 >= selstr.length) 5413 assert(0, selstr); 5414 selstr = selstr[pos + 1.. $]; 5415 continue; 5416 } 5417 5418 5419 // no tokens starts with escape 5420 immutable tid = idToken(selstr, 0); 5421 if (tid >= 0) { 5422 // special token 5423 tokens ~= selectorTokens[tid]; // it's funnier this way 5424 selstr = selstr[selectorTokens[tid].length..$]; 5425 continue; 5426 } 5427 // from start to space or special token 5428 size_t escapePos = size_t.max; 5429 size_t curpos = 0; // i can has chizburger^w escape at the start 5430 while (curpos < selstr.length) { 5431 if (selstr[curpos] == '\\') { 5432 // this is escape, just skip it and next char 5433 if (escapePos == size_t.max) escapePos = curpos; 5434 curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length); 5435 } else { 5436 if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break; 5437 ++curpos; 5438 } 5439 } 5440 // identifier 5441 if (escapePos != size_t.max) { 5442 // i hate it when it happens 5443 string id = selstr[0..escapePos]; 5444 while (escapePos < curpos) { 5445 if (curpos-escapePos < 2) break; 5446 id ~= selstr[escapePos+1]; // escaped char 5447 escapePos += 2; 5448 immutable stp = escapePos; 5449 while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos; 5450 if (escapePos > stp) id ~= selstr[stp..escapePos]; 5451 } 5452 if (id.length > 0) tokens ~= id; 5453 } else { 5454 tokens ~= selstr[0..curpos]; 5455 } 5456 selstr = selstr[curpos..$]; 5457 } 5458 return tokens; 5459 } 5460 version(unittest_domd_lexer) unittest { 5461 assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]); 5462 assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]); 5463 assert(lexSelector(r" < <") == ["<", "<"]); 5464 assert(lexSelector(r" <<") == ["<<"]); 5465 assert(lexSelector(r" <</") == ["<<", "/"]); 5466 assert(lexSelector(r" <</*") == ["<<"]); 5467 assert(lexSelector(r" <\</*") == ["<", "<"]); 5468 assert(lexSelector(r"heh\") == ["heh"]); 5469 assert(lexSelector(r"alice \") == ["alice"]); 5470 assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]); 5471 } 5472 5473 ///. 5474 struct SelectorPart { 5475 string tagNameFilter; ///. 5476 string[] attributesPresent; /// [attr] 5477 string[2][] attributesEqual; /// [attr=value] 5478 string[2][] attributesStartsWith; /// [attr^=value] 5479 string[2][] attributesEndsWith; /// [attr$=value] 5480 // split it on space, then match to these 5481 string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value] 5482 // split it on dash, then match to these 5483 string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value] 5484 string[2][] attributesInclude; /// [attr*=value] 5485 string[2][] attributesNotEqual; /// [attr!=value] -- extension by me 5486 5487 string[] hasSelectors; /// :has(this) 5488 string[] notSelectors; /// :not(this) 5489 5490 ParsedNth[] nthOfType; /// . 5491 ParsedNth[] nthLastOfType; /// . 5492 ParsedNth[] nthChild; /// . 5493 5494 bool firstChild; ///. 5495 bool lastChild; ///. 5496 5497 bool firstOfType; /// . 5498 bool lastOfType; /// . 5499 5500 bool emptyElement; ///. 5501 bool whitespaceOnly; /// 5502 bool oddChild; ///. 5503 bool evenChild; ///. 5504 5505 bool rootElement; ///. 5506 5507 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 5508 5509 bool isCleanSlateExceptSeparation() { 5510 auto cp = this; 5511 cp.separation = -1; 5512 return cp is SelectorPart.init; 5513 } 5514 5515 ///. 5516 string toString() { 5517 string ret; 5518 switch(separation) { 5519 default: assert(0); 5520 case -1: break; 5521 case 0: ret ~= " "; break; 5522 case 1: ret ~= " > "; break; 5523 case 2: ret ~= " + "; break; 5524 case 3: ret ~= " ~ "; break; 5525 case 4: ret ~= " < "; break; 5526 } 5527 ret ~= tagNameFilter; 5528 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 5529 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 5530 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 5531 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 5532 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 5533 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 5534 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 5535 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 5536 5537 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 5538 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 5539 5540 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 5541 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 5542 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 5543 5544 if(firstChild) ret ~= ":first-child"; 5545 if(lastChild) ret ~= ":last-child"; 5546 if(firstOfType) ret ~= ":first-of-type"; 5547 if(lastOfType) ret ~= ":last-of-type"; 5548 if(emptyElement) ret ~= ":empty"; 5549 if(whitespaceOnly) ret ~= ":whitespace-only"; 5550 if(oddChild) ret ~= ":odd-child"; 5551 if(evenChild) ret ~= ":even-child"; 5552 if(rootElement) ret ~= ":root"; 5553 5554 return ret; 5555 } 5556 5557 // USEFUL 5558 ///. 5559 bool matchElement(Element e) { 5560 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 5561 // Each individual call is reasonably fast already, but it adds up. 5562 if(e is null) return false; 5563 if(e.nodeType != 1) return false; 5564 5565 if(tagNameFilter != "" && tagNameFilter != "*") 5566 if(e.tagName != tagNameFilter) 5567 return false; 5568 if(firstChild) { 5569 if(e.parentNode is null) 5570 return false; 5571 if(e.parentNode.childElements[0] !is e) 5572 return false; 5573 } 5574 if(lastChild) { 5575 if(e.parentNode is null) 5576 return false; 5577 auto ce = e.parentNode.childElements; 5578 if(ce[$-1] !is e) 5579 return false; 5580 } 5581 if(firstOfType) { 5582 if(e.parentNode is null) 5583 return false; 5584 auto ce = e.parentNode.childElements; 5585 foreach(c; ce) { 5586 if(c.tagName == e.tagName) { 5587 if(c is e) 5588 return true; 5589 else 5590 return false; 5591 } 5592 } 5593 } 5594 if(lastOfType) { 5595 if(e.parentNode is null) 5596 return false; 5597 auto ce = e.parentNode.childElements; 5598 foreach_reverse(c; ce) { 5599 if(c.tagName == e.tagName) { 5600 if(c is e) 5601 return true; 5602 else 5603 return false; 5604 } 5605 } 5606 } 5607 if(emptyElement) { 5608 if(e.children.length) 5609 return false; 5610 } 5611 if(whitespaceOnly) { 5612 if(e.innerText.strip.length) 5613 return false; 5614 } 5615 if(rootElement) { 5616 if(e.parentNode !is null) 5617 return false; 5618 } 5619 if(oddChild || evenChild) { 5620 if(e.parentNode is null) 5621 return false; 5622 foreach(i, child; e.parentNode.childElements) { 5623 if(child is e) { 5624 if(oddChild && !(i&1)) 5625 return false; 5626 if(evenChild && (i&1)) 5627 return false; 5628 break; 5629 } 5630 } 5631 } 5632 5633 bool matchWithSeparator(string attr, string value, string separator) { 5634 foreach(s; attr.split(separator)) 5635 if(s == value) 5636 return true; 5637 return false; 5638 } 5639 5640 foreach(a; attributesPresent) 5641 if(a !in e.attributes) 5642 return false; 5643 foreach(a; attributesEqual) 5644 if(a[0] !in e.attributes || e.attributes[a[0]] != a[1]) 5645 return false; 5646 foreach(a; attributesNotEqual) 5647 // FIXME: maybe it should say null counts... this just bit me. 5648 // I did [attr][attr!=value] to work around. 5649 // 5650 // if it's null, it's not equal, right? 5651 //if(a[0] !in e.attributes || e.attributes[a[0]] == a[1]) 5652 if(e.getAttribute(a[0]) == a[1]) 5653 return false; 5654 foreach(a; attributesInclude) 5655 if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1)) 5656 return false; 5657 foreach(a; attributesStartsWith) 5658 if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1])) 5659 return false; 5660 foreach(a; attributesEndsWith) 5661 if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1])) 5662 return false; 5663 foreach(a; attributesIncludesSeparatedBySpaces) 5664 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " ")) 5665 return false; 5666 foreach(a; attributesIncludesSeparatedByDashes) 5667 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-")) 5668 return false; 5669 foreach(a; hasSelectors) { 5670 if(e.querySelector(a) is null) 5671 return false; 5672 } 5673 foreach(a; notSelectors) { 5674 auto sel = Selector(a); 5675 if(sel.matchesElement(e)) 5676 return false; 5677 } 5678 5679 foreach(a; nthChild) { 5680 if(e.parentNode is null) 5681 return false; 5682 5683 auto among = e.parentNode.childElements; 5684 5685 if(!a.solvesFor(among, e)) 5686 return false; 5687 } 5688 foreach(a; nthOfType) { 5689 if(e.parentNode is null) 5690 return false; 5691 5692 auto among = e.parentNode.childElements(e.tagName); 5693 5694 if(!a.solvesFor(among, e)) 5695 return false; 5696 } 5697 foreach(a; nthLastOfType) { 5698 if(e.parentNode is null) 5699 return false; 5700 5701 auto among = retro(e.parentNode.childElements(e.tagName)); 5702 5703 if(!a.solvesFor(among, e)) 5704 return false; 5705 } 5706 5707 return true; 5708 } 5709 } 5710 5711 struct ParsedNth { 5712 int multiplier; 5713 int adder; 5714 5715 string of; 5716 5717 this(string text) { 5718 auto original = text; 5719 consumeWhitespace(text); 5720 if(text.startsWith("odd")) { 5721 multiplier = 2; 5722 adder = 1; 5723 5724 text = text[3 .. $]; 5725 } else if(text.startsWith("even")) { 5726 multiplier = 2; 5727 adder = 1; 5728 5729 text = text[4 .. $]; 5730 } else { 5731 int n = (text.length && text[0] == 'n') ? 1 : parseNumber(text); 5732 consumeWhitespace(text); 5733 if(text.length && text[0] == 'n') { 5734 multiplier = n; 5735 text = text[1 .. $]; 5736 consumeWhitespace(text); 5737 if(text.length) { 5738 if(text[0] == '+') { 5739 text = text[1 .. $]; 5740 adder = parseNumber(text); 5741 } else if(text[0] == '-') { 5742 text = text[1 .. $]; 5743 adder = -parseNumber(text); 5744 } else if(text[0] == 'o') { 5745 // continue, this is handled below 5746 } else 5747 throw new Exception("invalid css string at " ~ text ~ " in " ~ original); 5748 } 5749 } else { 5750 adder = n; 5751 } 5752 } 5753 5754 consumeWhitespace(text); 5755 if(text.startsWith("of")) { 5756 text = text[2 .. $]; 5757 consumeWhitespace(text); 5758 of = text[0 .. $]; 5759 } 5760 } 5761 5762 string toString() { 5763 return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of); 5764 } 5765 5766 bool solvesFor(R)(R elements, Element e) { 5767 int idx = 1; 5768 bool found = false; 5769 foreach(ele; elements) { 5770 if(of.length) { 5771 auto sel = Selector(of); 5772 if(!sel.matchesElement(ele)) 5773 continue; 5774 } 5775 if(ele is e) { 5776 found = true; 5777 break; 5778 } 5779 idx++; 5780 } 5781 if(!found) return false; 5782 5783 // multiplier* n + adder = idx 5784 // if there is a solution for integral n, it matches 5785 5786 idx -= adder; 5787 if(multiplier) { 5788 if(idx % multiplier == 0) 5789 return true; 5790 } else { 5791 return idx == 0; 5792 } 5793 return false; 5794 } 5795 5796 private void consumeWhitespace(ref string text) { 5797 while(text.length && text[0] == ' ') 5798 text = text[1 .. $]; 5799 } 5800 5801 private int parseNumber(ref string text) { 5802 consumeWhitespace(text); 5803 if(text.length == 0) return 0; 5804 bool negative = text[0] == '-'; 5805 if(text[0] == '+') 5806 text = text[1 .. $]; 5807 if(negative) text = text[1 .. $]; 5808 int i = 0; 5809 while(i < text.length && (text[i] >= '0' && text[i] <= '9')) 5810 i++; 5811 if(i == 0) 5812 return 0; 5813 int cool = to!int(text[0 .. i]); 5814 text = text[i .. $]; 5815 return negative ? -cool : cool; 5816 } 5817 } 5818 5819 // USEFUL 5820 ///. 5821 Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) { 5822 Element[] ret; 5823 if(!parts.length) { 5824 return [start]; // the null selector only matches the start point; it 5825 // is what terminates the recursion 5826 } 5827 5828 auto part = parts[0]; 5829 switch(part.separation) { 5830 default: assert(0); 5831 case -1: 5832 case 0: // tree 5833 foreach(e; start.tree) { 5834 if(part.separation == 0 && start is e) 5835 continue; // space doesn't match itself! 5836 if(part.matchElement(e)) { 5837 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5838 } 5839 } 5840 break; 5841 case 1: // children 5842 foreach(e; start.childNodes) { 5843 if(part.matchElement(e)) { 5844 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5845 } 5846 } 5847 break; 5848 case 2: // next-sibling 5849 auto e = start.nextSibling("*"); 5850 if(part.matchElement(e)) 5851 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5852 break; 5853 case 3: // younger sibling 5854 auto tmp = start.parentNode; 5855 if(tmp !is null) { 5856 sizediff_t pos = -1; 5857 auto children = tmp.childElements; 5858 foreach(i, child; children) { 5859 if(child is start) { 5860 pos = i; 5861 break; 5862 } 5863 } 5864 assert(pos != -1); 5865 foreach(e; children[pos+1..$]) { 5866 if(part.matchElement(e)) 5867 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5868 } 5869 } 5870 break; 5871 case 4: // immediate parent node, an extension of mine to walk back up the tree 5872 auto e = start.parentNode; 5873 if(part.matchElement(e)) { 5874 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5875 } 5876 /* 5877 Example of usefulness: 5878 5879 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 5880 5881 table th < tr 5882 5883 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 5884 */ 5885 break; 5886 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 5887 /* 5888 Like with the < operator, this is best used to find some parent of a particular known element. 5889 5890 Say you have an anchor inside a 5891 */ 5892 } 5893 5894 return ret; 5895 } 5896 5897 /++ 5898 Represents a parsed CSS selector. 5899 5900 See_Also: 5901 [Element.querySelector] 5902 [Element.querySelectorAll] 5903 [Document.querySelector] 5904 [Document.querySelectorAll] 5905 +/ 5906 struct Selector { 5907 SelectorComponent[] components; 5908 string original; 5909 /++ 5910 Parses the selector string and returns the usable structure. 5911 +/ 5912 this(string cssSelector) { 5913 components = parseSelectorString(cssSelector); 5914 original = cssSelector; 5915 } 5916 5917 /++ 5918 Returns true if the given element matches this selector, 5919 considered relative to an arbitrary element. 5920 5921 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 5922 with [std.algorithm.iteration.filter]: 5923 5924 --- 5925 Selector sel = Selector("foo > bar"); 5926 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 5927 --- 5928 +/ 5929 bool matchesElement(Element e, Element relativeTo = null) { 5930 foreach(component; components) 5931 if(component.matchElement(e, relativeTo)) 5932 return true; 5933 5934 return false; 5935 } 5936 5937 /++ 5938 Reciprocal of [Element.querySelectorAll] 5939 +/ 5940 Element[] getMatchingElements(Element start) { 5941 Element[] ret; 5942 foreach(component; components) 5943 ret ~= getElementsBySelectorParts(start, component.parts); 5944 return removeDuplicates(ret); 5945 } 5946 5947 /++ 5948 Like [getMatchingElements], but returns a lazy range. Be careful 5949 about mutating the dom as you iterate through this. 5950 +/ 5951 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 5952 import std.algorithm.iteration; 5953 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 5954 } 5955 5956 5957 /// Returns the string this was built from 5958 string toString() { 5959 return original; 5960 } 5961 5962 /++ 5963 Returns a string from the parsed result 5964 5965 5966 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 5967 +/ 5968 string parsedToString() { 5969 string ret; 5970 5971 foreach(idx, component; components) { 5972 if(idx) ret ~= ", "; 5973 ret ~= component.toString(); 5974 } 5975 5976 return ret; 5977 } 5978 } 5979 5980 ///. 5981 struct SelectorComponent { 5982 ///. 5983 SelectorPart[] parts; 5984 5985 ///. 5986 string toString() { 5987 string ret; 5988 foreach(part; parts) 5989 ret ~= part.toString(); 5990 return ret; 5991 } 5992 5993 // USEFUL 5994 ///. 5995 Element[] getElements(Element start) { 5996 return removeDuplicates(getElementsBySelectorParts(start, parts)); 5997 } 5998 5999 // USEFUL (but not implemented) 6000 /// If relativeTo == null, it assumes the root of the parent document. 6001 bool matchElement(Element e, Element relativeTo = null) { 6002 if(e is null) return false; 6003 Element where = e; 6004 int lastSeparation = -1; 6005 foreach(part; retro(parts)) { 6006 6007 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 6008 6009 if(lastSeparation == -1) { 6010 if(!part.matchElement(where)) 6011 return false; 6012 } else if(lastSeparation == 0) { // generic parent 6013 // need to go up the whole chain 6014 where = where.parentNode; 6015 while(where !is null) { 6016 if(part.matchElement(where)) 6017 break; 6018 6019 if(where is relativeTo) 6020 return false; 6021 6022 where = where.parentNode; 6023 } 6024 6025 if(where is null) 6026 return false; 6027 } else if(lastSeparation == 1) { // the > operator 6028 where = where.parentNode; 6029 6030 if(!part.matchElement(where)) 6031 return false; 6032 } else if(lastSeparation == 2) { // the + operator 6033 where = where.previousSibling("*"); 6034 6035 if(!part.matchElement(where)) 6036 return false; 6037 } else if(lastSeparation == 3) { // the ~ operator 6038 where = where.previousSibling("*"); 6039 while(where !is null) { 6040 if(part.matchElement(where)) 6041 break; 6042 6043 if(where is relativeTo) 6044 return false; 6045 6046 where = where.previousSibling("*"); 6047 } 6048 6049 if(where is null) 6050 return false; 6051 } else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 6052 // FIXME 6053 } 6054 6055 lastSeparation = part.separation; 6056 6057 if(where is relativeTo) 6058 return false; // at end of line, if we aren't done by now, the match fails 6059 } 6060 return true; // if we got here, it is a success 6061 } 6062 6063 // the string should NOT have commas. Use parseSelectorString for that instead 6064 ///. 6065 static SelectorComponent fromString(string selector) { 6066 return parseSelector(lexSelector(selector)); 6067 } 6068 } 6069 6070 ///. 6071 SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) { 6072 SelectorComponent[] ret; 6073 auto tokens = lexSelector(selector); // this will parse commas too 6074 // and now do comma-separated slices (i haz phobosophobia!) 6075 while (tokens.length > 0) { 6076 size_t end = 0; 6077 while (end < tokens.length && tokens[end] != ",") ++end; 6078 if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags); 6079 if (tokens.length-end < 2) break; 6080 tokens = tokens[end+1..$]; 6081 } 6082 return ret; 6083 } 6084 6085 ///. 6086 SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) { 6087 SelectorComponent s; 6088 6089 SelectorPart current; 6090 void commit() { 6091 // might as well skip null items 6092 if(!current.isCleanSlateExceptSeparation()) { 6093 s.parts ~= current; 6094 current = current.init; // start right over 6095 } 6096 } 6097 enum State { 6098 Starting, 6099 ReadingClass, 6100 ReadingId, 6101 ReadingAttributeSelector, 6102 ReadingAttributeComparison, 6103 ExpectingAttributeCloser, 6104 ReadingPseudoClass, 6105 ReadingAttributeValue, 6106 6107 SkippingFunctionalSelector, 6108 } 6109 State state = State.Starting; 6110 string attributeName, attributeValue, attributeComparison; 6111 int parensCount; 6112 foreach(idx, token; tokens) { 6113 string readFunctionalSelector() { 6114 string s; 6115 if(tokens[idx + 1] != "(") 6116 throw new Exception("parse error"); 6117 int pc = 1; 6118 foreach(t; tokens[idx + 2 .. $]) { 6119 if(t == "(") 6120 pc++; 6121 if(t == ")") 6122 pc--; 6123 if(pc == 0) 6124 break; 6125 s ~= t; 6126 } 6127 6128 return s; 6129 } 6130 6131 sizediff_t tid = -1; 6132 foreach(i, item; selectorTokens) 6133 if(token == item) { 6134 tid = i; 6135 break; 6136 } 6137 final switch(state) { 6138 case State.Starting: // fresh, might be reading an operator or a tagname 6139 if(tid == -1) { 6140 if(!caseSensitiveTags) 6141 token = token.toLower(); 6142 6143 if(current.isCleanSlateExceptSeparation()) { 6144 current.tagNameFilter = token; 6145 } else { 6146 // if it was already set, we must see two thingies 6147 // separated by whitespace... 6148 commit(); 6149 current.separation = 0; // tree 6150 current.tagNameFilter = token; 6151 } 6152 } else { 6153 // Selector operators 6154 switch(token) { 6155 case "*": 6156 current.tagNameFilter = "*"; 6157 break; 6158 case " ": 6159 // If some other separation has already been set, 6160 // this is irrelevant whitespace, so we should skip it. 6161 // this happens in the case of "foo > bar" for example. 6162 if(current.isCleanSlateExceptSeparation() && current.separation > 0) 6163 continue; 6164 commit(); 6165 current.separation = 0; // tree 6166 break; 6167 case ">>": 6168 commit(); 6169 current.separation = 0; // alternate syntax for tree from html5 css 6170 break; 6171 case ">": 6172 commit(); 6173 current.separation = 1; // child 6174 break; 6175 case "+": 6176 commit(); 6177 current.separation = 2; // sibling directly after 6178 break; 6179 case "~": 6180 commit(); 6181 current.separation = 3; // any sibling after 6182 break; 6183 case "<": 6184 commit(); 6185 current.separation = 4; // immediate parent of 6186 break; 6187 case "[": 6188 state = State.ReadingAttributeSelector; 6189 break; 6190 case ".": 6191 state = State.ReadingClass; 6192 break; 6193 case "#": 6194 state = State.ReadingId; 6195 break; 6196 case ":": 6197 case "::": 6198 state = State.ReadingPseudoClass; 6199 break; 6200 6201 default: 6202 assert(0, token); 6203 } 6204 } 6205 break; 6206 case State.ReadingClass: 6207 current.attributesIncludesSeparatedBySpaces ~= ["class", token]; 6208 state = State.Starting; 6209 break; 6210 case State.ReadingId: 6211 current.attributesEqual ~= ["id", token]; 6212 state = State.Starting; 6213 break; 6214 case State.ReadingPseudoClass: 6215 switch(token) { 6216 case "first-of-type": 6217 current.firstOfType = true; 6218 break; 6219 case "last-of-type": 6220 current.lastOfType = true; 6221 break; 6222 case "only-of-type": 6223 current.firstOfType = true; 6224 current.lastOfType = true; 6225 break; 6226 case "first-child": 6227 current.firstChild = true; 6228 break; 6229 case "last-child": 6230 current.lastChild = true; 6231 break; 6232 case "only-child": 6233 current.firstChild = true; 6234 current.lastChild = true; 6235 break; 6236 case "empty": 6237 // one with no children 6238 current.emptyElement = true; 6239 break; 6240 case "whitespace-only": 6241 current.whitespaceOnly = true; 6242 break; 6243 case "link": 6244 current.attributesPresent ~= "href"; 6245 break; 6246 case "root": 6247 current.rootElement = true; 6248 break; 6249 case "nth-child": 6250 current.nthChild ~= ParsedNth(readFunctionalSelector()); 6251 state = State.SkippingFunctionalSelector; 6252 continue; 6253 case "nth-of-type": 6254 current.nthOfType ~= ParsedNth(readFunctionalSelector()); 6255 state = State.SkippingFunctionalSelector; 6256 continue; 6257 case "nth-last-of-type": 6258 current.nthLastOfType ~= ParsedNth(readFunctionalSelector()); 6259 state = State.SkippingFunctionalSelector; 6260 continue; 6261 case "not": 6262 state = State.SkippingFunctionalSelector; 6263 current.notSelectors ~= readFunctionalSelector(); 6264 continue; // now the rest of the parser skips past the parens we just handled 6265 case "has": 6266 state = State.SkippingFunctionalSelector; 6267 current.hasSelectors ~= readFunctionalSelector(); 6268 continue; // now the rest of the parser skips past the parens we just handled 6269 // back to standards though not quite right lol 6270 case "disabled": 6271 current.attributesPresent ~= "disabled"; 6272 break; 6273 case "checked": 6274 current.attributesPresent ~= "checked"; 6275 break; 6276 6277 case "visited", "active", "hover", "target", "focus", "selected": 6278 current.attributesPresent ~= "nothing"; 6279 // FIXME 6280 /* 6281 // defined in the standard, but I don't implement it 6282 case "not": 6283 */ 6284 /+ 6285 // extensions not implemented 6286 //case "text": // takes the text in the element and wraps it in an element, returning it 6287 +/ 6288 goto case; 6289 case "before", "after": 6290 current.attributesPresent ~= "FIXME"; 6291 6292 break; 6293 // My extensions 6294 case "odd-child": 6295 current.oddChild = true; 6296 break; 6297 case "even-child": 6298 current.evenChild = true; 6299 break; 6300 default: 6301 //if(token.indexOf("lang") == -1) 6302 //assert(0, token); 6303 break; 6304 } 6305 state = State.Starting; 6306 break; 6307 case State.SkippingFunctionalSelector: 6308 if(token == "(") { 6309 parensCount++; 6310 } else if(token == ")") { 6311 parensCount--; 6312 } 6313 6314 if(parensCount == 0) 6315 state = State.Starting; 6316 break; 6317 case State.ReadingAttributeSelector: 6318 attributeName = token; 6319 attributeComparison = null; 6320 attributeValue = null; 6321 state = State.ReadingAttributeComparison; 6322 break; 6323 case State.ReadingAttributeComparison: 6324 // FIXME: these things really should be quotable in the proper lexer... 6325 if(token != "]") { 6326 if(token.indexOf("=") == -1) { 6327 // not a comparison; consider it 6328 // part of the attribute 6329 attributeValue ~= token; 6330 } else { 6331 attributeComparison = token; 6332 state = State.ReadingAttributeValue; 6333 } 6334 break; 6335 } 6336 goto case; 6337 case State.ExpectingAttributeCloser: 6338 if(token != "]") { 6339 // not the closer; consider it part of comparison 6340 if(attributeComparison == "") 6341 attributeName ~= token; 6342 else 6343 attributeValue ~= token; 6344 break; 6345 } 6346 6347 // Selector operators 6348 switch(attributeComparison) { 6349 default: assert(0); 6350 case "": 6351 current.attributesPresent ~= attributeName; 6352 break; 6353 case "=": 6354 current.attributesEqual ~= [attributeName, attributeValue]; 6355 break; 6356 case "|=": 6357 current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue]; 6358 break; 6359 case "~=": 6360 current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue]; 6361 break; 6362 case "$=": 6363 current.attributesEndsWith ~= [attributeName, attributeValue]; 6364 break; 6365 case "^=": 6366 current.attributesStartsWith ~= [attributeName, attributeValue]; 6367 break; 6368 case "*=": 6369 current.attributesInclude ~= [attributeName, attributeValue]; 6370 break; 6371 case "!=": 6372 current.attributesNotEqual ~= [attributeName, attributeValue]; 6373 break; 6374 } 6375 6376 state = State.Starting; 6377 break; 6378 case State.ReadingAttributeValue: 6379 attributeValue = token; 6380 state = State.ExpectingAttributeCloser; 6381 break; 6382 } 6383 } 6384 6385 commit(); 6386 6387 return s; 6388 } 6389 6390 ///. 6391 Element[] removeDuplicates(Element[] input) { 6392 Element[] ret; 6393 6394 bool[Element] already; 6395 foreach(e; input) { 6396 if(e in already) continue; 6397 already[e] = true; 6398 ret ~= e; 6399 } 6400 6401 return ret; 6402 } 6403 6404 // done with CSS selector handling 6405 6406 6407 // FIXME: use the better parser from html.d 6408 /// This is probably not useful to you unless you're writing a browser or something like that. 6409 /// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes. 6410 /// From here, you can start to make a layout engine for the box model and have a css aware browser. 6411 class CssStyle { 6412 ///. 6413 this(string rule, string content) { 6414 rule = rule.strip(); 6415 content = content.strip(); 6416 6417 if(content.length == 0) 6418 return; 6419 6420 originatingRule = rule; 6421 originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work! 6422 6423 foreach(part; content.split(";")) { 6424 part = part.strip(); 6425 if(part.length == 0) 6426 continue; 6427 auto idx = part.indexOf(":"); 6428 if(idx == -1) 6429 continue; 6430 //throw new Exception("Bad css rule (no colon): " ~ part); 6431 6432 Property p; 6433 6434 p.name = part[0 .. idx].strip(); 6435 p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important 6436 p.givenExplicitly = true; 6437 p.specificity = originatingSpecificity; 6438 6439 properties ~= p; 6440 } 6441 6442 foreach(property; properties) 6443 expandShortForm(property, originatingSpecificity); 6444 } 6445 6446 ///. 6447 Specificity getSpecificityOfRule(string rule) { 6448 Specificity s; 6449 if(rule.length == 0) { // inline 6450 // s.important = 2; 6451 } else { 6452 // FIXME 6453 } 6454 6455 return s; 6456 } 6457 6458 string originatingRule; ///. 6459 Specificity originatingSpecificity; ///. 6460 6461 ///. 6462 union Specificity { 6463 uint score; ///. 6464 // version(little_endian) 6465 ///. 6466 struct { 6467 ubyte tags; ///. 6468 ubyte classes; ///. 6469 ubyte ids; ///. 6470 ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important 6471 } 6472 } 6473 6474 ///. 6475 struct Property { 6476 bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left" 6477 string name; ///. 6478 string value; ///. 6479 Specificity specificity; ///. 6480 // do we care about the original source rule? 6481 } 6482 6483 ///. 6484 Property[] properties; 6485 6486 ///. 6487 string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") { 6488 string name = unCamelCase(nameGiven); 6489 if(value is null) 6490 return getValue(name); 6491 else 6492 return setValue(name, value, 0x02000000 /* inline specificity */); 6493 } 6494 6495 /// takes dash style name 6496 string getValue(string name) { 6497 foreach(property; properties) 6498 if(property.name == name) 6499 return property.value; 6500 return null; 6501 } 6502 6503 /// takes dash style name 6504 string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) { 6505 value = value.replace("! important", "!important"); 6506 if(value.indexOf("!important") != -1) { 6507 newSpecificity.important = 1; // FIXME 6508 value = value.replace("!important", "").strip(); 6509 } 6510 6511 foreach(ref property; properties) 6512 if(property.name == name) { 6513 if(newSpecificity.score >= property.specificity.score) { 6514 property.givenExplicitly = explicit; 6515 expandShortForm(property, newSpecificity); 6516 return (property.value = value); 6517 } else { 6518 if(name == "display") 6519 {}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score); 6520 return value; // do nothing - the specificity is too low 6521 } 6522 } 6523 6524 // it's not here... 6525 6526 Property p; 6527 p.givenExplicitly = true; 6528 p.name = name; 6529 p.value = value; 6530 p.specificity = originatingSpecificity; 6531 6532 properties ~= p; 6533 expandShortForm(p, originatingSpecificity); 6534 6535 return value; 6536 } 6537 6538 private void expandQuadShort(string name, string value, Specificity specificity) { 6539 auto parts = value.split(" "); 6540 switch(parts.length) { 6541 case 1: 6542 setValue(name ~"-left", parts[0], specificity, false); 6543 setValue(name ~"-right", parts[0], specificity, false); 6544 setValue(name ~"-top", parts[0], specificity, false); 6545 setValue(name ~"-bottom", parts[0], specificity, false); 6546 break; 6547 case 2: 6548 setValue(name ~"-left", parts[1], specificity, false); 6549 setValue(name ~"-right", parts[1], specificity, false); 6550 setValue(name ~"-top", parts[0], specificity, false); 6551 setValue(name ~"-bottom", parts[0], specificity, false); 6552 break; 6553 case 3: 6554 setValue(name ~"-top", parts[0], specificity, false); 6555 setValue(name ~"-right", parts[1], specificity, false); 6556 setValue(name ~"-bottom", parts[2], specificity, false); 6557 setValue(name ~"-left", parts[2], specificity, false); 6558 6559 break; 6560 case 4: 6561 setValue(name ~"-top", parts[0], specificity, false); 6562 setValue(name ~"-right", parts[1], specificity, false); 6563 setValue(name ~"-bottom", parts[2], specificity, false); 6564 setValue(name ~"-left", parts[3], specificity, false); 6565 break; 6566 default: 6567 assert(0, value); 6568 } 6569 } 6570 6571 ///. 6572 void expandShortForm(Property p, Specificity specificity) { 6573 switch(p.name) { 6574 case "margin": 6575 case "padding": 6576 expandQuadShort(p.name, p.value, specificity); 6577 break; 6578 case "border": 6579 case "outline": 6580 setValue(p.name ~ "-left", p.value, specificity, false); 6581 setValue(p.name ~ "-right", p.value, specificity, false); 6582 setValue(p.name ~ "-top", p.value, specificity, false); 6583 setValue(p.name ~ "-bottom", p.value, specificity, false); 6584 break; 6585 6586 case "border-top": 6587 case "border-bottom": 6588 case "border-left": 6589 case "border-right": 6590 case "outline-top": 6591 case "outline-bottom": 6592 case "outline-left": 6593 case "outline-right": 6594 6595 default: {} 6596 } 6597 } 6598 6599 ///. 6600 override string toString() { 6601 string ret; 6602 if(originatingRule.length) 6603 ret = originatingRule ~ " {"; 6604 6605 foreach(property; properties) { 6606 if(!property.givenExplicitly) 6607 continue; // skip the inferred shit 6608 6609 if(originatingRule.length) 6610 ret ~= "\n\t"; 6611 else 6612 ret ~= " "; 6613 6614 ret ~= property.name ~ ": " ~ property.value ~ ";"; 6615 } 6616 6617 if(originatingRule.length) 6618 ret ~= "\n}\n"; 6619 6620 return ret; 6621 } 6622 } 6623 6624 string cssUrl(string url) { 6625 return "url(\"" ~ url ~ "\")"; 6626 } 6627 6628 /// This probably isn't useful, unless you're writing a browser or something like that. 6629 /// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css 6630 /// as text. 6631 /// 6632 /// The idea, however, is to represent a kind of CSS object model, complete with specificity, 6633 /// that you can apply to your documents to build the complete computedStyle object. 6634 class StyleSheet { 6635 ///. 6636 CssStyle[] rules; 6637 6638 ///. 6639 this(string source) { 6640 // FIXME: handle @ rules and probably could improve lexer 6641 // add nesting? 6642 int state; 6643 string currentRule; 6644 string currentValue; 6645 6646 string* currentThing = ¤tRule; 6647 foreach(c; source) { 6648 handle: switch(state) { 6649 default: assert(0); 6650 case 0: // starting - we assume we're reading a rule 6651 switch(c) { 6652 case '@': 6653 state = 4; 6654 break; 6655 case '/': 6656 state = 1; 6657 break; 6658 case '{': 6659 currentThing = ¤tValue; 6660 break; 6661 case '}': 6662 if(currentThing is ¤tValue) { 6663 rules ~= new CssStyle(currentRule, currentValue); 6664 6665 currentRule = ""; 6666 currentValue = ""; 6667 6668 currentThing = ¤tRule; 6669 } else { 6670 // idk what is going on here. 6671 // check sveit.com to reproduce 6672 currentRule = ""; 6673 currentValue = ""; 6674 } 6675 break; 6676 default: 6677 (*currentThing) ~= c; 6678 } 6679 break; 6680 case 1: // expecting * 6681 if(c == '*') 6682 state = 2; 6683 else { 6684 state = 0; 6685 (*currentThing) ~= "/" ~ c; 6686 } 6687 break; 6688 case 2: // inside comment 6689 if(c == '*') 6690 state = 3; 6691 break; 6692 case 3: // expecting / to end comment 6693 if(c == '/') 6694 state = 0; 6695 else 6696 state = 2; // it's just a comment so no need to append 6697 break; 6698 case 4: 6699 if(c == '{') 6700 state = 5; 6701 if(c == ';') 6702 state = 0; // just skipping import 6703 break; 6704 case 5: 6705 if(c == '}') 6706 state = 0; // skipping font face probably 6707 } 6708 } 6709 } 6710 6711 /// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call 6712 void apply(Document document) { 6713 foreach(rule; rules) { 6714 if(rule.originatingRule.length == 0) 6715 continue; // this shouldn't happen here in a stylesheet 6716 foreach(element; document.querySelectorAll(rule.originatingRule)) { 6717 // note: this should be a different object than the inline style 6718 // since givenExplicitly is likely destroyed here 6719 auto current = element.computedStyle; 6720 6721 foreach(item; rule.properties) 6722 current.setValue(item.name, item.value, item.specificity); 6723 } 6724 } 6725 } 6726 } 6727 6728 6729 /// This is kinda private; just a little utility container for use by the ElementStream class. 6730 final class Stack(T) { 6731 this() { 6732 internalLength = 0; 6733 arr = initialBuffer[]; 6734 } 6735 6736 ///. 6737 void push(T t) { 6738 if(internalLength >= arr.length) { 6739 auto oldarr = arr; 6740 if(arr.length < 4096) 6741 arr = new T[arr.length * 2]; 6742 else 6743 arr = new T[arr.length + 4096]; 6744 arr[0 .. oldarr.length] = oldarr[]; 6745 } 6746 6747 arr[internalLength] = t; 6748 internalLength++; 6749 } 6750 6751 ///. 6752 T pop() { 6753 assert(internalLength); 6754 internalLength--; 6755 return arr[internalLength]; 6756 } 6757 6758 ///. 6759 T peek() { 6760 assert(internalLength); 6761 return arr[internalLength - 1]; 6762 } 6763 6764 ///. 6765 @property bool empty() { 6766 return internalLength ? false : true; 6767 } 6768 6769 ///. 6770 private T[] arr; 6771 private size_t internalLength; 6772 private T[64] initialBuffer; 6773 // the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep), 6774 // using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push() 6775 // function thanks to this, and push() was actually one of the slowest individual functions in the code! 6776 } 6777 6778 /// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively. 6779 final class ElementStream { 6780 6781 ///. 6782 @property Element front() { 6783 return current.element; 6784 } 6785 6786 /// Use Element.tree instead. 6787 this(Element start) { 6788 current.element = start; 6789 current.childPosition = -1; 6790 isEmpty = false; 6791 stack = new Stack!(Current); 6792 } 6793 6794 /* 6795 Handle it 6796 handle its children 6797 6798 */ 6799 6800 ///. 6801 void popFront() { 6802 more: 6803 if(isEmpty) return; 6804 6805 // FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times) 6806 6807 current.childPosition++; 6808 if(current.childPosition >= current.element.children.length) { 6809 if(stack.empty()) 6810 isEmpty = true; 6811 else { 6812 current = stack.pop(); 6813 goto more; 6814 } 6815 } else { 6816 stack.push(current); 6817 current.element = current.element.children[current.childPosition]; 6818 current.childPosition = -1; 6819 } 6820 } 6821 6822 /// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable. 6823 void currentKilled() { 6824 if(stack.empty) // should never happen 6825 isEmpty = true; 6826 else { 6827 current = stack.pop(); 6828 current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right 6829 } 6830 } 6831 6832 ///. 6833 @property bool empty() { 6834 return isEmpty; 6835 } 6836 6837 private: 6838 6839 struct Current { 6840 Element element; 6841 int childPosition; 6842 } 6843 6844 Current current; 6845 6846 Stack!(Current) stack; 6847 6848 bool isEmpty; 6849 } 6850 6851 6852 6853 // unbelievable. 6854 // Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time. 6855 sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) { 6856 static import std.algorithm; 6857 auto found = std.algorithm.find(haystack, needle); 6858 if(found.length == 0) 6859 return -1; 6860 return haystack.length - found.length; 6861 } 6862 6863 private T[] insertAfter(T)(T[] arr, int position, T[] what) { 6864 assert(position < arr.length); 6865 T[] ret; 6866 ret.length = arr.length + what.length; 6867 int a = 0; 6868 foreach(i; arr[0..position+1]) 6869 ret[a++] = i; 6870 6871 foreach(i; what) 6872 ret[a++] = i; 6873 6874 foreach(i; arr[position+1..$]) 6875 ret[a++] = i; 6876 6877 return ret; 6878 } 6879 6880 package bool isInArray(T)(T item, T[] arr) { 6881 foreach(i; arr) 6882 if(item == i) 6883 return true; 6884 return false; 6885 } 6886 6887 private string[string] aadup(in string[string] arr) { 6888 string[string] ret; 6889 foreach(k, v; arr) 6890 ret[k] = v; 6891 return ret; 6892 } 6893 6894 // dom event support, if you want to use it 6895 6896 /// used for DOM events 6897 alias EventHandler = void delegate(Element handlerAttachedTo, Event event); 6898 6899 /// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it. 6900 class Event { 6901 this(string eventName, Element target) { 6902 this.eventName = eventName; 6903 this.srcElement = target; 6904 } 6905 6906 /// Prevents the default event handler (if there is one) from being called 6907 void preventDefault() { 6908 defaultPrevented = true; 6909 } 6910 6911 /// Stops the event propagation immediately. 6912 void stopPropagation() { 6913 propagationStopped = true; 6914 } 6915 6916 bool defaultPrevented; 6917 bool propagationStopped; 6918 string eventName; 6919 6920 Element srcElement; 6921 alias srcElement target; 6922 6923 Element relatedTarget; 6924 6925 int clientX; 6926 int clientY; 6927 6928 int button; 6929 6930 bool isBubbling; 6931 6932 /// this sends it only to the target. If you want propagation, use dispatch() instead. 6933 void send() { 6934 if(srcElement is null) 6935 return; 6936 6937 auto e = srcElement; 6938 6939 if(eventName in e.bubblingEventHandlers) 6940 foreach(handler; e.bubblingEventHandlers[eventName]) 6941 handler(e, this); 6942 6943 if(!defaultPrevented) 6944 if(eventName in e.defaultEventHandlers) 6945 e.defaultEventHandlers[eventName](e, this); 6946 } 6947 6948 /// this dispatches the element using the capture -> target -> bubble process 6949 void dispatch() { 6950 if(srcElement is null) 6951 return; 6952 6953 // first capture, then bubble 6954 6955 Element[] chain; 6956 Element curr = srcElement; 6957 while(curr) { 6958 auto l = curr; 6959 chain ~= l; 6960 curr = curr.parentNode; 6961 6962 } 6963 6964 isBubbling = false; 6965 6966 foreach(e; chain.retro()) { 6967 if(eventName in e.capturingEventHandlers) 6968 foreach(handler; e.capturingEventHandlers[eventName]) 6969 handler(e, this); 6970 6971 // the default on capture should really be to always do nothing 6972 6973 //if(!defaultPrevented) 6974 // if(eventName in e.defaultEventHandlers) 6975 // e.defaultEventHandlers[eventName](e.element, this); 6976 6977 if(propagationStopped) 6978 break; 6979 } 6980 6981 isBubbling = true; 6982 if(!propagationStopped) 6983 foreach(e; chain) { 6984 if(eventName in e.bubblingEventHandlers) 6985 foreach(handler; e.bubblingEventHandlers[eventName]) 6986 handler(e, this); 6987 6988 if(propagationStopped) 6989 break; 6990 } 6991 6992 if(!defaultPrevented) 6993 foreach(e; chain) { 6994 if(eventName in e.defaultEventHandlers) 6995 e.defaultEventHandlers[eventName](e, this); 6996 } 6997 } 6998 } 6999 7000 struct FormFieldOptions { 7001 // usable for any 7002 7003 /// this is a regex pattern used to validate the field 7004 string pattern; 7005 /// must the field be filled in? Even with a regex, it can be submitted blank if this is false. 7006 bool isRequired; 7007 /// this is displayed as an example to the user 7008 string placeholder; 7009 7010 // usable for numeric ones 7011 7012 7013 // convenience methods to quickly get some options 7014 @property static FormFieldOptions none() { 7015 FormFieldOptions f; 7016 return f; 7017 } 7018 7019 static FormFieldOptions required() { 7020 FormFieldOptions f; 7021 f.isRequired = true; 7022 return f; 7023 } 7024 7025 static FormFieldOptions regex(string pattern, bool required = false) { 7026 FormFieldOptions f; 7027 f.pattern = pattern; 7028 f.isRequired = required; 7029 return f; 7030 } 7031 7032 static FormFieldOptions fromElement(Element e) { 7033 FormFieldOptions f; 7034 if(e.hasAttribute("required")) 7035 f.isRequired = true; 7036 if(e.hasAttribute("pattern")) 7037 f.pattern = e.pattern; 7038 if(e.hasAttribute("placeholder")) 7039 f.placeholder = e.placeholder; 7040 return f; 7041 } 7042 7043 Element applyToElement(Element e) { 7044 if(this.isRequired) 7045 e.required = "required"; 7046 if(this.pattern.length) 7047 e.pattern = this.pattern; 7048 if(this.placeholder.length) 7049 e.placeholder = this.placeholder; 7050 return e; 7051 } 7052 } 7053 7054 // this needs to look just like a string, but can expand as needed 7055 version(no_dom_stream) 7056 alias string Utf8Stream; 7057 else 7058 class Utf8Stream { 7059 protected: 7060 // these two should be overridden in subclasses to actually do the stream magic 7061 string getMore() { 7062 if(getMoreHelper !is null) 7063 return getMoreHelper(); 7064 return null; 7065 } 7066 7067 bool hasMore() { 7068 if(hasMoreHelper !is null) 7069 return hasMoreHelper(); 7070 return false; 7071 } 7072 // the rest should be ok 7073 7074 public: 7075 this(string d) { 7076 this.data = d; 7077 } 7078 7079 this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) { 7080 this.getMoreHelper = getMoreHelper; 7081 this.hasMoreHelper = hasMoreHelper; 7082 7083 if(hasMore()) 7084 this.data ~= getMore(); 7085 7086 stdout.flush(); 7087 } 7088 7089 @property final size_t length() { 7090 // the parser checks length primarily directly before accessing the next character 7091 // so this is the place we'll hook to append more if possible and needed. 7092 if(lastIdx + 1 >= data.length && hasMore()) { 7093 data ~= getMore(); 7094 } 7095 return data.length; 7096 } 7097 7098 final char opIndex(size_t idx) { 7099 if(idx > lastIdx) 7100 lastIdx = idx; 7101 return data[idx]; 7102 } 7103 7104 final string opSlice(size_t start, size_t end) { 7105 if(end > lastIdx) 7106 lastIdx = end; 7107 return data[start .. end]; 7108 } 7109 7110 final size_t opDollar() { 7111 return length(); 7112 } 7113 7114 final Utf8Stream opBinary(string op : "~")(string s) { 7115 this.data ~= s; 7116 return this; 7117 } 7118 7119 final Utf8Stream opOpAssign(string op : "~")(string s) { 7120 this.data ~= s; 7121 return this; 7122 } 7123 7124 final Utf8Stream opAssign(string rhs) { 7125 this.data = rhs; 7126 return this; 7127 } 7128 private: 7129 string data; 7130 7131 size_t lastIdx; 7132 7133 bool delegate() hasMoreHelper; 7134 string delegate() getMoreHelper; 7135 7136 7137 /+ 7138 // used to maybe clear some old stuff 7139 // you might have to remove elements parsed with it too since they can hold slices into the 7140 // old stuff, preventing gc 7141 void dropFront(int bytes) { 7142 posAdjustment += bytes; 7143 data = data[bytes .. $]; 7144 } 7145 7146 int posAdjustment; 7147 +/ 7148 } 7149 7150 void fillForm(T)(Form form, T obj, string name) { 7151 import arsd.database; 7152 fillData((k, v) => form.setValue(k, v), obj, name); 7153 } 7154 7155 7156 /+ 7157 /+ 7158 Syntax: 7159 7160 Tag: tagname#id.class 7161 Tree: Tag(Children, comma, separated...) 7162 Children: Tee or Variable 7163 Variable: $varname with optional |funcname following. 7164 7165 If a variable has a tree after it, it breaks the variable down: 7166 * if array, foreach it does the tree 7167 * if struct, it breaks down the member variables 7168 7169 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 7170 +/ 7171 struct Stringplate { 7172 /++ 7173 7174 +/ 7175 this(string s) { 7176 7177 } 7178 7179 /++ 7180 7181 +/ 7182 Element expand(T...)(T vars) { 7183 return null; 7184 } 7185 } 7186 /// 7187 unittest { 7188 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 7189 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 7190 } 7191 +/ 7192 7193 bool allAreInlineHtml(const(Element)[] children) { 7194 foreach(child; children) { 7195 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 7196 // cool 7197 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children)) { 7198 // cool 7199 } else { 7200 // prolly block 7201 return false; 7202 } 7203 } 7204 return true; 7205 } 7206 7207 private bool isSimpleWhite(dchar c) { 7208 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 7209 } 7210 7211 /* 7212 Copyright: Adam D. Ruppe, 2010 - 2019 7213 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 7214 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 7215 7216 Copyright Adam D. Ruppe 2010-2019. 7217 Distributed under the Boost Software License, Version 1.0. 7218 (See accompanying file LICENSE_1_0.txt or copy at 7219 http://www.boost.org/LICENSE_1_0.txt) 7220 */ 7221 7222 7223 unittest { 7224 // Test for issue #120 7225 string s = `<html> 7226 <body> 7227 <P>AN 7228 <P>bubbles</P> 7229 <P>giggles</P> 7230 </body> 7231 </html>`; 7232 auto doc = new Document(); 7233 doc.parseUtf8(s, false, false); 7234 auto s2 = doc.toString(); 7235 assert( 7236 s2.indexOf("bubbles") < s2.indexOf("giggles"), 7237 "paragraph order incorrect:\n" ~ s2); 7238 }