// FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it.
// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary


// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;
	import std.stdio;

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time helper used as a filter for opDispatch: answers whether
// `name` is one of the attributes that may be reached directly instead of
// going through .attr. The match is exact (case-sensitive).
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource {
/// Convenience method for web scraping. Requires [arsd.http2] to be
/// included in the build as well as [arsd.characterencodings].
///
/// Performs a GET on `url` and waits for it to complete. With
/// `strictMode` set, the body is parsed case-sensitively and strictly
/// using the charset reported by the response; otherwise it is handed to
/// [parseGarbage].
static Document fromUrl()(string url, bool strictMode = false) {
	import arsd.http2;

	auto client = new HttpClient();
	auto response = client.navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();

	auto document = new Document();
	auto payload = cast(string) response.content;

	if(!strictMode)
		document.parseGarbage(payload);
	else
		document.parse(payload, true, true, response.contentTypeCharset);

	return document;
}

/// Constructs a document by parsing `data` with [parseUtf8];
/// `caseSensitive` and `strict` are forwarded to it unchanged.
this(string data, bool caseSensitive = false, bool strict = false) {
	// all the work happens in parseUtf8; the arguments are passed straight through
	this.parseUtf8(data, caseSensitive, strict);
}

/**
	Creates an empty document. It has *nothing* in it at all.
*/
this() {
}

/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
/// can chain it.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
/// you could put in some kind of custom filter function tho.
ElementCollection opIndex(string selector) {
	// start from a collection holding just the root, then narrow it by selector
	return ElementCollection(this.root)[selector];
}

string _contentType = "text/html; charset=utf-8";

/// If you're using this for some other kind of XML, you can
/// set the content type here.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. Otherwise,
/// if you don't set it, it assumes text/html; charset=utf-8.
///
/// Returns: the value that was just stored.
@property string contentType(string mimeType) {
	return _contentType = mimeType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return this._contentType;
}

/// implementing the FileResource interface; it calls toString.
override immutable(ubyte)[] getData() const {
	// the string returned by toString is reinterpreted as bytes (a cast, not a copy)
	auto serialized = this.toString();
	return cast(immutable(ubyte)[]) serialized;
}


// Not implemented: would concatenate any consecutive text nodes.
/*
void normalize() {

}
*/

/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
/// Call this before calling parse().

/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	// every callback simply answers "yes, keep the node"
	parseSawComment = delegate bool(string) { return true; };
	parseSawAspCode = delegate bool(string) { return true; };
	parseSawPhpCode = delegate bool(string) { return true; };
	parseSawQuestionInstruction = delegate bool(string) { return true; };
	parseSawBangInstruction = delegate bool(string) { return true; };
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;

/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;

/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)

/// NOTE: this makes no attempt at added security.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	parse(data, false, false, null);
}

/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data) {
	parseStream(toUtf8Stream(data), true, true);
}

/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// They will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	parseStream(toUtf8Stream(data), caseSensitive, strict);
}

// Works out the character encoding of rawdata (unless the caller already
// supplied one in dataEncoding), converts the data to UTF-8 and wraps the
// result in a Utf8Stream.
//
// Params:
//   rawdata      = the document bytes, in whatever encoding they arrived
//   dataEncoding = encoding label if known; null means "sniff it"
//   strict       = if true, an undeterminable or unconvertible encoding
//                  throws; if false, Windows 1252 is used as the fallback
//
// Throws: MarkupException in strict mode when no encoding can be found;
// whatever convertToUtf8 throws in strict mode.
//
// this is a template so we get lazy import behavior
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;

	// A label taken from the document ends at a quote (either kind), at the
	// end of the tag or attribute, or at whitespace. Unquoted HTML5 forms
	// such as <meta charset=utf-8> must not drag the following markup in.
	static bool endsEncodingLabel(ubyte b) {
		return b == '"' || b == '\'' || b == '>' || b == ';' || b == '/'
			|| b == ' ' || b == '\t' || b == '\r' || b == '\n';
	}

	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// "charset=" may be the very last thing in the input, so check
					// the bound before peeking; accept either quote style
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
						idx++;
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// read till a terminator or about 12 chars, whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length && end - idx < 12 && !endsEncodingLabel(dataAsBytes[end]))
					end++;

				// an empty label (e.g. charset="") tells us nothing; leave it
				// null so the "don't know" handling below takes over
				if(end > idx)
					dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
		// if we really don't know by here, it means we already tried UTF-8,
		// looked for utf 16 and 32 byte order marks, and looked for xml or meta
		// tags... let's assume it's Windows-1252, since that's probably the most
		// common aside from utf that wouldn't be labeled.

		dataEncoding = "Windows 1252";
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// NOTE(review): after the normalization above the label can no longer be
	// the literal "UTF-8", so in non-strict mode this always goes through
	// convertToUtf8 - presumably harmless, confirm against characterencodings.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or broken label: best effort with the fallback encoding
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	return toUtf8Stream(data);
}

// Wraps already-UTF-8 data in a Utf8Stream; when Utf8Stream is just an
// alias of string the data is returned as-is.
private
Utf8Stream toUtf8Stream(in string rawdata) {
	string data = rawdata;
	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/**
	Take XMLish data and try to make the DOM tree out of it.

	The goal isn't to be perfect, but to just be good enough to
	approximate Javascript's behavior.

	If strict, it throws on something that doesn't make sense.
	(Examples: mismatched tags. It doesn't validate!)
	If not strict, it tries to recover anyway, and only throws
	when something is REALLY unworkable.

	If strict is false, it uses a magic list of tags that needn't
	be closed. If you are writing a document specifically for this,
	try to avoid such - use self closed tags at least. Easier to parse.
365 366 The dataEncoding argument can be used to pass a specific 367 charset encoding for automatic conversion. If null (which is NOT 368 the default!), it tries to determine from the data itself, 369 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 370 371 If this assumption is wrong, it can throw on non-ascii 372 characters! 373 374 375 Note that it previously assumed the data was encoded as UTF-8, which 376 is why the dataEncoding argument defaults to that. 377 378 So it shouldn't break backward compatibility. 379 380 But, if you want the best behavior on wild data - figuring it out from the document 381 instead of assuming - you'll probably want to change that argument to null. 382 383 This is a template so it lazily imports arsd.characterencodings, which is required 384 to fix up data encodings. 385 386 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 387 dependency. If it is data from the Internet though, a random website, the encoding 388 is often a lie. This function, if dataEncoding == null, can correct for that, or 389 you can try parseGarbage. In those cases, arsd.characterencodings is required to 390 compile. 391 */ 392 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 393 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 394 parseStream(data, caseSensitive, strict); 395 } 396 397 // note: this work best in strict mode, unless data is just a simple string wrapper 398 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 399 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 400 // of my big app. 401 402 assert(data !is null); 403 404 // go through character by character. 405 // if you see a <, consider it a tag. 
406 // name goes until the first non tagname character 407 // then see if it self closes or has an attribute 408 409 // if not in a tag, anything not a tag is a big text 410 // node child. It ends as soon as it sees a < 411 412 // Whitespace in text or attributes is preserved, but not between attributes 413 414 // & and friends are converted when I know them, left the same otherwise 415 416 417 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 418 //validate(data); // it *must* be UTF-8 for this to work correctly 419 420 sizediff_t pos = 0; 421 422 clear(); 423 424 loose = !caseSensitive; 425 426 bool sawImproperNesting = false; 427 bool paragraphHackfixRequired = false; 428 429 int getLineNumber(sizediff_t p) { 430 int line = 1; 431 foreach(c; data[0..p]) 432 if(c == '\n') 433 line++; 434 return line; 435 } 436 437 void parseError(string message) { 438 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 439 } 440 441 bool eatWhitespace() { 442 bool ateAny = false; 443 while(pos < data.length && data[pos].isSimpleWhite) { 444 pos++; 445 ateAny = true; 446 } 447 return ateAny; 448 } 449 450 string readTagName() { 451 // remember to include : for namespaces 452 // basically just keep going until >, /, or whitespace 453 auto start = pos; 454 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 455 { 456 pos++; 457 if(pos == data.length) { 458 if(strict) 459 throw new Exception("tag name incomplete when file ended"); 460 else 461 break; 462 } 463 } 464 465 if(!caseSensitive) 466 return toLower(data[start..pos]); 467 else 468 return data[start..pos]; 469 } 470 471 string readAttributeName() { 472 // remember to include : for namespaces 473 // basically just keep going until >, /, or whitespace 474 auto start = pos; 475 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 476 { 477 if(data[pos] == 
'<') { 478 if(strict) 479 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 480 else 481 break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 482 } 483 pos++; 484 if(pos == data.length) { 485 if(strict) 486 throw new Exception("unterminated attribute name"); 487 else 488 break; 489 } 490 } 491 492 if(!caseSensitive) 493 return toLower(data[start..pos]); 494 else 495 return data[start..pos]; 496 } 497 498 string readAttributeValue() { 499 if(pos >= data.length) { 500 if(strict) 501 throw new Exception("no attribute value before end of file"); 502 else 503 return null; 504 } 505 switch(data[pos]) { 506 case '\'': 507 case '"': 508 auto started = pos; 509 char end = data[pos]; 510 pos++; 511 auto start = pos; 512 while(pos < data.length && data[pos] != end) 513 pos++; 514 if(strict && pos == data.length) 515 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 516 string v = htmlEntitiesDecode(data[start..pos], strict); 517 pos++; // skip over the end 518 return v; 519 default: 520 if(strict) 521 parseError("Attributes must be quoted"); 522 // read until whitespace or terminator (/> or >) 523 auto start = pos; 524 while( 525 pos < data.length && 526 data[pos] != '>' && 527 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 528 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 529 !data[pos].isSimpleWhite) 530 pos++; 531 532 string v = htmlEntitiesDecode(data[start..pos], strict); 533 // don't skip the end - we'll need it later 534 return v; 535 } 536 } 537 538 TextNode readTextNode() { 539 auto start = pos; 540 while(pos < data.length && data[pos] != '<') { 541 pos++; 542 } 543 544 return TextNode.fromUndecodedString(this, 
data[start..pos]); 545 } 546 547 // this is obsolete! 548 RawSource readCDataNode() { 549 auto start = pos; 550 while(pos < data.length && data[pos] != '<') { 551 pos++; 552 } 553 554 return new RawSource(this, data[start..pos]); 555 } 556 557 558 struct Ele { 559 int type; // element or closing tag or nothing 560 /* 561 type == 0 means regular node, self-closed (element is valid) 562 type == 1 means closing tag (payload is the tag name, element may be valid) 563 type == 2 means you should ignore it completely 564 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 565 type == 4 means the document was totally empty 566 */ 567 Element element; // for type == 0 or type == 3 568 string payload; // for type == 1 569 } 570 // recursively read a tag 571 Ele readElement(string[] parentChain = null) { 572 // FIXME: this is the slowest function in this module, by far, even in strict mode. 573 // Loose mode should perform decently, but strict mode is the important one. 
574 if(!strict && parentChain is null) 575 parentChain = []; 576 577 static string[] recentAutoClosedTags; 578 579 if(pos >= data.length) 580 { 581 if(strict) { 582 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 583 } else { 584 if(parentChain.length) 585 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 586 else 587 return Ele(4); // signal emptiness upstream 588 } 589 } 590 591 if(data[pos] != '<') { 592 return Ele(0, readTextNode(), null); 593 } 594 595 enforce(data[pos] == '<'); 596 pos++; 597 if(pos == data.length) { 598 if(strict) 599 throw new MarkupException("Found trailing < at end of file"); 600 // if not strict, we'll just skip the switch 601 } else 602 switch(data[pos]) { 603 // I don't care about these, so I just want to skip them 604 case '!': // might be a comment, a doctype, or a special instruction 605 pos++; 606 607 // FIXME: we should store these in the tree too 608 // though I like having it stripped out tbh. 609 610 if(pos == data.length) { 611 if(strict) 612 throw new MarkupException("<! opened at end of file"); 613 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 614 // comment 615 pos += 2; 616 617 // FIXME: technically, a comment is anything 618 // between -- and -- inside a <!> block. 
619 // so in <!-- test -- lol> , the " lol" is NOT a comment 620 // and should probably be handled differently in here, but for now 621 // I'll just keep running until --> since that's the common way 622 623 auto commentStart = pos; 624 while(pos+3 < data.length && data[pos..pos+3] != "-->") 625 pos++; 626 627 auto end = commentStart; 628 629 if(pos + 3 >= data.length) { 630 if(strict) 631 throw new MarkupException("unclosed comment"); 632 end = data.length; 633 pos = data.length; 634 } else { 635 end = pos; 636 assert(data[pos] == '-'); 637 pos++; 638 assert(data[pos] == '-'); 639 pos++; 640 assert(data[pos] == '>'); 641 pos++; 642 } 643 644 if(parseSawComment !is null) 645 if(parseSawComment(data[commentStart .. end])) { 646 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 647 } 648 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 649 pos += 7; 650 651 auto cdataStart = pos; 652 653 ptrdiff_t end = -1; 654 typeof(end) cdataEnd; 655 656 if(pos < data.length) { 657 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 658 end = data[pos .. $].indexOf("]]>"); 659 } 660 661 if(end == -1) { 662 if(strict) 663 throw new MarkupException("Unclosed CDATA section"); 664 end = pos; 665 cdataEnd = pos; 666 } else { 667 cdataEnd = pos + end; 668 pos = cdataEnd + 3; 669 } 670 671 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 672 } else { 673 auto start = pos; 674 while(pos < data.length && data[pos] != '>') 675 pos++; 676 677 auto bangEnds = pos; 678 if(pos == data.length) { 679 if(strict) 680 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 681 } else pos++; // skipping the > 682 683 if(parseSawBangInstruction !is null) 684 if(parseSawBangInstruction(data[start .. bangEnds])) { 685 // FIXME: these should be able to modify the parser state, 686 // doing things like adding entities, somehow. 687 688 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 689 } 690 } 691 692 /* 693 if(pos < data.length && data[pos] == '>') 694 pos++; // skip the > 695 else 696 assert(!strict); 697 */ 698 break; 699 case '%': 700 case '?': 701 /* 702 Here's what we want to support: 703 704 <% asp code %> 705 <%= asp code %> 706 <?php php code ?> 707 <?= php code ?> 708 709 The contents don't really matter, just if it opens with 710 one of the above for, it ends on the two char terminator. 711 712 <?something> 713 this is NOT php code 714 because I've seen this in the wild: <?EM-dummyText> 715 716 This could be php with shorttags which would be cut off 717 prematurely because if(a >) - that > counts as the close 718 of the tag, but since dom.d can't tell the difference 719 between that and the <?EM> real world example, it will 720 not try to look for the ?> ending. 721 722 The difference between this and the asp/php stuff is that it 723 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 724 on >. 725 */ 726 727 char end = data[pos]; 728 auto started = pos; 729 bool isAsp = end == '%'; 730 int currentIndex = 0; 731 bool isPhp = false; 732 bool isEqualTag = false; 733 int phpCount = 0; 734 735 more: 736 pos++; // skip the start 737 if(pos == data.length) { 738 if(strict) 739 throw new MarkupException("Unclosed <"~end~" by end of file"); 740 } else { 741 currentIndex++; 742 if(currentIndex == 1 && data[pos] == '=') { 743 if(!isAsp) 744 isPhp = true; 745 isEqualTag = true; 746 goto more; 747 } 748 if(currentIndex == 1 && data[pos] == 'p') 749 phpCount++; 750 if(currentIndex == 2 && data[pos] == 'h') 751 phpCount++; 752 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 753 isPhp = true; 754 755 if(data[pos] == '>') { 756 if((isAsp || isPhp) && data[pos - 1] != end) 757 goto more; 758 // otherwise we're done 759 } else 760 goto more; 761 } 762 763 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 764 auto code = data[started .. 
pos]; 765 766 767 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 768 if(pos < data.length) 769 pos++; // get past the > 770 771 if(isAsp && parseSawAspCode !is null) { 772 if(parseSawAspCode(code)) { 773 return Ele(3, new AspCode(this, code), null); 774 } 775 } else if(isPhp && parseSawPhpCode !is null) { 776 if(parseSawPhpCode(code)) { 777 return Ele(3, new PhpCode(this, code), null); 778 } 779 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 780 if(parseSawQuestionInstruction(code)) { 781 return Ele(3, new QuestionInstruction(this, code), null); 782 } 783 } 784 break; 785 case '/': // closing an element 786 pos++; // skip the start 787 auto p = pos; 788 while(pos < data.length && data[pos] != '>') 789 pos++; 790 //writefln("</%s>", data[p..pos]); 791 if(pos == data.length && data[pos-1] != '>') { 792 if(strict) 793 throw new MarkupException("File ended before closing tag had a required >"); 794 else 795 data ~= ">"; // just hack it in 796 } 797 pos++; // skip the '>' 798 799 string tname = data[p..pos-1]; 800 if(!caseSensitive) 801 tname = tname.toLower(); 802 803 return Ele(1, null, tname); // closing tag reports itself here 804 case ' ': // assume it isn't a real element... 805 if(strict) { 806 parseError("bad markup - improperly placed <"); 807 assert(0); // parseError always throws 808 } else 809 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 810 default: 811 812 if(!strict) { 813 // what about something that kinda looks like a tag, but isn't? 814 auto nextTag = data[pos .. $].indexOf("<"); 815 auto closeTag = data[pos .. $].indexOf(">"); 816 if(closeTag != -1 && nextTag != -1) 817 if(nextTag < closeTag) { 818 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 819 820 auto equal = data[pos .. 
$].indexOf("=\""); 821 if(equal != -1 && equal < closeTag) { 822 // this MIGHT be ok, soldier on 823 } else { 824 // definitely no good, this must be a (horribly distorted) text node 825 pos++; // skip the < we're on - don't want text node to end prematurely 826 auto node = readTextNode(); 827 node.contents = "<" ~ node.contents; // put this back 828 return Ele(0, node, null); 829 } 830 } 831 } 832 833 string tagName = readTagName(); 834 string[string] attributes; 835 836 Ele addTag(bool selfClosed) { 837 if(selfClosed) 838 pos++; 839 else { 840 if(!strict) 841 if(tagName.isInArray(selfClosedElements)) 842 // these are de-facto self closed 843 selfClosed = true; 844 } 845 846 if(strict) 847 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[pos - 100 .. pos + 100])); 848 else { 849 // if we got here, it's probably because a slash was in an 850 // unquoted attribute - don't trust the selfClosed value 851 if(!selfClosed) 852 selfClosed = tagName.isInArray(selfClosedElements); 853 854 while(pos < data.length && data[pos] != '>') 855 pos++; 856 857 if(pos >= data.length) { 858 // the tag never closed 859 assert(data.length != 0); 860 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 
861 } 862 } 863 864 auto whereThisTagStarted = pos; // for better error messages 865 866 pos++; 867 868 auto e = createElement(tagName); 869 e.attributes = attributes; 870 version(dom_node_indexes) { 871 if(e.dataset.nodeIndex.length == 0) 872 e.dataset.nodeIndex = to!string(&(e.attributes)); 873 } 874 e.selfClosed = selfClosed; 875 e.parseAttributes(); 876 877 878 // HACK to handle script and style as a raw data section as it is in HTML browsers 879 if(tagName == "script" || tagName == "style") { 880 if(!selfClosed) { 881 string closer = "</" ~ tagName ~ ">"; 882 ptrdiff_t ending; 883 if(pos >= data.length) 884 ending = -1; 885 else 886 ending = indexOf(data[pos..$], closer); 887 888 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 889 /* 890 if(loose && ending == -1 && pos < data.length) 891 ending = indexOf(data[pos..$], closer.toUpper()); 892 */ 893 if(ending == -1) { 894 if(strict) 895 throw new Exception("tag " ~ tagName ~ " never closed"); 896 else { 897 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 898 if(pos < data.length) { 899 e = new TextNode(this, data[pos .. $]); 900 pos = data.length; 901 } 902 } 903 } else { 904 ending += pos; 905 e.innerRawSource = data[pos..ending]; 906 pos = ending + closer.length; 907 } 908 } 909 return Ele(0, e, null); 910 } 911 912 bool closed = selfClosed; 913 914 void considerHtmlParagraphHack(Element n) { 915 assert(!strict); 916 if(e.tagName == "p" && e.tagName == n.tagName) { 917 // html lets you write <p> para 1 <p> para 1 918 // but in the dom tree, they should be siblings, not children. 
919 paragraphHackfixRequired = true; 920 } 921 } 922 923 //writef("<%s>", tagName); 924 while(!closed) { 925 Ele n; 926 if(strict) 927 n = readElement(); 928 else 929 n = readElement(parentChain ~ tagName); 930 931 if(n.type == 4) return n; // the document is empty 932 933 if(n.type == 3 && n.element !is null) { 934 // special node, append if possible 935 if(e !is null) 936 e.appendChild(n.element); 937 else 938 piecesBeforeRoot ~= n.element; 939 } else if(n.type == 0) { 940 if(!strict) 941 considerHtmlParagraphHack(n.element); 942 e.appendChild(n.element); 943 } else if(n.type == 1) { 944 bool found = false; 945 if(n.payload != tagName) { 946 if(strict) 947 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 948 else { 949 sawImproperNesting = true; 950 // this is so we don't drop several levels of awful markup 951 if(n.element) { 952 if(!strict) 953 considerHtmlParagraphHack(n.element); 954 e.appendChild(n.element); 955 n.element = null; 956 } 957 958 // is the element open somewhere up the chain? 959 foreach(i, parent; parentChain) 960 if(parent == n.payload) { 961 recentAutoClosedTags ~= tagName; 962 // just rotating it so we don't inadvertently break stuff with vile crap 963 if(recentAutoClosedTags.length > 4) 964 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 965 966 n.element = e; 967 return n; 968 } 969 970 // if not, this is a text node; we can't fix it up... 
971 972 // If it's already in the tree somewhere, assume it is closed by algorithm 973 // and we shouldn't output it - odds are the user just flipped a couple tags 974 foreach(ele; e.tree) { 975 if(ele.tagName == n.payload) { 976 found = true; 977 break; 978 } 979 } 980 981 foreach(ele; recentAutoClosedTags) { 982 if(ele == n.payload) { 983 found = true; 984 break; 985 } 986 } 987 988 if(!found) // if not found in the tree though, it's probably just text 989 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 990 } 991 } else { 992 if(n.element) { 993 if(!strict) 994 considerHtmlParagraphHack(n.element); 995 e.appendChild(n.element); 996 } 997 } 998 999 if(n.payload == tagName) // in strict mode, this is always true 1000 closed = true; 1001 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1002 } 1003 //writef("</%s>\n", tagName); 1004 return Ele(0, e, null); 1005 } 1006 1007 // if a tag was opened but not closed by end of file, we can arrive here 1008 if(!strict && pos >= data.length) 1009 return addTag(false); 1010 //else if(strict) assert(0); // should be caught before 1011 1012 switch(data[pos]) { 1013 default: assert(0); 1014 case '/': // self closing tag 1015 return addTag(true); 1016 case '>': 1017 return addTag(false); 1018 case ' ': 1019 case '\t': 1020 case '\n': 1021 case '\r': 1022 // there might be attributes... 1023 moreAttributes: 1024 eatWhitespace(); 1025 1026 // same deal as above the switch.... 
1027 if(!strict && pos >= data.length) 1028 return addTag(false); 1029 1030 if(strict && pos >= data.length) 1031 throw new MarkupException("tag open, didn't find > before end of file"); 1032 1033 switch(data[pos]) { 1034 case '/': // self closing tag 1035 return addTag(true); 1036 case '>': // closed tag; open -- we now read the contents 1037 return addTag(false); 1038 default: // it is an attribute 1039 string attrName = readAttributeName(); 1040 string attrValue = attrName; 1041 1042 bool ateAny = eatWhitespace(); 1043 if(strict && ateAny) 1044 throw new MarkupException("inappropriate whitespace after attribute name"); 1045 1046 if(pos >= data.length) { 1047 if(strict) 1048 assert(0, "this should have thrown in readAttributeName"); 1049 else { 1050 data ~= ">"; 1051 goto blankValue; 1052 } 1053 } 1054 if(data[pos] == '=') { 1055 pos++; 1056 1057 ateAny = eatWhitespace(); 1058 if(strict && ateAny) 1059 throw new MarkupException("inappropriate whitespace after attribute equals"); 1060 1061 attrValue = readAttributeValue(); 1062 1063 eatWhitespace(); 1064 } 1065 1066 blankValue: 1067 1068 if(strict && attrName in attributes) 1069 throw new MarkupException("Repeated attribute: " ~ attrName); 1070 1071 if(attrName.strip().length) 1072 attributes[attrName] = attrValue; 1073 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1074 1075 if(!strict && pos < data.length && data[pos] == '<') { 1076 // this is the broken tag that doesn't have a > at the end 1077 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1078 // let's insert one as a hack 1079 goto case '>'; 1080 } 1081 1082 goto moreAttributes; 1083 } 1084 } 1085 } 1086 1087 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1088 //assert(0); 1089 } 1090 1091 eatWhitespace(); 1092 Ele r; 1093 do { 1094 r = readElement(); // there SHOULD only be one element... 
1095 1096 if(r.type == 3 && r.element !is null) 1097 piecesBeforeRoot ~= r.element; 1098 1099 if(r.type == 4) 1100 break; // the document is completely empty... 1101 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1102 1103 root = r.element; 1104 1105 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1106 while(r.type != 4) { 1107 r = readElement(); 1108 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1109 if(r.element !is null) 1110 piecesAfterRoot ~= r.element; 1111 } 1112 } 1113 1114 if(root is null) 1115 { 1116 if(strict) 1117 assert(0, "empty document should be impossible in strict mode"); 1118 else 1119 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1120 } 1121 1122 if(paragraphHackfixRequired) { 1123 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1124 1125 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1126 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1127 1128 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1129 // Kind of inefficient because we can't detect when we recurse back out of a node. 1130 Element[Element] insertLocations; 1131 auto iterator = root.tree; 1132 foreach(ele; iterator) { 1133 if(ele.parentNode is null) 1134 continue; 1135 1136 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1137 auto shouldBePreviousSibling = ele.parentNode; 1138 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
1139 if (auto p = holder in insertLocations) { 1140 shouldBePreviousSibling = *p; 1141 assert(shouldBePreviousSibling.parentNode is holder); 1142 } 1143 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1144 insertLocations[holder] = ele; 1145 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1146 } 1147 } 1148 } 1149 } 1150 1151 /* end massive parse function */ 1152 1153 /// Gets the <title> element's innerText, if one exists 1154 @property string title() { 1155 bool doesItMatch(Element e) { 1156 return (e.tagName == "title"); 1157 } 1158 1159 auto e = findFirst(&doesItMatch); 1160 if(e) 1161 return e.innerText(); 1162 return ""; 1163 } 1164 1165 /// Sets the title of the page, creating a <title> element if needed. 1166 @property void title(string t) { 1167 bool doesItMatch(Element e) { 1168 return (e.tagName == "title"); 1169 } 1170 1171 auto e = findFirst(&doesItMatch); 1172 1173 if(!e) { 1174 e = createElement("title"); 1175 auto heads = getElementsByTagName("head"); 1176 if(heads.length) 1177 heads[0].appendChild(e); 1178 } 1179 1180 if(e) 1181 e.innerText = t; 1182 } 1183 1184 // FIXME: would it work to alias root this; ???? might be a good idea 1185 /// These functions all forward to the root element. See the documentation in the Element class. 
1186 Element getElementById(string id) { 1187 return root.getElementById(id); 1188 } 1189 1190 /// ditto 1191 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1192 if( is(SomeElementType : Element)) 1193 out(ret) { assert(ret !is null); } 1194 body { 1195 return root.requireElementById!(SomeElementType)(id, file, line); 1196 } 1197 1198 /// ditto 1199 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1200 if( is(SomeElementType : Element)) 1201 out(ret) { assert(ret !is null); } 1202 body { 1203 auto e = cast(SomeElementType) querySelector(selector); 1204 if(e is null) 1205 throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line); 1206 return e; 1207 } 1208 1209 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1210 if(is(SomeElementType : Element)) 1211 { 1212 auto e = cast(SomeElementType) querySelector(selector); 1213 return MaybeNullElement!SomeElementType(e); 1214 } 1215 1216 /// ditto 1217 @scriptable 1218 Element querySelector(string selector) { 1219 // see comment below on Document.querySelectorAll 1220 auto s = Selector(selector);//, !loose); 1221 foreach(ref comp; s.components) 1222 if(comp.parts.length && comp.parts[0].separation == 0) 1223 comp.parts[0].separation = -1; 1224 foreach(e; s.getMatchingElementsLazy(this.root)) 1225 return e; 1226 return null; 1227 1228 } 1229 1230 /// ditto 1231 @scriptable 1232 Element[] querySelectorAll(string selector) { 1233 // In standards-compliant code, the document is slightly magical 1234 // in that it is a pseudoelement at top level. It should actually 1235 // match the root as one of its children. 1236 // 1237 // In versions of dom.d before Dec 29 2019, this worked because 1238 // querySelectorAll was willing to return itself. 
With that bug fix 1239 // (search "arbitrary id asduiwh" in this file for associated unittest) 1240 // this would have failed. Hence adding back the root if it matches the 1241 // selector itself. 1242 // 1243 // I'd love to do this better later. 1244 1245 auto s = Selector(selector);//, !loose); 1246 foreach(ref comp; s.components) 1247 if(comp.parts.length && comp.parts[0].separation == 0) 1248 comp.parts[0].separation = -1; 1249 return s.getMatchingElements(this.root); 1250 } 1251 1252 /// ditto 1253 deprecated("use querySelectorAll instead") 1254 Element[] getElementsBySelector(string selector) { 1255 return root.getElementsBySelector(selector); 1256 } 1257 1258 /// ditto 1259 @scriptable 1260 Element[] getElementsByTagName(string tag) { 1261 return root.getElementsByTagName(tag); 1262 } 1263 1264 /// ditto 1265 @scriptable 1266 Element[] getElementsByClassName(string tag) { 1267 return root.getElementsByClassName(tag); 1268 } 1269 1270 /** FIXME: btw, this could just be a lazy range...... */ 1271 Element getFirstElementByTagName(string tag) { 1272 if(loose) 1273 tag = tag.toLower(); 1274 bool doesItMatch(Element e) { 1275 return e.tagName == tag; 1276 } 1277 return findFirst(&doesItMatch); 1278 } 1279 1280 /// This returns the <body> element, if there is one. (It different than Javascript, where it is called 'body', because body is a keyword in D.) 1281 Element mainBody() { 1282 return getFirstElementByTagName("body"); 1283 } 1284 1285 /// this uses a weird thing... it's [name=] if no colon and 1286 /// [property=] if colon 1287 string getMeta(string name) { 1288 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1289 auto e = querySelector("head meta["~thing~"="~name~"]"); 1290 if(e is null) 1291 return null; 1292 return e.content; 1293 } 1294 1295 /// Sets a meta tag in the document header. 
It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/ 1296 void setMeta(string name, string value) { 1297 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1298 auto e = querySelector("head meta["~thing~"="~name~"]"); 1299 if(e is null) { 1300 e = requireSelector("head").addChild("meta"); 1301 e.setAttribute(thing, name); 1302 } 1303 1304 e.content = value; 1305 } 1306 1307 ///. 1308 Form[] forms() { 1309 return cast(Form[]) getElementsByTagName("form"); 1310 } 1311 1312 ///. 1313 Form createForm() 1314 out(ret) { 1315 assert(ret !is null); 1316 } 1317 body { 1318 return cast(Form) createElement("form"); 1319 } 1320 1321 ///. 1322 Element createElement(string name) { 1323 if(loose) 1324 name = name.toLower(); 1325 1326 auto e = Element.make(name); 1327 e.parentDocument = this; 1328 1329 return e; 1330 1331 // return new Element(this, name, null, selfClosed); 1332 } 1333 1334 ///. 1335 Element createFragment() { 1336 return new DocumentFragment(this); 1337 } 1338 1339 ///. 1340 Element createTextNode(string content) { 1341 return new TextNode(this, content); 1342 } 1343 1344 1345 ///. 1346 Element findFirst(bool delegate(Element) doesItMatch) { 1347 Element result; 1348 1349 bool goThroughElement(Element e) { 1350 if(doesItMatch(e)) { 1351 result = e; 1352 return true; 1353 } 1354 1355 foreach(child; e.children) { 1356 if(goThroughElement(child)) 1357 return true; 1358 } 1359 1360 return false; 1361 } 1362 1363 goThroughElement(root); 1364 1365 return result; 1366 } 1367 1368 ///. 1369 void clear() { 1370 root = null; 1371 loose = false; 1372 } 1373 1374 ///. 1375 void setProlog(string d) { 1376 _prolog = d; 1377 prologWasSet = true; 1378 } 1379 1380 ///. 
1381 private string _prolog = "<!DOCTYPE html>\n"; 1382 private bool prologWasSet = false; // set to true if the user changed it 1383 1384 @property string prolog() const { 1385 // if the user explicitly changed it, do what they want 1386 // or if we didn't keep/find stuff from the document itself, 1387 // we'll use the builtin one as a default. 1388 if(prologWasSet || piecesBeforeRoot.length == 0) 1389 return _prolog; 1390 1391 string p; 1392 foreach(e; piecesBeforeRoot) 1393 p ~= e.toString() ~ "\n"; 1394 return p; 1395 } 1396 1397 ///. 1398 override string toString() const { 1399 return prolog ~ root.toString(); 1400 } 1401 1402 /++ 1403 Writes it out with whitespace for easier eyeball debugging 1404 1405 Do NOT use for anything other than eyeball debugging, 1406 because whitespace may be significant content in XML. 1407 +/ 1408 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 1409 string s = prolog; 1410 1411 if(insertComments) s ~= "<!--"; 1412 s ~= "\n"; 1413 if(insertComments) s ~= "-->"; 1414 1415 s ~= root.toPrettyString(insertComments, indentationLevel, indentWith); 1416 foreach(a; piecesAfterRoot) 1417 s ~= a.toPrettyString(insertComments, indentationLevel, indentWith); 1418 return s; 1419 } 1420 1421 ///. 1422 Element root; 1423 1424 /// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s 1425 Element[] piecesBeforeRoot; 1426 1427 /// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it 1428 Element[] piecesAfterRoot; 1429 1430 ///. 1431 bool loose; 1432 1433 1434 1435 // what follows are for mutation events that you can observe 1436 void delegate(DomMutationEvent)[] eventObservers; 1437 1438 void dispatchMutationEvent(DomMutationEvent e) { 1439 foreach(o; eventObservers) 1440 o(e); 1441 } 1442 } 1443 1444 /// This represents almost everything in the DOM. 
/// Group: core_functionality
class Element {
	/// Returns a collection of elements by selector.
	/// See: [Document.opIndex]
	ElementCollection opIndex(string selector) {
		auto collection = ElementCollection(this);
		return collection[selector];
	}

	/++
		Returns the child node with the particular index, or null
		when the index is out of range.

		Be aware that child nodes include text nodes, including
		whitespace-only nodes.
	+/
	Element opIndex(size_t index) {
		return index < children.length ? this.children[index] : null;
	}

	/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
	final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
		if(is(SomeElementType : Element))
		out(ret) {
			assert(ret !is null);
		}
	body {
		auto found = cast(SomeElementType) getElementById(id);
		if(found !is null)
			return found;
		throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
	}

	/// ditto but with selectors instead of ids
	final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
		if(is(SomeElementType : Element))
		out(ret) {
			assert(ret !is null);
		}
	body {
		auto found = cast(SomeElementType) querySelector(selector);
		if(found !is null)
			return found;
		throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
	}


	/++
		If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
1499 +/ 1500 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1501 if(is(SomeElementType : Element)) 1502 { 1503 auto e = cast(SomeElementType) querySelector(selector); 1504 return MaybeNullElement!SomeElementType(e); 1505 } 1506 1507 1508 1509 /// get all the classes on this element 1510 @property string[] classes() { 1511 return split(className, " "); 1512 } 1513 1514 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 1515 @scriptable 1516 Element addClass(string c) { 1517 if(hasClass(c)) 1518 return this; // don't add it twice 1519 1520 string cn = getAttribute("class"); 1521 if(cn.length == 0) { 1522 setAttribute("class", c); 1523 return this; 1524 } else { 1525 setAttribute("class", cn ~ " " ~ c); 1526 } 1527 1528 return this; 1529 } 1530 1531 /// Removes a particular class name. 1532 @scriptable 1533 Element removeClass(string c) { 1534 if(!hasClass(c)) 1535 return this; 1536 string n; 1537 foreach(name; classes) { 1538 if(c == name) 1539 continue; // cut it out 1540 if(n.length) 1541 n ~= " "; 1542 n ~= name; 1543 } 1544 1545 className = n.strip(); 1546 1547 return this; 1548 } 1549 1550 /// Returns whether the given class appears in this element. 1551 bool hasClass(string c) { 1552 string cn = className; 1553 1554 auto idx = cn.indexOf(c); 1555 if(idx == -1) 1556 return false; 1557 1558 foreach(cla; cn.split(" ")) 1559 if(cla == c) 1560 return true; 1561 return false; 1562 1563 /* 1564 int rightSide = idx + c.length; 1565 1566 bool checkRight() { 1567 if(rightSide == cn.length) 1568 return true; // it's the only class 1569 else if(iswhite(cn[rightSide])) 1570 return true; 1571 return false; // this is a substring of something else.. 
1572 } 1573 1574 if(idx == 0) { 1575 return checkRight(); 1576 } else { 1577 if(!iswhite(cn[idx - 1])) 1578 return false; // substring 1579 return checkRight(); 1580 } 1581 1582 assert(0); 1583 */ 1584 } 1585 1586 1587 /* ******************************* 1588 DOM Mutation 1589 *********************************/ 1590 /// convenience function to quickly add a tag with some text or 1591 /// other relevant info (for example, it's a src for an <img> element 1592 /// instead of inner text) 1593 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 1594 in { 1595 assert(tagName !is null); 1596 } 1597 out(e) { 1598 //assert(e.parentNode is this); 1599 //assert(e.parentDocument is this.parentDocument); 1600 } 1601 body { 1602 auto e = Element.make(tagName, childInfo, childInfo2); 1603 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 1604 // return the parent. That will break existing code though. 1605 return appendChild(e); 1606 } 1607 1608 /// Another convenience function. Adds a child directly after the current one, returning 1609 /// the new child. 1610 /// 1611 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 1612 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 1613 in { 1614 assert(tagName !is null); 1615 assert(parentNode !is null); 1616 } 1617 out(e) { 1618 assert(e.parentNode is this.parentNode); 1619 assert(e.parentDocument is this.parentDocument); 1620 } 1621 body { 1622 auto e = Element.make(tagName, childInfo, childInfo2); 1623 return parentNode.insertAfter(this, e); 1624 } 1625 1626 /// 1627 Element addSibling(Element e) { 1628 return parentNode.insertAfter(this, e); 1629 } 1630 1631 /// 1632 Element addChild(Element e) { 1633 return this.appendChild(e); 1634 } 1635 1636 /// Convenience function to append text intermixed with other children. 
1637 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 1638 /// or div.addChildren("Hello, ", user.name, "!"); 1639 1640 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 1641 void addChildren(T...)(T t) { 1642 foreach(item; t) { 1643 static if(is(item : Element)) 1644 appendChild(item); 1645 else static if (is(isSomeString!(item))) 1646 appendText(to!string(item)); 1647 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 1648 } 1649 } 1650 1651 ///. 1652 Element addChild(string tagName, Element firstChild, string info2 = null) 1653 in { 1654 assert(firstChild !is null); 1655 } 1656 out(ret) { 1657 assert(ret !is null); 1658 assert(ret.parentNode is this); 1659 assert(firstChild.parentNode is ret); 1660 1661 assert(ret.parentDocument is this.parentDocument); 1662 //assert(firstChild.parentDocument is this.parentDocument); 1663 } 1664 body { 1665 auto e = Element.make(tagName, "", info2); 1666 e.appendChild(firstChild); 1667 this.appendChild(e); 1668 return e; 1669 } 1670 1671 /// 1672 Element addChild(string tagName, in Html innerHtml, string info2 = null) 1673 in { 1674 } 1675 out(ret) { 1676 assert(ret !is null); 1677 assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null"); 1678 assert(ret.parentDocument is this.parentDocument); 1679 } 1680 body { 1681 auto e = Element.make(tagName, "", info2); 1682 this.appendChild(e); 1683 e.innerHTML = innerHtml.source; 1684 return e; 1685 } 1686 1687 1688 /// . 1689 void appendChildren(Element[] children) { 1690 foreach(ele; children) 1691 appendChild(ele); 1692 } 1693 1694 ///. 
1695 void reparent(Element newParent) 1696 in { 1697 assert(newParent !is null); 1698 assert(parentNode !is null); 1699 } 1700 out { 1701 assert(this.parentNode is newParent); 1702 //assert(isInArray(this, newParent.children)); 1703 } 1704 body { 1705 parentNode.removeChild(this); 1706 newParent.appendChild(this); 1707 } 1708 1709 /** 1710 Strips this tag out of the document, putting its inner html 1711 as children of the parent. 1712 1713 For example, given: `<p>hello <b>there</b></p>`, if you 1714 call `stripOut` on the `b` element, you'll be left with 1715 `<p>hello there<p>`. 1716 1717 The idea here is to make it easy to get rid of garbage 1718 markup you aren't interested in. 1719 */ 1720 void stripOut() 1721 in { 1722 assert(parentNode !is null); 1723 } 1724 out { 1725 assert(parentNode is null); 1726 assert(children.length == 0); 1727 } 1728 body { 1729 foreach(c; children) 1730 c.parentNode = null; // remove the parent 1731 if(children.length) 1732 parentNode.replaceChild(this, this.children); 1733 else 1734 parentNode.removeChild(this); 1735 this.children.length = 0; // we reparented them all above 1736 } 1737 1738 /// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check 1739 /// if the element already isn't in a tree, it does nothing. 1740 Element removeFromTree() 1741 in { 1742 1743 } 1744 out(var) { 1745 assert(this.parentNode is null); 1746 assert(var is this); 1747 } 1748 body { 1749 if(this.parentNode is null) 1750 return this; 1751 1752 this.parentNode.removeChild(this); 1753 1754 return this; 1755 } 1756 1757 /++ 1758 Wraps this element inside the given element. 1759 It's like `this.replaceWith(what); what.appendchild(this);` 1760 1761 Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));` 1762 you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`. 
1763 +/ 1764 Element wrapIn(Element what) 1765 in { 1766 assert(what !is null); 1767 } 1768 out(ret) { 1769 assert(this.parentNode is what); 1770 assert(ret is what); 1771 } 1772 body { 1773 this.replaceWith(what); 1774 what.appendChild(this); 1775 1776 return what; 1777 } 1778 1779 /// Replaces this element with something else in the tree. 1780 Element replaceWith(Element e) 1781 in { 1782 assert(this.parentNode !is null); 1783 } 1784 body { 1785 e.removeFromTree(); 1786 this.parentNode.replaceChild(this, e); 1787 return e; 1788 } 1789 1790 /** 1791 Splits the className into an array of each class given 1792 */ 1793 string[] classNames() const { 1794 return className().split(" "); 1795 } 1796 1797 /** 1798 Fetches the first consecutive text nodes concatenated together. 1799 1800 1801 `firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered. 1802 1803 See_also: [directText], [innerText] 1804 */ 1805 string firstInnerText() const { 1806 string s; 1807 foreach(child; children) { 1808 if(child.nodeType != NodeType.Text) 1809 break; 1810 1811 s ~= child.nodeValue(); 1812 } 1813 return s; 1814 } 1815 1816 1817 /** 1818 Returns the text directly under this element. 1819 1820 1821 Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues 1822 past child tags. So, `<example>some <b>bold</b> text</example>` 1823 will return `some text` because it only gets the text, skipping non-text children. 1824 1825 See_also: [firstInnerText], [innerText] 1826 */ 1827 @property string directText() { 1828 string ret; 1829 foreach(e; children) { 1830 if(e.nodeType == NodeType.Text) 1831 ret ~= e.nodeValue(); 1832 } 1833 1834 return ret; 1835 } 1836 1837 /** 1838 Sets the direct text, without modifying other child nodes. 1839 1840 1841 Unlike [innerText], this does *not* remove existing elements in the element. 1842 1843 It only replaces the first text node it sees. 
1844 1845 If there are no text nodes, it calls [appendText]. 1846 1847 So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`. 1848 */ 1849 @property void directText(string text) { 1850 foreach(e; children) { 1851 if(e.nodeType == NodeType.Text) { 1852 auto it = cast(TextNode) e; 1853 it.contents = text; 1854 return; 1855 } 1856 } 1857 1858 appendText(text); 1859 } 1860 1861 // do nothing, this is primarily a virtual hook 1862 // for links and forms 1863 void setValue(string field, string value) { } 1864 1865 1866 // this is a thing so i can remove observer support if it gets slow 1867 // I have not implemented all these yet 1868 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 1869 if(parentDocument is null) return; 1870 DomMutationEvent me; 1871 me.operation = operation; 1872 me.target = this; 1873 me.relatedString = s1; 1874 me.relatedString2 = s2; 1875 me.related = r; 1876 me.related2 = r2; 1877 parentDocument.dispatchMutationEvent(me); 1878 } 1879 1880 // putting all the members up front 1881 1882 // this ought to be private. don't use it directly. 1883 Element[] children; 1884 1885 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 1886 string tagName; 1887 1888 /// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 1889 string[string] attributes; 1890 1891 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 1892 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 1893 private bool selfClosed; 1894 1895 /// Get the parent Document object that contains this element. 1896 /// It may be null, so remember to check for that. 1897 Document parentDocument; 1898 1899 ///. 
1900 inout(Element) parentNode() inout { 1901 auto p = _parentNode; 1902 1903 if(cast(DocumentFragment) p) 1904 return p._parentNode; 1905 1906 return p; 1907 } 1908 1909 //protected 1910 Element parentNode(Element e) { 1911 return _parentNode = e; 1912 } 1913 1914 private Element _parentNode; 1915 1916 // the next few methods are for implementing interactive kind of things 1917 private CssStyle _computedStyle; 1918 1919 // these are here for event handlers. Don't forget that this library never fires events. 1920 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 1921 EventHandler[][string] bubblingEventHandlers; 1922 EventHandler[][string] capturingEventHandlers; 1923 EventHandler[string] defaultEventHandlers; 1924 1925 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 1926 if(event.length > 2 && event[0..2] == "on") 1927 event = event[2 .. $]; 1928 1929 if(useCapture) 1930 capturingEventHandlers[event] ~= handler; 1931 else 1932 bubblingEventHandlers[event] ~= handler; 1933 } 1934 1935 1936 // and now methods 1937 1938 /// Convenience function to try to do the right thing for HTML. This is the main 1939 /// way I create elements. 1940 static Element make(string tagName, string childInfo = null, string childInfo2 = null) { 1941 bool selfClosed = tagName.isInArray(selfClosedElements); 1942 1943 Element e; 1944 // want to create the right kind of object for the given tag... 
1945 switch(tagName) { 1946 case "#text": 1947 e = new TextNode(null, childInfo); 1948 return e; 1949 // break; 1950 case "table": 1951 e = new Table(null); 1952 break; 1953 case "a": 1954 e = new Link(null); 1955 break; 1956 case "form": 1957 e = new Form(null); 1958 break; 1959 case "tr": 1960 e = new TableRow(null); 1961 break; 1962 case "td", "th": 1963 e = new TableCell(null, tagName); 1964 break; 1965 default: 1966 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 1967 } 1968 1969 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 1970 e.tagName = tagName; 1971 e.selfClosed = selfClosed; 1972 1973 if(childInfo !is null) 1974 switch(tagName) { 1975 /* html5 convenience tags */ 1976 case "audio": 1977 if(childInfo.length) 1978 e.addChild("source", childInfo); 1979 if(childInfo2 !is null) 1980 e.appendText(childInfo2); 1981 break; 1982 case "source": 1983 e.src = childInfo; 1984 if(childInfo2 !is null) 1985 e.type = childInfo2; 1986 break; 1987 /* regular html 4 stuff */ 1988 case "img": 1989 e.src = childInfo; 1990 if(childInfo2 !is null) 1991 e.alt = childInfo2; 1992 break; 1993 case "link": 1994 e.href = childInfo; 1995 if(childInfo2 !is null) 1996 e.rel = childInfo2; 1997 break; 1998 case "option": 1999 e.innerText = childInfo; 2000 if(childInfo2 !is null) 2001 e.value = childInfo2; 2002 break; 2003 case "input": 2004 e.type = "hidden"; 2005 e.name = childInfo; 2006 if(childInfo2 !is null) 2007 e.value = childInfo2; 2008 break; 2009 case "button": 2010 e.innerText = childInfo; 2011 if(childInfo2 !is null) 2012 e.type = childInfo2; 2013 break; 2014 case "a": 2015 e.innerText = childInfo; 2016 if(childInfo2 !is null) 2017 e.href = childInfo2; 2018 break; 2019 case "script": 2020 case "style": 2021 e.innerRawSource = childInfo; 2022 break; 2023 case "meta": 2024 e.name = childInfo; 2025 if(childInfo2 !is null) 2026 e.content = childInfo2; 2027 break; 2028 /* 
generically, assume we were passed text and perhaps class */ 2029 default: 2030 e.innerText = childInfo; 2031 if(childInfo2.length) 2032 e.className = childInfo2; 2033 } 2034 2035 return e; 2036 } 2037 2038 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2039 // FIXME: childInfo2 is ignored when info1 is null 2040 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2041 m.innerHTML = innerHtml.source; 2042 return m; 2043 } 2044 2045 static Element make(string tagName, Element child, string childInfo2 = null) { 2046 auto m = Element.make(tagName, cast(string) null, childInfo2); 2047 m.appendChild(child); 2048 return m; 2049 } 2050 2051 2052 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2053 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2054 parentDocument = _parentDocument; 2055 tagName = _tagName; 2056 if(_attributes !is null) 2057 attributes = _attributes; 2058 selfClosed = _selfClosed; 2059 2060 version(dom_node_indexes) 2061 this.dataset.nodeIndex = to!string(&(this.attributes)); 2062 2063 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2064 } 2065 2066 /// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2067 /// Note also that without a parent document, elements are always in strict, case-sensitive mode. 2068 this(string _tagName, string[string] _attributes = null) { 2069 tagName = _tagName; 2070 if(_attributes !is null) 2071 attributes = _attributes; 2072 selfClosed = tagName.isInArray(selfClosedElements); 2073 2074 // this is meant to reserve some memory. It makes a small, but consistent improvement. 
//children.length = 8;
	//children.length = 0;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}

/// Creates an element that only knows its parent document; no tag name or attributes are set here.
private this(Document _parentDocument) {
	parentDocument = _parentDocument;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}


/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/++
	UNTESTED

	The next element you would encounter if you were reading it in the source:
	the first child if there is one, otherwise the next sibling, otherwise the
	next sibling of the nearest ancestor that has one.

	Returns: that element, or null if this is the last one in the tree.
+/
Element nextInSource() {
	auto n = firstChild;
	if(n is null)
		n = nextSibling();
	if(n is null) {
		// Climb towards the root looking for an ancestor with a following sibling.
		// `p` has to move up on every round; without that the loop never ends
		// when the direct parent has no next sibling.
		auto p = this.parentNode;
		while(p !is null && n is null) {
			n = p.nextSibling;
			p = p.parentNode;
		}
	}

	return n;
}

/++
	UNTESTED

	The inverse of [nextInSource]: the element you would encounter just before
	this one if you were reading the source.

	If there is a previous sibling, that is its deepest last descendant (or the
	sibling itself when it has no children); otherwise it is the parent.

	Returns: that element, or null if this is a parentless node.
+/
Element previousInSource() {
	auto p = previousSibling;
	if(p is null)
		return parentNode; // first child (or root): only the parent precedes us

	// everything inside the previous sibling was written out before we were
	while(p.lastChild !is null)
		p = p.lastChild;
	return p;
}

/// Returns the closest preceding sibling that is not a text node, or null.
@property Element previousElementSibling() {
	return previousSibling("*");
}

/++
	Returns the closest sibling that comes before this node in its parent.

	Params:
		tagName = null accepts any node, `"*"` accepts anything that is not a
		text node, any other value must equal the sibling's tag name.

	Returns: the matching sibling, or null if there is none or this node has no parent.
+/
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ps = null;
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		// keep overwriting so that the last match before `this` wins
		if(tagName == "*" && e.nodeType != NodeType.Text) {
			ps = e;
		} else if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

///.
@property Element nextElementSibling() {
	return nextSibling("*");
}

/++
	Returns the closest sibling that comes after this node in its parent.

	Params:
		tagName = null accepts any node, `"*"` accepts anything that is not a
		text node, any other value must equal the sibling's tag name.

	Returns: the matching sibling, or null if there is none or this node has no parent.
+/
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have walked past `this`
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe) {
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}


/++
	Gets the nearest node, going up the chain, with the given tagName.

	When `tagName` is null and `T` is [Form], [Table] or [Link], the tag name
	defaults to `"form"`, `"table"` or `"a"` respectively. With a null tag name
	and any other `T`, the direct parent is used.

	Returns: the ancestor found. With `T == Element` this is null when there is
	no such ancestor.

	Throws: [ElementNotFoundException] when `T` is not `Element` and the ancestor
	found (if any) cannot be cast to `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was `tagName == "a";`, a comparison that assigned nothing
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/// Returns the first element in this element's `tree` whose id equals `id`, or null if none does.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(e; tree)
		if(e.id == id)
			return e;
	return null;
}

/++
	Returns a child element that matches the given `selector`.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.
+/
@scriptable
Element querySelector(string selector) {
	Selector s = Selector(selector);
	foreach(ele; tree)
		if(s.matchesElement(ele))
			return ele;
	return null;
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
	return getElementsBySelector(selector);
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	Selector s = Selector(selector);
	return s.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	// Parse the selector once and reuse it for every ancestor, the same way
	// querySelector reuses one Selector across the tree, instead of going
	// through matches() which would re-parse the string on each step.
	Selector s = Selector(selector);
	for(Element e = this; e !is null; e = e.parentNode) {
		if(s.matchesElement(e))
			return e;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"]     Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".
2294 2295 [item$=sdas] ends with 2296 [item^-sdsad] begins with 2297 2298 Quotes are optional here. 2299 2300 Pseudos: 2301 :first-child 2302 :last-child 2303 :link (same as a[href] for our purposes here) 2304 2305 2306 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 2307 2308 2309 2310 This ONLY cares about elements. text, etc, are ignored 2311 2312 2313 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 2314 */ 2315 Element[] getElementsBySelector(string selector) { 2316 // FIXME: this function could probably use some performance attention 2317 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2318 2319 2320 bool caseSensitiveTags = true; 2321 if(parentDocument && parentDocument.loose) 2322 caseSensitiveTags = false; 2323 2324 Element[] ret; 2325 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2326 ret ~= sel.getElements(this); 2327 return ret; 2328 } 2329 2330 /// . 2331 Element[] getElementsByClassName(string cn) { 2332 // is this correct? 2333 return getElementsBySelector("." ~ cn); 2334 } 2335 2336 ///. 2337 Element[] getElementsByTagName(string tag) { 2338 if(parentDocument && parentDocument.loose) 2339 tag = tag.toLower(); 2340 Element[] ret; 2341 foreach(e; tree) 2342 if(e.tagName == tag) 2343 ret ~= e; 2344 return ret; 2345 } 2346 2347 2348 /* ******************************* 2349 Attributes 2350 *********************************/ 2351 2352 /** 2353 Gets the given attribute value, or null if the 2354 attribute is not set. 2355 2356 Note that the returned string is decoded, so it no longer contains any xml entities. 
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	auto e = name in attributes;
	if(e)
		return *e;
	else
		return null;
}

/**
	Sets an attribute. Returns this for easy chaining

	The name is lower-cased first when the parent document is loose.

	For `href` and `src`, leading `javascript:` and `vbscript:` prefixes
	(case-insensitive, optionally preceded by whitespace) are removed from the
	value before it is stored. Other values are stored untouched.

	Observers are notified with [DomMutationOperations.setAttribute].
*/
@scriptable
Element setAttribute(string name, string value) {
	import std.string : stripLeft;

	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		// The prefix has to be cut from the same string it was detected on:
		// the old code looked at the stripped value but sliced the unstripped
		// one, so leading whitespace shifted the cut. Looping also makes sure
		// a doubled prefix ("javascript:javascript:...") does not survive.
		for(;;) {
			auto candidate = value.stripLeft();
			auto lowered = candidate.toLower();
			if(lowered.startsWith("vbscript:"))
				value = candidate["vbscript:".length .. $];
			else if(lowered.startsWith("javascript:"))
				value = candidate["javascript:".length .. $];
			else
				break;
		}
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(name in attributes)
		return true;
	else
		return false;
}

/**
	Removes the given attribute from the element.

	Observers are notified with [DomMutationOperations.removeAttribute] whether
	or not the attribute was present. Returns this.
*/
@scriptable
Element removeAttribute(string name)
out(ret) {
	assert(ret is this);
}
body {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	if(name in attributes)
		attributes.remove(name);

	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto c = getAttribute("class");
	if(c is null)
		return "";
	return c;
}

///.
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	Only the names accepted by `isConvenientAttribute` are available this way.
	Passing a non-null value sets the attribute; the current value is returned
	either way.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	if(v !is null)
		setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children.
*/
@property const(Element[]) childNodes() const {
	return children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	return DataSet(this);
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	return AttributeSet(this);
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	return ElementStyle(this);
}

/++
	This sets the style attribute with a string.

	Returns: the same view the getter overload gives, taken after the attribute was set.
+/
@property ElementStyle style(string s) {
	this.setAttribute("style", s);
	return this.style;
}

/// Currently a no-op: the whole implementation is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Don't use this.
@property CssStyle computedStyle() {
	// built once, then served from the cached object
	if(_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");
	/* we'll treat shitty old html attributes as css here */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetLeft;
		for(auto p = offsetParent; p; p = p.offsetParent)
			total += p.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetTop;
		for(auto p = offsetParent; p; p = p.offsetParent)
			total += p.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
out {
	assert(this.children.length == 0);
}
body {
	// Detach the children before dropping them. Otherwise they keep pointing
	// at us, and appending one of them elsewhere later makes appendChild call
	// our removeChild, which throws "no such child".
	foreach(child; children)
		if(child.parentNode is this)
			child.parentNode = null;
	children = null;
}

/// Inserts `e` into this element's parent, right after this element. Returns `e`.
/// This element must have a parent.
/// History: added June 13, 2020
Element appendSibling(Element e) {
	parentNode.insertAfter(this, e);
	return e;
}

/// Inserts `e` into this element's parent, right before this element. Returns `e`.
/// This element must have a parent.
/// History: added June 13, 2020
Element prependSibling(Element e) {
	parentNode.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	If `e` is a [DocumentFragment], the fragment's children are appended rather
	than the fragment itself. This element stops being self-closed, and
	observers are notified with [DomMutationOperations.appendChild].

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
in {
	assert(e !is null);
}
out (ret) {
	assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(e.parentDocument is this.parentDocument);
	assert(e is ret);
}
body {
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	selfClosed = false;
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	if(auto frag = cast(DocumentFragment) e)
		children ~= frag.children;
	else
		children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/++
	Inserts the second element to this node, right before the first param.

	Params:
		where = an existing child of this element
		what = the parentless element to insert; for a [DocumentFragment] its
		children are spliced in instead

	Returns: `what`. If `where` is not found among the children nothing is inserted.
+/
Element insertBefore(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);

	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
body {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what)
				children = children[0..i] ~ frag.children ~ children[i..$];
			else
				children = children[0..i] ~ what ~ children[i..$];
			what.parentDocument = this.parentDocument;
			what.parentNode = this;
			return what;
		}
	}

	return what;
}

/++
	Inserts the given element `what` as a child of `this` element, right after the existing child `where`.
+/
Element insertAfter(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);
	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
body {
	foreach(i, existing; children) {
		if(existing !is where)
			continue;

		// everything up to and including `where` stays in front
		auto head = children[0 .. i + 1];
		auto tail = children[i + 1 .. $];
		if(auto frag = cast(DocumentFragment) what)
			children = head ~ what.children ~ tail;
		else
			children = head ~ what ~ tail;
		what.parentNode = this;
		what.parentDocument = this.parentDocument;
		return what;
	}

	return what;

	assert(0);
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
in {
	assert(child !is null);
	assert(replacement !is null);
	assert(child.parentNode is this);
}
out(ret) {
	assert(ret is child);
	assert(ret.parentNode is null);
	assert(replacement.parentNode is this);
	assert(replacement.parentDocument is this.parentDocument);
}
body {
	foreach(ref slot; this.children) {
		if(slot !is child)
			continue;

		// orphan the old node first, then put the replacement in its slot
		slot.parentNode = null;
		slot = replacement;
		slot.parentNode = this;
		slot.parentDocument = this.parentDocument;
		return child;
	}
	assert(0);
}


/++
	Appends the given text to the node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
2778 2779 See_Also: 2780 [firstInnerText], [directText], [innerText], [appendChild] 2781 +/ 2782 @scriptable 2783 Element appendText(string text) { 2784 Element e = new TextNode(parentDocument, text); 2785 appendChild(e); 2786 return this; 2787 } 2788 2789 /++ 2790 Returns child elements which are of a tag type (excludes text, comments, etc.). 2791 2792 2793 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 2794 2795 Params: 2796 tagName = filter results to only the child elements with the given tag name. 2797 +/ 2798 @property Element[] childElements(string tagName = null) { 2799 Element[] ret; 2800 foreach(c; children) 2801 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 2802 ret ~= c; 2803 return ret; 2804 } 2805 2806 /++ 2807 Appends the given html to the element, returning the elements appended 2808 2809 2810 This is similar to `element.innerHTML += "html string";` in Javascript. 2811 +/ 2812 @scriptable 2813 Element[] appendHtml(string html) { 2814 Document d = new Document("<root>" ~ html ~ "</root>"); 2815 return stealChildren(d.root); 2816 } 2817 2818 2819 ///. 
void insertChildAfter(Element child, Element where)
in {
	assert(child !is null);
	assert(where !is null);
	assert(where.parentNode is this);
	assert(!selfClosed);
	//assert(isInArray(where, children));
}
out {
	assert(child.parentNode is this);
	assert(where.parentNode is this);
	//assert(isInArray(where, children));
	//assert(isInArray(child, children));
}
body {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		// the new node goes into the slot right behind `where`;
		// a DocumentFragment contributes its children instead of itself
		auto at = idx + 1;
		if(auto frag = cast(DocumentFragment) child)
			children = children[0..at] ~ child.children ~ children[at..$];
		else
			children = children[0..at] ~ child ~ children[at..$];
		child.parentNode = this;
		child.parentDocument = this.parentDocument;
		break;
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.
+/
Element[] stealChildren(Element e, Element position = null)
in {
	assert(!selfClosed);
	assert(e !is null);
	//if(position !is null)
		//assert(isInArray(position, children));
}
out (ret) {
	assert(e.children.length == 0);
	// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
version(none)
	debug foreach(child; ret) {
		assert(child.parentNode is this);
		assert(child.parentDocument is this.parentDocument);
	}
}
body {
	foreach(c; e.children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}
	if(position is null)
		children ~= e.children;
	else {
		// note: if `position` is not one of our children, nothing is inserted here
		foreach(i, child; children) {
			if(child is position) {
				children = children[0..i] ~
					e.children ~
					children[i..$];
				break;
			}
		}
	}

	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/++
	Puts the given element first in our children list. The given element must not have a parent already.

	If `e` is a [DocumentFragment], its children are put in front instead of
	the fragment itself.

	Returns: `e`
+/
Element prependChild(Element e)
in {
	assert(e.parentNode is null);
	assert(!selfClosed);
}
out {
	assert(e.parentNode is this);
	assert(e.parentDocument is this.parentDocument);
	// a fragment is never stored itself, only its children are, so the
	// "first child is e" guarantee only holds for ordinary elements
	assert((cast(DocumentFragment) e !is null) || children[0] is e);
}
body {
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	if(auto frag = cast(DocumentFragment) e)
		children = e.children ~ children;
	else
		children = e ~ children;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Each child is written to `where`; the returned string is the part of
	`where` that was added by this call.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	// there is real content coming, so we can't be self-closed anymore
	selfClosed = false;

	auto parsed = new Document();
	parsed.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// adopt the nodes parsed under the temporary wrapper
	children = parsed.root.children;
	foreach(adopted; children) {
		adopted.parentNode = this;
		adopted.parentDocument = this.parentDocument;
	}

	reparentTreeDocuments();

	parsed.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/// Points every element of this element's `tree` at our parent document.
private void reparentTreeDocuments() {
	foreach(node; this.tree)
		node.parentDocument = this.parentDocument;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto parsed = new Document();
	parsed.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// take the parsed nodes as our own children first...
	children = parsed.root.children;
	foreach(adopted; children) {
		adopted.parentNode = this;
		adopted.parentDocument = this.parentDocument;
	}


	reparentTreeDocuments();


	// ...then strip this element itself out
	stripOut();

	return parsed.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	children.length = 0;

	// the raw text lives in a single RawSource node, our only child afterwards
	auto raw = new RawSource(parentDocument, rawSource);
	raw.parentNode = this;

	children ~= raw;
}

/++
	Replaces the child `find` with the parentless element `replace`.

	Returns: `replace`

	Throws: Exception if `find` is not among this element's children.
+/
Element replaceChild(Element find, Element replace)
in {
	assert(find !is null);
	assert(replace !is null);
	assert(replace.parentNode is null);
}
out(ret) {
	assert(ret is replace);
	assert(replace.parentNode is this);
	assert(replace.parentDocument is this.parentDocument);
	assert(find.parentNode is null);
}
body {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(idx, existing; children) {
		if(existing !is find)
			continue;

		replace.parentNode = this;
		children[idx].parentNode = null;
		children[idx] = replace;
		replace.parentDocument = this.parentDocument;
		return replace;
	}

	throw new Exception("no such child");
}

/**
	Replaces the given element with a whole group.
*/
void replaceChild(Element find, Element[] replace)
in {
	assert(find !is null);
	assert(replace !is null);
	assert(find.parentNode is this);
	debug foreach(r; replace)
		assert(r.parentNode is null);
}
out {
	assert(find.parentNode is null);
	assert(children.length >= replace.length);
	debug foreach(child; children)
		assert(child !is find);
	debug foreach(r; replace)
		assert(r.parentNode is this);
}
body {
	// replacing with nothing is just a removal
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);
	for(int i = 0; i < children.length; i++) {
		if(children[i] !is find)
			continue;

		children[i].parentNode = null; // this element should now be dead
		children[i] = replace[0];
		foreach(newcomer; replace) {
			newcomer.parentNode = this;
			newcomer.parentDocument = this.parentDocument;
		}

		// the first one took over the slot; the rest go right behind it
		children = .insertAfter(children, i, replace[1..$]);

		return;
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element.

	Throws an Exception if `c` is not found among the children.
*/
Element removeChild(Element c)
in {
	assert(c !is null);
	assert(c.parentNode is this);
}
out {
	debug foreach(child; children)
		assert(child !is c);
	assert(c.parentNode is null);
}
body {
	foreach(idx, existing; children) {
		if(existing !is c)
			continue;

		children = children[0..idx] ~ children [idx+1..$];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
out (ret) {
	assert(children.length == 0);
	debug foreach(r; ret)
		assert(r.parentNode is null);
}
body {
	// hand back a copy so the caller's list is independent of ours
	Element[] detached = children.dup;
	foreach(node; detached)
		node.parentNode = null;

	children.length = 0;

	return detached;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> &amp; code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like textContent.
*/
@scriptable
@property string innerText() const {
	string text;
	foreach(child; children) {
		// text nodes contribute their value, everything else its own inner text
		text ~= (child.nodeType == NodeType.Text) ? child.nodeValue() : child.innerText;
	}
	return text;
}

///
alias textContent = innerText;

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;
	Element node = new TextNode(parentDocument, text);
	node.parentNode = this;
	children = [node];
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
body {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	auto copy = Element.make(this.tagName);
	copy.parentDocument = this.parentDocument;
	copy.attributes = this.attributes.aadup;
	copy.selfClosed = this.selfClosed;

	if(!deepClone)
		return copy;

	// children are cloned recursively and attached to the copy
	foreach(child; children)
		copy.appendChild(child.cloneNode(true));

	return copy;
}

/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
string nodeValue() const {
	return "";
}

// should return int
/// Returns 1 on this base class.
@property int nodeType() const {
	return 1;
}


invariant () {
	assert(tagName.indexOf(" ") == -1);

	if(children !is null)
	debug foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
	//	assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ?
this.tagName : lol.contents); 3261 } 3262 +/ 3263 //assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required 3264 // reason is so you can create these without needing a reference to the document 3265 } 3266 3267 /** 3268 Turns the whole element, including tag, attributes, and children, into a string which could be pasted into 3269 an XML file. 3270 */ 3271 override string toString() const { 3272 return writeToAppender(); 3273 } 3274 3275 protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const { 3276 if(indentWith is null) 3277 return null; 3278 string s; 3279 3280 if(insertComments) s ~= "<!--"; 3281 s ~= "\n"; 3282 foreach(indent; 0 .. indentationLevel) 3283 s ~= indentWith; 3284 if(insertComments) s ~= "-->"; 3285 3286 return s; 3287 } 3288 3289 /++ 3290 Writes out with formatting. Be warned: formatting changes the contents. Use ONLY 3291 for eyeball debugging. 3292 +/ 3293 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 3294 3295 // first step is to concatenate any consecutive text nodes to simplify 3296 // the white space analysis. this changes the tree! but i'm allowed since 3297 // the comment always says it changes the comments 3298 // 3299 // actually i'm not allowed cuz it is const so i will cheat and lie 3300 /+ 3301 TextNode lastTextChild = null; 3302 for(int a = 0; a < this.children.length; a++) { 3303 auto child = this.children[a]; 3304 if(auto tn = cast(TextNode) child) { 3305 if(lastTextChild) { 3306 lastTextChild.contents ~= tn.contents; 3307 for(int b = a; b < this.children.length - 1; b++) 3308 this.children[b] = this.children[b + 1]; 3309 this.children = this.children[0 .. 
$-1]; 3310 } else { 3311 lastTextChild = tn; 3312 } 3313 } else { 3314 lastTextChild = null; 3315 } 3316 } 3317 +/ 3318 3319 const(Element)[] children; 3320 3321 TextNode lastTextChild = null; 3322 for(int a = 0; a < this.children.length; a++) { 3323 auto child = this.children[a]; 3324 if(auto tn = cast(const(TextNode)) child) { 3325 if(lastTextChild !is null) { 3326 lastTextChild.contents ~= tn.contents; 3327 } else { 3328 lastTextChild = new TextNode(""); 3329 lastTextChild.parentNode = cast(Element) this; 3330 lastTextChild.contents ~= tn.contents; 3331 children ~= lastTextChild; 3332 } 3333 } else { 3334 lastTextChild = null; 3335 children ~= child; 3336 } 3337 } 3338 3339 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3340 3341 s ~= "<"; 3342 s ~= tagName; 3343 3344 // i sort these for consistent output. might be more legible 3345 // but especially it keeps it the same for diff purposes. 3346 import std.algorithm : sort; 3347 auto keys = sort(attributes.keys); 3348 foreach(n; keys) { 3349 auto v = attributes[n]; 3350 s ~= " "; 3351 s ~= n; 3352 s ~= "=\""; 3353 s ~= htmlEntitiesEncode(v); 3354 s ~= "\""; 3355 } 3356 3357 if(selfClosed){ 3358 s ~= " />"; 3359 return s; 3360 } 3361 3362 s ~= ">"; 3363 3364 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 3365 // just keep them on the same line 3366 if(tagName.isInArray(inlineElements) || allAreInlineHtml(children)) { 3367 foreach(child; children) { 3368 s ~= child.toString();//toPrettyString(false, 0, null); 3369 } 3370 } else { 3371 foreach(child; children) { 3372 assert(child !is null); 3373 3374 s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith); 3375 } 3376 3377 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3378 } 3379 3380 s ~= "</"; 3381 s ~= tagName; 3382 s ~= ">"; 3383 3384 return s; 3385 } 3386 3387 /+ 3388 /// Writes out the opening tag only, if applicable. 
3389 string writeTagOnly(Appender!string where = appender!string()) const { 3390 +/ 3391 3392 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3393 /// Note: the ordering of attributes in the string is undefined. 3394 /// Returns the string it creates. 3395 string writeToAppender(Appender!string where = appender!string()) const { 3396 assert(tagName !is null); 3397 3398 where.reserve((this.children.length + 1) * 512); 3399 3400 auto start = where.data.length; 3401 3402 where.put("<"); 3403 where.put(tagName); 3404 3405 import std.algorithm : sort; 3406 auto keys = sort(attributes.keys); 3407 foreach(n; keys) { 3408 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 3409 //assert(v !is null); 3410 where.put(" "); 3411 where.put(n); 3412 where.put("=\""); 3413 htmlEntitiesEncode(v, where); 3414 where.put("\""); 3415 } 3416 3417 if(selfClosed){ 3418 where.put(" />"); 3419 return where.data[start .. $]; 3420 } 3421 3422 where.put('>'); 3423 3424 innerHTML(where); 3425 3426 where.put("</"); 3427 where.put(tagName); 3428 where.put('>'); 3429 3430 return where.data[start .. $]; 3431 } 3432 3433 /** 3434 Returns a lazy range of all its children, recursively. 3435 */ 3436 @property ElementStream tree() { 3437 return new ElementStream(this); 3438 } 3439 3440 // I moved these from Form because they are generally useful. 3441 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3442 /// Tags: HTML, HTML5 3443 // FIXME: add overloads for other label types... 
Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	// Appends a <label> holding a caption <span> and the input control, and returns the <label>.
	auto wrapper = this.addChild("label");

	// checkboxes and radios get their caption after the control; everything else before it
	immutable captionAfterInput = (type == "checkbox" || type == "radio");

	if(!captionAfterInput)
		wrapper.addChild("span", label);

	Element input;
	if(type == "textarea")
		input = wrapper.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
	else
		input = wrapper.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);

	if(captionAfterInput)
		wrapper.addChild("span", label);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}

/// Same as the string-label overload, but the caption is an existing element that is
/// appended to the new <label> before the control. Returns the <label>.
Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild(label);

	Element input;
	if(type == "textarea")
		input = wrapper.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
	else
		input = wrapper.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}

/// Convenience overload: a "text" input with the given options.
Element addField(string label, string name, FormFieldOptions fieldOptions) {
	return addField(label, name, "text", fieldOptions);
}

/// Adds a <label> holding a caption <span> and a <select> with one <option> per entry
/// of `options`. Each entry's key and value are passed through to `addChild("option", value, key)`.
/// Iteration order over the associative array is unspecified. Returns the <label>.
Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild("span", label);
	auto sel = wrapper.addChild("select").setAttribute("name", name);

	foreach(k, opt; options)
		sel.addChild("option", opt, k);

	// Previously fieldOptions was accepted here but silently dropped; apply it to the
	// <select> the same way the other overloads apply it to their control.
	// NOTE(review): assumes applyToElement only sets attributes on the element it is given - confirm.
	fieldOptions.applyToElement(sel);

	return wrapper;
}

/// Adds a <div class="submit-holder"> containing a submit <input>; a non-empty `label`
/// becomes the button's value. Returns the holder <div>.
Element addSubmitButton(string label = null) {
	auto holder = this.addChild("div");
	holder.addClass("submit-holder");

	auto button = holder.addChild("input");
	button.type = "submit";
	if(label.length)
		button.value = label;

	return holder;
}

}

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	this(string data) {
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}




import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// normalize the needle once; every candidate is normalized the same way below
	txt = txt.strip().toLower();

	Element[] matches;
	foreach(candidate; element.getElementsByTagName("#comment")) {
		if(candidate.nodeValue().strip().toLower() == txt)
			matches ~= candidate;
	}

	return matches;
}

/// An option type that propagates null. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	auto opDispatch(string method, T...)(T args) {
		// the branches below are selected at compile time by the forwarded member's return type
		alias ResultType = typeof(__traits(getMember, element, method)(args));
		static if(is(ResultType : Element)) {
			if(element is null)
				return MaybeNullElement!ResultType(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(ResultType == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if(is(ResultType == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			// only Element-like, string and void results are supported
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	///
	this(Element e) {
		elements = [e];
	}

	///
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	///
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	///
	ElementCollection opIndex(string selector) {
		ElementCollection found;
		foreach(member; elements)
			found.elements ~= member.getElementsBySelector(selector);
		return found;
	}

	///
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1..$];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		Note that `separator` is appended after every element, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string text;
		// the loop variable must stay named `e`: the mixin string below refers to it
		foreach(e; elements) {
			text ~= mixin("e." ~ method);
			text ~= separator;
		}
		return text;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		// the loop variable must stay named `e`: the mixin string below refers to it
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(member; elements)
			member.wrapIn(what.cloneNode(false));

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	///
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		// a null argument means "read the property"; anything else is a write
		return (v is null) ? get(name) : set(name, v);
	}

	///
	string opIndex(string key) const {
		return get(key);
	}

	///
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	string* opBinary(string op)(string key) if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets the `data-*` attribute matching the camelCased `name`; returns `value`.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Reads the `data-*` attribute matching the camelCased `name`.
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets attribute `name` on the wrapped element; returns `value`.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Reads attribute `name` from the wrapped element.
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}



/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	// Reference to the element's raw "style" attribute string, creating an empty one on demand.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css rule (name given JavaScript style, e.g. `backgroundColor` or `cssFloat`)
	/// and rewrites the whole style attribute from the parsed rules. An empty value removes
	/// the rule. An empty name is ignored. Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;

		// "float" is spelled cssFloat on the JavaScript side
		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		parsed[name] = value;

		_attribute = "";
		foreach(k, v; parsed) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(_attribute.length)
				_attribute ~= " ";
			_attribute ~= k ~ ": " ~ v ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	/// Returns the value of one css rule (name given JavaScript style), or null if it is not set.
	string get(string name) const {
		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		if(auto found = name in parsed)
			return *found;
		return null;
	}

	/// Parses the style attribute into a name => value map. A rule without a colon is
	/// stored with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(rule; _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;

			auto colon = rule.indexOf(":");
			if(colon == -1) {
				ret[rule] = "";
				continue;
			}

			auto ruleName = rule[0 .. colon].strip();
			auto ruleValue = rule[colon + 1 .. $].strip();
			ret[ruleName] = ruleValue;
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
///
/// Only ASCII `A`-`Z` are treated as word boundaries; all other code units are copied through unchanged.
string unCamelCase(string a) {
	string ret;
	foreach(c; a) {
		if(c >= 'A' && c <= 'Z') {
			ret ~= '-';
			ret ~= cast(char)(c + ('a' - 'A')); // ASCII lower-case
		} else {
			ret ~= c;
		}
	}
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
///
/// Dashes are dropped and the ASCII letter following a dash is upper-cased. The string is
/// processed per UTF-8 code unit, so the case change is deliberately ASCII-only: the old
/// code passed a lone code unit to the string `toUpper`, which is not valid UTF-8 when a
/// non-ASCII character follows a dash. Non-ASCII bytes are now copied through untouched,
/// mirroring unCamelCase.
string camelCase(string a) {
	string ret;
	bool justSawDash = false;
	foreach(c; a) {
		if(c == '-') {
			justSawDash = true;
			continue;
		}

		if(justSawDash) {
			justSawDash = false;
			if(c >= 'a' && c <= 'z')
				ret ~= cast(char)(c - ('a' - 'A')); // ASCII upper-case
			else
				ret ~= c;
		} else {
			ret ~= c;
		}
	}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
}




///.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Throws ElementNotFoundException when `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
body {
	if(auto converted = cast(T) e)
		return converted;
	throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
}


///.
/// Group: core_functionality
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// A fragment has no tag of its own, so it serializes as just its contents.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty-prints each child at the fragment's own indentation level and concatenates the results.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string joined;
		foreach(kid; children)
			joined ~= kid.toPrettyString(insertComments, indentationLevel, indentWith);
		return joined;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	override Element parentNode(Element p) {
		this._parentNode = p;
		// the children are re-parented to the same node as the fragment itself
		foreach(kid; children)
			kid.parentNode = p;
		return p;
	}
}

/// Given text, encode all html entities on it - `&`, `<`, `>`, and `"`. This function also
/// encodes all 8 bit characters as numeric entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress this by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	import std.conv : to;

	// if there's no entities, we can save a lot of time by not bothering with the
	// encoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	// only the part appended by this call is returned
	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		switch(d) {
			// the entity strings below had been flattened to the raw characters (and `"""`
			// did not even compile), which made the function a no-op for markup characters
			case '&':
				output.put("&amp;");
			break;
			case '<':
				output.put("&lt;");
			break;
			case '>':
				output.put("&gt;");
			break;
			case '"':
				output.put("&quot;");
			break;
			// case '\'':
			//	output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
			default:
				if(!encodeNonAscii || (d < 128 && d > 0))
					output.put(d);
				else
					// NUL and everything outside ascii becomes a decimal character reference
					output.put("&#" ~ to!string(cast(int) d) ~ ";");
		}
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
4069 /// Group: core_functionality 4070 dchar parseEntity(in dchar[] entity) { 4071 switch(entity[1..$-1]) { 4072 case "quot": 4073 return '"'; 4074 case "apos": 4075 return '\''; 4076 case "lt": 4077 return '<'; 4078 case "gt": 4079 return '>'; 4080 case "amp": 4081 return '&'; 4082 // the next are html rather than xml 4083 4084 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4085 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4086 case "Tab": return '\u0009'; 4087 case "NewLine": return '\u000A'; 4088 case "excl": return '\u0021'; 4089 case "QUOT": return '\u0022'; 4090 case "num": return '\u0023'; 4091 case "dollar": return '\u0024'; 4092 case "percnt": return '\u0025'; 4093 case "AMP": return '\u0026'; 4094 case "lpar": return '\u0028'; 4095 case "rpar": return '\u0029'; 4096 case "ast": case "midast": return '\u002A'; 4097 case "plus": return '\u002B'; 4098 case "comma": return '\u002C'; 4099 case "period": return '\u002E'; 4100 case "sol": return '\u002F'; 4101 case "colon": return '\u003A'; 4102 case "semi": return '\u003B'; 4103 case "LT": return '\u003C'; 4104 case "equals": return '\u003D'; 4105 case "GT": return '\u003E'; 4106 case "quest": return '\u003F'; 4107 case "commat": return '\u0040'; 4108 case "lsqb": case "lbrack": return '\u005B'; 4109 case "bsol": return '\u005C'; 4110 case "rsqb": case "rbrack": return '\u005D'; 4111 case "Hat": return '\u005E'; 4112 case "lowbar": case "UnderBar": return '\u005F'; 4113 case "grave": case "DiacriticalGrave": return '\u0060'; 4114 case "lcub": case "lbrace": return '\u007B'; 4115 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4116 case "rcub": case "rbrace": return '\u007D'; 4117 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4118 case "iexcl": return '\u00A1'; 4119 case "cent": return '\u00A2'; 4120 case "pound": return '\u00A3'; 4121 case "curren": return '\u00A4'; 4122 case "yen": return '\u00A5'; 4123 case 
"brvbar": return '\u00A6'; 4124 case "sect": return '\u00A7'; 4125 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4126 case "copy": case "COPY": return '\u00A9'; 4127 case "ordf": return '\u00AA'; 4128 case "laquo": return '\u00AB'; 4129 case "not": return '\u00AC'; 4130 case "shy": return '\u00AD'; 4131 case "reg": case "circledR": case "REG": return '\u00AE'; 4132 case "macr": case "strns": return '\u00AF'; 4133 case "deg": return '\u00B0'; 4134 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4135 case "sup2": return '\u00B2'; 4136 case "sup3": return '\u00B3'; 4137 case "acute": case "DiacriticalAcute": return '\u00B4'; 4138 case "micro": return '\u00B5'; 4139 case "para": return '\u00B6'; 4140 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4141 case "cedil": case "Cedilla": return '\u00B8'; 4142 case "sup1": return '\u00B9'; 4143 case "ordm": return '\u00BA'; 4144 case "raquo": return '\u00BB'; 4145 case "frac14": return '\u00BC'; 4146 case "frac12": case "half": return '\u00BD'; 4147 case "frac34": return '\u00BE'; 4148 case "iquest": return '\u00BF'; 4149 case "Agrave": return '\u00C0'; 4150 case "Aacute": return '\u00C1'; 4151 case "Acirc": return '\u00C2'; 4152 case "Atilde": return '\u00C3'; 4153 case "Auml": return '\u00C4'; 4154 case "Aring": case "angst": return '\u00C5'; 4155 case "AElig": return '\u00C6'; 4156 case "Ccedil": return '\u00C7'; 4157 case "Egrave": return '\u00C8'; 4158 case "Eacute": return '\u00C9'; 4159 case "Ecirc": return '\u00CA'; 4160 case "Euml": return '\u00CB'; 4161 case "Igrave": return '\u00CC'; 4162 case "Iacute": return '\u00CD'; 4163 case "Icirc": return '\u00CE'; 4164 case "Iuml": return '\u00CF'; 4165 case "ETH": return '\u00D0'; 4166 case "Ntilde": return '\u00D1'; 4167 case "Ograve": return '\u00D2'; 4168 case "Oacute": return '\u00D3'; 4169 case "Ocirc": return '\u00D4'; 4170 case "Otilde": return '\u00D5'; 4171 case "Ouml": return '\u00D6'; 4172 case "times": 
return '\u00D7'; 4173 case "Oslash": return '\u00D8'; 4174 case "Ugrave": return '\u00D9'; 4175 case "Uacute": return '\u00DA'; 4176 case "Ucirc": return '\u00DB'; 4177 case "Uuml": return '\u00DC'; 4178 case "Yacute": return '\u00DD'; 4179 case "THORN": return '\u00DE'; 4180 case "szlig": return '\u00DF'; 4181 case "agrave": return '\u00E0'; 4182 case "aacute": return '\u00E1'; 4183 case "acirc": return '\u00E2'; 4184 case "atilde": return '\u00E3'; 4185 case "auml": return '\u00E4'; 4186 case "aring": return '\u00E5'; 4187 case "aelig": return '\u00E6'; 4188 case "ccedil": return '\u00E7'; 4189 case "egrave": return '\u00E8'; 4190 case "eacute": return '\u00E9'; 4191 case "ecirc": return '\u00EA'; 4192 case "euml": return '\u00EB'; 4193 case "igrave": return '\u00EC'; 4194 case "iacute": return '\u00ED'; 4195 case "icirc": return '\u00EE'; 4196 case "iuml": return '\u00EF'; 4197 case "eth": return '\u00F0'; 4198 case "ntilde": return '\u00F1'; 4199 case "ograve": return '\u00F2'; 4200 case "oacute": return '\u00F3'; 4201 case "ocirc": return '\u00F4'; 4202 case "otilde": return '\u00F5'; 4203 case "ouml": return '\u00F6'; 4204 case "divide": case "div": return '\u00F7'; 4205 case "oslash": return '\u00F8'; 4206 case "ugrave": return '\u00F9'; 4207 case "uacute": return '\u00FA'; 4208 case "ucirc": return '\u00FB'; 4209 case "uuml": return '\u00FC'; 4210 case "yacute": return '\u00FD'; 4211 case "thorn": return '\u00FE'; 4212 case "yuml": return '\u00FF'; 4213 case "Amacr": return '\u0100'; 4214 case "amacr": return '\u0101'; 4215 case "Abreve": return '\u0102'; 4216 case "abreve": return '\u0103'; 4217 case "Aogon": return '\u0104'; 4218 case "aogon": return '\u0105'; 4219 case "Cacute": return '\u0106'; 4220 case "cacute": return '\u0107'; 4221 case "Ccirc": return '\u0108'; 4222 case "ccirc": return '\u0109'; 4223 case "Cdot": return '\u010A'; 4224 case "cdot": return '\u010B'; 4225 case "Ccaron": return '\u010C'; 4226 case "ccaron": return '\u010D'; 4227 case 
"Dcaron": return '\u010E'; 4228 case "dcaron": return '\u010F'; 4229 case "Dstrok": return '\u0110'; 4230 case "dstrok": return '\u0111'; 4231 case "Emacr": return '\u0112'; 4232 case "emacr": return '\u0113'; 4233 case "Edot": return '\u0116'; 4234 case "edot": return '\u0117'; 4235 case "Eogon": return '\u0118'; 4236 case "eogon": return '\u0119'; 4237 case "Ecaron": return '\u011A'; 4238 case "ecaron": return '\u011B'; 4239 case "Gcirc": return '\u011C'; 4240 case "gcirc": return '\u011D'; 4241 case "Gbreve": return '\u011E'; 4242 case "gbreve": return '\u011F'; 4243 case "Gdot": return '\u0120'; 4244 case "gdot": return '\u0121'; 4245 case "Gcedil": return '\u0122'; 4246 case "Hcirc": return '\u0124'; 4247 case "hcirc": return '\u0125'; 4248 case "Hstrok": return '\u0126'; 4249 case "hstrok": return '\u0127'; 4250 case "Itilde": return '\u0128'; 4251 case "itilde": return '\u0129'; 4252 case "Imacr": return '\u012A'; 4253 case "imacr": return '\u012B'; 4254 case "Iogon": return '\u012E'; 4255 case "iogon": return '\u012F'; 4256 case "Idot": return '\u0130'; 4257 case "imath": case "inodot": return '\u0131'; 4258 case "IJlig": return '\u0132'; 4259 case "ijlig": return '\u0133'; 4260 case "Jcirc": return '\u0134'; 4261 case "jcirc": return '\u0135'; 4262 case "Kcedil": return '\u0136'; 4263 case "kcedil": return '\u0137'; 4264 case "kgreen": return '\u0138'; 4265 case "Lacute": return '\u0139'; 4266 case "lacute": return '\u013A'; 4267 case "Lcedil": return '\u013B'; 4268 case "lcedil": return '\u013C'; 4269 case "Lcaron": return '\u013D'; 4270 case "lcaron": return '\u013E'; 4271 case "Lmidot": return '\u013F'; 4272 case "lmidot": return '\u0140'; 4273 case "Lstrok": return '\u0141'; 4274 case "lstrok": return '\u0142'; 4275 case "Nacute": return '\u0143'; 4276 case "nacute": return '\u0144'; 4277 case "Ncedil": return '\u0145'; 4278 case "ncedil": return '\u0146'; 4279 case "Ncaron": return '\u0147'; 4280 case "ncaron": return '\u0148'; 4281 case "napos": 
return '\u0149'; 4282 case "ENG": return '\u014A'; 4283 case "eng": return '\u014B'; 4284 case "Omacr": return '\u014C'; 4285 case "omacr": return '\u014D'; 4286 case "Odblac": return '\u0150'; 4287 case "odblac": return '\u0151'; 4288 case "OElig": return '\u0152'; 4289 case "oelig": return '\u0153'; 4290 case "Racute": return '\u0154'; 4291 case "racute": return '\u0155'; 4292 case "Rcedil": return '\u0156'; 4293 case "rcedil": return '\u0157'; 4294 case "Rcaron": return '\u0158'; 4295 case "rcaron": return '\u0159'; 4296 case "Sacute": return '\u015A'; 4297 case "sacute": return '\u015B'; 4298 case "Scirc": return '\u015C'; 4299 case "scirc": return '\u015D'; 4300 case "Scedil": return '\u015E'; 4301 case "scedil": return '\u015F'; 4302 case "Scaron": return '\u0160'; 4303 case "scaron": return '\u0161'; 4304 case "Tcedil": return '\u0162'; 4305 case "tcedil": return '\u0163'; 4306 case "Tcaron": return '\u0164'; 4307 case "tcaron": return '\u0165'; 4308 case "Tstrok": return '\u0166'; 4309 case "tstrok": return '\u0167'; 4310 case "Utilde": return '\u0168'; 4311 case "utilde": return '\u0169'; 4312 case "Umacr": return '\u016A'; 4313 case "umacr": return '\u016B'; 4314 case "Ubreve": return '\u016C'; 4315 case "ubreve": return '\u016D'; 4316 case "Uring": return '\u016E'; 4317 case "uring": return '\u016F'; 4318 case "Udblac": return '\u0170'; 4319 case "udblac": return '\u0171'; 4320 case "Uogon": return '\u0172'; 4321 case "uogon": return '\u0173'; 4322 case "Wcirc": return '\u0174'; 4323 case "wcirc": return '\u0175'; 4324 case "Ycirc": return '\u0176'; 4325 case "ycirc": return '\u0177'; 4326 case "Yuml": return '\u0178'; 4327 case "Zacute": return '\u0179'; 4328 case "zacute": return '\u017A'; 4329 case "Zdot": return '\u017B'; 4330 case "zdot": return '\u017C'; 4331 case "Zcaron": return '\u017D'; 4332 case "zcaron": return '\u017E'; 4333 case "fnof": return '\u0192'; 4334 case "imped": return '\u01B5'; 4335 case "gacute": return '\u01F5'; 4336 case 
"jmath": return '\u0237'; 4337 case "circ": return '\u02C6'; 4338 case "caron": case "Hacek": return '\u02C7'; 4339 case "breve": case "Breve": return '\u02D8'; 4340 case "dot": case "DiacriticalDot": return '\u02D9'; 4341 case "ring": return '\u02DA'; 4342 case "ogon": return '\u02DB'; 4343 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4344 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4345 case "DownBreve": return '\u0311'; 4346 case "Alpha": return '\u0391'; 4347 case "Beta": return '\u0392'; 4348 case "Gamma": return '\u0393'; 4349 case "Delta": return '\u0394'; 4350 case "Epsilon": return '\u0395'; 4351 case "Zeta": return '\u0396'; 4352 case "Eta": return '\u0397'; 4353 case "Theta": return '\u0398'; 4354 case "Iota": return '\u0399'; 4355 case "Kappa": return '\u039A'; 4356 case "Lambda": return '\u039B'; 4357 case "Mu": return '\u039C'; 4358 case "Nu": return '\u039D'; 4359 case "Xi": return '\u039E'; 4360 case "Omicron": return '\u039F'; 4361 case "Pi": return '\u03A0'; 4362 case "Rho": return '\u03A1'; 4363 case "Sigma": return '\u03A3'; 4364 case "Tau": return '\u03A4'; 4365 case "Upsilon": return '\u03A5'; 4366 case "Phi": return '\u03A6'; 4367 case "Chi": return '\u03A7'; 4368 case "Psi": return '\u03A8'; 4369 case "Omega": case "ohm": return '\u03A9'; 4370 case "alpha": return '\u03B1'; 4371 case "beta": return '\u03B2'; 4372 case "gamma": return '\u03B3'; 4373 case "delta": return '\u03B4'; 4374 case "epsi": case "epsilon": return '\u03B5'; 4375 case "zeta": return '\u03B6'; 4376 case "eta": return '\u03B7'; 4377 case "theta": return '\u03B8'; 4378 case "iota": return '\u03B9'; 4379 case "kappa": return '\u03BA'; 4380 case "lambda": return '\u03BB'; 4381 case "mu": return '\u03BC'; 4382 case "nu": return '\u03BD'; 4383 case "xi": return '\u03BE'; 4384 case "omicron": return '\u03BF'; 4385 case "pi": return '\u03C0'; 4386 case "rho": return '\u03C1'; 4387 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4388 
case "sigma": return '\u03C3'; 4389 case "tau": return '\u03C4'; 4390 case "upsi": case "upsilon": return '\u03C5'; 4391 case "phi": return '\u03C6'; 4392 case "chi": return '\u03C7'; 4393 case "psi": return '\u03C8'; 4394 case "omega": return '\u03C9'; 4395 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4396 case "Upsi": case "upsih": return '\u03D2'; 4397 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4398 case "piv": case "varpi": return '\u03D6'; 4399 case "Gammad": return '\u03DC'; 4400 case "gammad": case "digamma": return '\u03DD'; 4401 case "kappav": case "varkappa": return '\u03F0'; 4402 case "rhov": case "varrho": return '\u03F1'; 4403 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4404 case "bepsi": case "backepsilon": return '\u03F6'; 4405 case "IOcy": return '\u0401'; 4406 case "DJcy": return '\u0402'; 4407 case "GJcy": return '\u0403'; 4408 case "Jukcy": return '\u0404'; 4409 case "DScy": return '\u0405'; 4410 case "Iukcy": return '\u0406'; 4411 case "YIcy": return '\u0407'; 4412 case "Jsercy": return '\u0408'; 4413 case "LJcy": return '\u0409'; 4414 case "NJcy": return '\u040A'; 4415 case "TSHcy": return '\u040B'; 4416 case "KJcy": return '\u040C'; 4417 case "Ubrcy": return '\u040E'; 4418 case "DZcy": return '\u040F'; 4419 case "Acy": return '\u0410'; 4420 case "Bcy": return '\u0411'; 4421 case "Vcy": return '\u0412'; 4422 case "Gcy": return '\u0413'; 4423 case "Dcy": return '\u0414'; 4424 case "IEcy": return '\u0415'; 4425 case "ZHcy": return '\u0416'; 4426 case "Zcy": return '\u0417'; 4427 case "Icy": return '\u0418'; 4428 case "Jcy": return '\u0419'; 4429 case "Kcy": return '\u041A'; 4430 case "Lcy": return '\u041B'; 4431 case "Mcy": return '\u041C'; 4432 case "Ncy": return '\u041D'; 4433 case "Ocy": return '\u041E'; 4434 case "Pcy": return '\u041F'; 4435 case "Rcy": return '\u0420'; 4436 case "Scy": return '\u0421'; 4437 case "Tcy": return '\u0422'; 4438 case "Ucy": return '\u0423'; 
4439 case "Fcy": return '\u0424'; 4440 case "KHcy": return '\u0425'; 4441 case "TScy": return '\u0426'; 4442 case "CHcy": return '\u0427'; 4443 case "SHcy": return '\u0428'; 4444 case "SHCHcy": return '\u0429'; 4445 case "HARDcy": return '\u042A'; 4446 case "Ycy": return '\u042B'; 4447 case "SOFTcy": return '\u042C'; 4448 case "Ecy": return '\u042D'; 4449 case "YUcy": return '\u042E'; 4450 case "YAcy": return '\u042F'; 4451 case "acy": return '\u0430'; 4452 case "bcy": return '\u0431'; 4453 case "vcy": return '\u0432'; 4454 case "gcy": return '\u0433'; 4455 case "dcy": return '\u0434'; 4456 case "iecy": return '\u0435'; 4457 case "zhcy": return '\u0436'; 4458 case "zcy": return '\u0437'; 4459 case "icy": return '\u0438'; 4460 case "jcy": return '\u0439'; 4461 case "kcy": return '\u043A'; 4462 case "lcy": return '\u043B'; 4463 case "mcy": return '\u043C'; 4464 case "ncy": return '\u043D'; 4465 case "ocy": return '\u043E'; 4466 case "pcy": return '\u043F'; 4467 case "rcy": return '\u0440'; 4468 case "scy": return '\u0441'; 4469 case "tcy": return '\u0442'; 4470 case "ucy": return '\u0443'; 4471 case "fcy": return '\u0444'; 4472 case "khcy": return '\u0445'; 4473 case "tscy": return '\u0446'; 4474 case "chcy": return '\u0447'; 4475 case "shcy": return '\u0448'; 4476 case "shchcy": return '\u0449'; 4477 case "hardcy": return '\u044A'; 4478 case "ycy": return '\u044B'; 4479 case "softcy": return '\u044C'; 4480 case "ecy": return '\u044D'; 4481 case "yucy": return '\u044E'; 4482 case "yacy": return '\u044F'; 4483 case "iocy": return '\u0451'; 4484 case "djcy": return '\u0452'; 4485 case "gjcy": return '\u0453'; 4486 case "jukcy": return '\u0454'; 4487 case "dscy": return '\u0455'; 4488 case "iukcy": return '\u0456'; 4489 case "yicy": return '\u0457'; 4490 case "jsercy": return '\u0458'; 4491 case "ljcy": return '\u0459'; 4492 case "njcy": return '\u045A'; 4493 case "tshcy": return '\u045B'; 4494 case "kjcy": return '\u045C'; 4495 case "ubrcy": return '\u045E'; 4496 case 
"dzcy": return '\u045F'; 4497 case "ensp": return '\u2002'; 4498 case "emsp": return '\u2003'; 4499 case "emsp13": return '\u2004'; 4500 case "emsp14": return '\u2005'; 4501 case "numsp": return '\u2007'; 4502 case "puncsp": return '\u2008'; 4503 case "thinsp": case "ThinSpace": return '\u2009'; 4504 case "hairsp": case "VeryThinSpace": return '\u200A'; 4505 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4506 case "zwnj": return '\u200C'; 4507 case "zwj": return '\u200D'; 4508 case "lrm": return '\u200E'; 4509 case "rlm": return '\u200F'; 4510 case "hyphen": case "dash": return '\u2010'; 4511 case "ndash": return '\u2013'; 4512 case "mdash": return '\u2014'; 4513 case "horbar": return '\u2015'; 4514 case "Verbar": case "Vert": return '\u2016'; 4515 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4516 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4517 case "lsquor": case "sbquo": return '\u201A'; 4518 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4519 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4520 case "ldquor": case "bdquo": return '\u201E'; 4521 case "dagger": return '\u2020'; 4522 case "Dagger": case "ddagger": return '\u2021'; 4523 case "bull": case "bullet": return '\u2022'; 4524 case "nldr": return '\u2025'; 4525 case "hellip": case "mldr": return '\u2026'; 4526 case "permil": return '\u2030'; 4527 case "pertenk": return '\u2031'; 4528 case "prime": return '\u2032'; 4529 case "Prime": return '\u2033'; 4530 case "tprime": return '\u2034'; 4531 case "bprime": case "backprime": return '\u2035'; 4532 case "lsaquo": return '\u2039'; 4533 case "rsaquo": return '\u203A'; 4534 case "oline": case "OverBar": return '\u203E'; 4535 case "caret": return '\u2041'; 4536 case "hybull": return '\u2043'; 4537 case "frasl": return '\u2044'; 4538 case "bsemi": return '\u204F'; 4539 case "qprime": 
return '\u2057'; 4540 case "MediumSpace": return '\u205F'; 4541 case "NoBreak": return '\u2060'; 4542 case "ApplyFunction": case "af": return '\u2061'; 4543 case "InvisibleTimes": case "it": return '\u2062'; 4544 case "InvisibleComma": case "ic": return '\u2063'; 4545 case "euro": return '\u20AC'; 4546 case "tdot": case "TripleDot": return '\u20DB'; 4547 case "DotDot": return '\u20DC'; 4548 case "Copf": case "complexes": return '\u2102'; 4549 case "incare": return '\u2105'; 4550 case "gscr": return '\u210A'; 4551 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4552 case "Hfr": case "Poincareplane": return '\u210C'; 4553 case "quaternions": case "Hopf": return '\u210D'; 4554 case "planckh": return '\u210E'; 4555 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4556 case "Iscr": case "imagline": return '\u2110'; 4557 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4558 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4559 case "ell": return '\u2113'; 4560 case "Nopf": case "naturals": return '\u2115'; 4561 case "numero": return '\u2116'; 4562 case "copysr": return '\u2117'; 4563 case "weierp": case "wp": return '\u2118'; 4564 case "Popf": case "primes": return '\u2119'; 4565 case "rationals": case "Qopf": return '\u211A'; 4566 case "Rscr": case "realine": return '\u211B'; 4567 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4568 case "reals": case "Ropf": return '\u211D'; 4569 case "rx": return '\u211E'; 4570 case "trade": case "TRADE": return '\u2122'; 4571 case "integers": case "Zopf": return '\u2124'; 4572 case "mho": return '\u2127'; 4573 case "Zfr": case "zeetrf": return '\u2128'; 4574 case "iiota": return '\u2129'; 4575 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4576 case "Cfr": case "Cayleys": return '\u212D'; 4577 case "escr": return '\u212F'; 4578 case "Escr": case "expectation": return '\u2130'; 4579 case "Fscr": case "Fouriertrf": 
return '\u2131'; 4580 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4581 case "order": case "orderof": case "oscr": return '\u2134'; 4582 case "alefsym": case "aleph": return '\u2135'; 4583 case "beth": return '\u2136'; 4584 case "gimel": return '\u2137'; 4585 case "daleth": return '\u2138'; 4586 case "CapitalDifferentialD": case "DD": return '\u2145'; 4587 case "DifferentialD": case "dd": return '\u2146'; 4588 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4589 case "ImaginaryI": case "ii": return '\u2148'; 4590 case "frac13": return '\u2153'; 4591 case "frac23": return '\u2154'; 4592 case "frac15": return '\u2155'; 4593 case "frac25": return '\u2156'; 4594 case "frac35": return '\u2157'; 4595 case "frac45": return '\u2158'; 4596 case "frac16": return '\u2159'; 4597 case "frac56": return '\u215A'; 4598 case "frac18": return '\u215B'; 4599 case "frac38": return '\u215C'; 4600 case "frac58": return '\u215D'; 4601 case "frac78": return '\u215E'; 4602 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4603 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4604 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4605 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4606 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4607 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4608 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4609 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4610 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4611 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4612 case "nlarr": case "nleftarrow": return '\u219A'; 4613 case "nrarr": case "nrightarrow": return '\u219B'; 4614 case "rarrw": case 
"rightsquigarrow": return '\u219D'; 4615 case "Larr": case "twoheadleftarrow": return '\u219E'; 4616 case "Uarr": return '\u219F'; 4617 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4618 case "Darr": return '\u21A1'; 4619 case "larrtl": case "leftarrowtail": return '\u21A2'; 4620 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4621 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4622 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4623 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4624 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4625 case "larrhk": case "hookleftarrow": return '\u21A9'; 4626 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4627 case "larrlp": case "looparrowleft": return '\u21AB'; 4628 case "rarrlp": case "looparrowright": return '\u21AC'; 4629 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4630 case "nharr": case "nleftrightarrow": return '\u21AE'; 4631 case "lsh": case "Lsh": return '\u21B0'; 4632 case "rsh": case "Rsh": return '\u21B1'; 4633 case "ldsh": return '\u21B2'; 4634 case "rdsh": return '\u21B3'; 4635 case "crarr": return '\u21B5'; 4636 case "cularr": case "curvearrowleft": return '\u21B6'; 4637 case "curarr": case "curvearrowright": return '\u21B7'; 4638 case "olarr": case "circlearrowleft": return '\u21BA'; 4639 case "orarr": case "circlearrowright": return '\u21BB'; 4640 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4641 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4642 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4643 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4644 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4645 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4646 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4647 case "dharl": case 
"LeftDownVector": case "downharpoonleft": return '\u21C3'; 4648 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4649 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4650 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4651 case "llarr": case "leftleftarrows": return '\u21C7'; 4652 case "uuarr": case "upuparrows": return '\u21C8'; 4653 case "rrarr": case "rightrightarrows": return '\u21C9'; 4654 case "ddarr": case "downdownarrows": return '\u21CA'; 4655 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4656 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4657 case "nlArr": case "nLeftarrow": return '\u21CD'; 4658 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4659 case "nrArr": case "nRightarrow": return '\u21CF'; 4660 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4661 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4662 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4663 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4664 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4665 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4666 case "nwArr": return '\u21D6'; 4667 case "neArr": return '\u21D7'; 4668 case "seArr": return '\u21D8'; 4669 case "swArr": return '\u21D9'; 4670 case "lAarr": case "Lleftarrow": return '\u21DA'; 4671 case "rAarr": case "Rrightarrow": return '\u21DB'; 4672 case "zigrarr": return '\u21DD'; 4673 case "larrb": case "LeftArrowBar": return '\u21E4'; 4674 case "rarrb": case "RightArrowBar": return '\u21E5'; 4675 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4676 case "loarr": return '\u21FD'; 4677 case "roarr": return '\u21FE'; 4678 case "hoarr": return '\u21FF'; 4679 case "forall": case "ForAll": return 
'\u2200'; 4680 case "comp": case "complement": return '\u2201'; 4681 case "part": case "PartialD": return '\u2202'; 4682 case "exist": case "Exists": return '\u2203'; 4683 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4684 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4685 case "nabla": case "Del": return '\u2207'; 4686 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4687 case "notin": case "NotElement": case "notinva": return '\u2209'; 4688 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4689 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4690 case "prod": case "Product": return '\u220F'; 4691 case "coprod": case "Coproduct": return '\u2210'; 4692 case "sum": case "Sum": return '\u2211'; 4693 case "minus": return '\u2212'; 4694 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4695 case "plusdo": case "dotplus": return '\u2214'; 4696 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4697 case "lowast": return '\u2217'; 4698 case "compfn": case "SmallCircle": return '\u2218'; 4699 case "radic": case "Sqrt": return '\u221A'; 4700 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4701 case "infin": return '\u221E'; 4702 case "angrt": return '\u221F'; 4703 case "ang": case "angle": return '\u2220'; 4704 case "angmsd": case "measuredangle": return '\u2221'; 4705 case "angsph": return '\u2222'; 4706 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4707 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4708 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4709 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4710 case "and": case "wedge": 
return '\u2227'; 4711 case "or": case "vee": return '\u2228'; 4712 case "cap": return '\u2229'; 4713 case "cup": return '\u222A'; 4714 case "int": case "Integral": return '\u222B'; 4715 case "Int": return '\u222C'; 4716 case "tint": case "iiint": return '\u222D'; 4717 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4718 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4719 case "Cconint": return '\u2230'; 4720 case "cwint": return '\u2231'; 4721 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4722 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4723 case "there4": case "therefore": case "Therefore": return '\u2234'; 4724 case "becaus": case "because": case "Because": return '\u2235'; 4725 case "ratio": return '\u2236'; 4726 case "Colon": case "Proportion": return '\u2237'; 4727 case "minusd": case "dotminus": return '\u2238'; 4728 case "mDDot": return '\u223A'; 4729 case "homtht": return '\u223B'; 4730 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4731 case "bsim": case "backsim": return '\u223D'; 4732 case "ac": case "mstpos": return '\u223E'; 4733 case "acd": return '\u223F'; 4734 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4735 case "nsim": case "NotTilde": return '\u2241'; 4736 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4737 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4738 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4739 case "cong": case "TildeFullEqual": return '\u2245'; 4740 case "simne": return '\u2246'; 4741 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4742 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4743 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4744 case "ape": case "approxeq": return '\u224A'; 4745 case "apid": return '\u224B'; 4746 case "bcong": case "backcong": 
return '\u224C'; 4747 case "asympeq": case "CupCap": return '\u224D'; 4748 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4749 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4750 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4751 case "eDot": case "doteqdot": return '\u2251'; 4752 case "efDot": case "fallingdotseq": return '\u2252'; 4753 case "erDot": case "risingdotseq": return '\u2253'; 4754 case "colone": case "coloneq": case "Assign": return '\u2254'; 4755 case "ecolon": case "eqcolon": return '\u2255'; 4756 case "ecir": case "eqcirc": return '\u2256'; 4757 case "cire": case "circeq": return '\u2257'; 4758 case "wedgeq": return '\u2259'; 4759 case "veeeq": return '\u225A'; 4760 case "trie": case "triangleq": return '\u225C'; 4761 case "equest": case "questeq": return '\u225F'; 4762 case "ne": case "NotEqual": return '\u2260'; 4763 case "equiv": case "Congruent": return '\u2261'; 4764 case "nequiv": case "NotCongruent": return '\u2262'; 4765 case "le": case "leq": return '\u2264'; 4766 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4767 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4768 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4769 case "lnE": case "lneqq": return '\u2268'; 4770 case "gnE": case "gneqq": return '\u2269'; 4771 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4772 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4773 case "twixt": case "between": return '\u226C'; 4774 case "NotCupCap": return '\u226D'; 4775 case "nlt": case "NotLess": case "nless": return '\u226E'; 4776 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4777 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4778 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4779 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4780 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4781 
case "nlsim": case "NotLessTilde": return '\u2274'; 4782 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4783 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4784 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4785 case "ntlg": case "NotLessGreater": return '\u2278'; 4786 case "ntgl": case "NotGreaterLess": return '\u2279'; 4787 case "pr": case "Precedes": case "prec": return '\u227A'; 4788 case "sc": case "Succeeds": case "succ": return '\u227B'; 4789 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4790 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4791 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4792 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4793 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4794 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4795 case "sub": case "subset": return '\u2282'; 4796 case "sup": case "supset": case "Superset": return '\u2283'; 4797 case "nsub": return '\u2284'; 4798 case "nsup": return '\u2285'; 4799 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4800 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4801 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4802 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4803 case "subne": case "subsetneq": return '\u228A'; 4804 case "supne": case "supsetneq": return '\u228B'; 4805 case "cupdot": return '\u228D'; 4806 case "uplus": case "UnionPlus": return '\u228E'; 4807 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4808 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4809 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4810 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4811 case "sqcap": case "SquareIntersection": return 
'\u2293'; 4812 case "sqcup": case "SquareUnion": return '\u2294'; 4813 case "oplus": case "CirclePlus": return '\u2295'; 4814 case "ominus": case "CircleMinus": return '\u2296'; 4815 case "otimes": case "CircleTimes": return '\u2297'; 4816 case "osol": return '\u2298'; 4817 case "odot": case "CircleDot": return '\u2299'; 4818 case "ocir": case "circledcirc": return '\u229A'; 4819 case "oast": case "circledast": return '\u229B'; 4820 case "odash": case "circleddash": return '\u229D'; 4821 case "plusb": case "boxplus": return '\u229E'; 4822 case "minusb": case "boxminus": return '\u229F'; 4823 case "timesb": case "boxtimes": return '\u22A0'; 4824 case "sdotb": case "dotsquare": return '\u22A1'; 4825 case "vdash": case "RightTee": return '\u22A2'; 4826 case "dashv": case "LeftTee": return '\u22A3'; 4827 case "top": case "DownTee": return '\u22A4'; 4828 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4829 case "models": return '\u22A7'; 4830 case "vDash": case "DoubleRightTee": return '\u22A8'; 4831 case "Vdash": return '\u22A9'; 4832 case "Vvdash": return '\u22AA'; 4833 case "VDash": return '\u22AB'; 4834 case "nvdash": return '\u22AC'; 4835 case "nvDash": return '\u22AD'; 4836 case "nVdash": return '\u22AE'; 4837 case "nVDash": return '\u22AF'; 4838 case "prurel": return '\u22B0'; 4839 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4840 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4841 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4842 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4843 case "origof": return '\u22B6'; 4844 case "imof": return '\u22B7'; 4845 case "mumap": case "multimap": return '\u22B8'; 4846 case "hercon": return '\u22B9'; 4847 case "intcal": case "intercal": return '\u22BA'; 4848 case "veebar": return '\u22BB'; 4849 case "barvee": return '\u22BD'; 4850 case "angrtvb": return '\u22BE'; 4851 case 
"lrtri": return '\u22BF'; 4852 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4853 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4854 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4855 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4856 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4857 case "sdot": return '\u22C5'; 4858 case "sstarf": case "Star": return '\u22C6'; 4859 case "divonx": case "divideontimes": return '\u22C7'; 4860 case "bowtie": return '\u22C8'; 4861 case "ltimes": return '\u22C9'; 4862 case "rtimes": return '\u22CA'; 4863 case "lthree": case "leftthreetimes": return '\u22CB'; 4864 case "rthree": case "rightthreetimes": return '\u22CC'; 4865 case "bsime": case "backsimeq": return '\u22CD'; 4866 case "cuvee": case "curlyvee": return '\u22CE'; 4867 case "cuwed": case "curlywedge": return '\u22CF'; 4868 case "Sub": case "Subset": return '\u22D0'; 4869 case "Sup": case "Supset": return '\u22D1'; 4870 case "Cap": return '\u22D2'; 4871 case "Cup": return '\u22D3'; 4872 case "fork": case "pitchfork": return '\u22D4'; 4873 case "epar": return '\u22D5'; 4874 case "ltdot": case "lessdot": return '\u22D6'; 4875 case "gtdot": case "gtrdot": return '\u22D7'; 4876 case "Ll": return '\u22D8'; 4877 case "Gg": case "ggg": return '\u22D9'; 4878 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4879 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4880 case "cuepr": case "curlyeqprec": return '\u22DE'; 4881 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4882 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4883 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4884 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4885 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4886 case "lnsim": return '\u22E6'; 4887 case "gnsim": return '\u22E7'; 4888 case "prnsim": case "precnsim": return '\u22E8'; 4889 
case "scnsim": case "succnsim": return '\u22E9'; 4890 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 4891 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 4892 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 4893 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 4894 case "vellip": return '\u22EE'; 4895 case "ctdot": return '\u22EF'; 4896 case "utdot": return '\u22F0'; 4897 case "dtdot": return '\u22F1'; 4898 case "disin": return '\u22F2'; 4899 case "isinsv": return '\u22F3'; 4900 case "isins": return '\u22F4'; 4901 case "isindot": return '\u22F5'; 4902 case "notinvc": return '\u22F6'; 4903 case "notinvb": return '\u22F7'; 4904 case "isinE": return '\u22F9'; 4905 case "nisd": return '\u22FA'; 4906 case "xnis": return '\u22FB'; 4907 case "nis": return '\u22FC'; 4908 case "notnivc": return '\u22FD'; 4909 case "notnivb": return '\u22FE'; 4910 case "barwed": case "barwedge": return '\u2305'; 4911 case "Barwed": case "doublebarwedge": return '\u2306'; 4912 case "lceil": case "LeftCeiling": return '\u2308'; 4913 case "rceil": case "RightCeiling": return '\u2309'; 4914 case "lfloor": case "LeftFloor": return '\u230A'; 4915 case "rfloor": case "RightFloor": return '\u230B'; 4916 case "drcrop": return '\u230C'; 4917 case "dlcrop": return '\u230D'; 4918 case "urcrop": return '\u230E'; 4919 case "ulcrop": return '\u230F'; 4920 case "bnot": return '\u2310'; 4921 case "profline": return '\u2312'; 4922 case "profsurf": return '\u2313'; 4923 case "telrec": return '\u2315'; 4924 case "target": return '\u2316'; 4925 case "ulcorn": case "ulcorner": return '\u231C'; 4926 case "urcorn": case "urcorner": return '\u231D'; 4927 case "dlcorn": case "llcorner": return '\u231E'; 4928 case "drcorn": case "lrcorner": return '\u231F'; 4929 case "frown": case "sfrown": return '\u2322'; 4930 case "smile": case "ssmile": return '\u2323'; 4931 case "cylcty": return 
'\u232D'; 4932 case "profalar": return '\u232E'; 4933 case "topbot": return '\u2336'; 4934 case "ovbar": return '\u233D'; 4935 case "solbar": return '\u233F'; 4936 case "angzarr": return '\u237C'; 4937 case "lmoust": case "lmoustache": return '\u23B0'; 4938 case "rmoust": case "rmoustache": return '\u23B1'; 4939 case "tbrk": case "OverBracket": return '\u23B4'; 4940 case "bbrk": case "UnderBracket": return '\u23B5'; 4941 case "bbrktbrk": return '\u23B6'; 4942 case "OverParenthesis": return '\u23DC'; 4943 case "UnderParenthesis": return '\u23DD'; 4944 case "OverBrace": return '\u23DE'; 4945 case "UnderBrace": return '\u23DF'; 4946 case "trpezium": return '\u23E2'; 4947 case "elinters": return '\u23E7'; 4948 case "blank": return '\u2423'; 4949 case "oS": case "circledS": return '\u24C8'; 4950 case "boxh": case "HorizontalLine": return '\u2500'; 4951 case "boxv": return '\u2502'; 4952 case "boxdr": return '\u250C'; 4953 case "boxdl": return '\u2510'; 4954 case "boxur": return '\u2514'; 4955 case "boxul": return '\u2518'; 4956 case "boxvr": return '\u251C'; 4957 case "boxvl": return '\u2524'; 4958 case "boxhd": return '\u252C'; 4959 case "boxhu": return '\u2534'; 4960 case "boxvh": return '\u253C'; 4961 case "boxH": return '\u2550'; 4962 case "boxV": return '\u2551'; 4963 case "boxdR": return '\u2552'; 4964 case "boxDr": return '\u2553'; 4965 case "boxDR": return '\u2554'; 4966 case "boxdL": return '\u2555'; 4967 case "boxDl": return '\u2556'; 4968 case "boxDL": return '\u2557'; 4969 case "boxuR": return '\u2558'; 4970 case "boxUr": return '\u2559'; 4971 case "boxUR": return '\u255A'; 4972 case "boxuL": return '\u255B'; 4973 case "boxUl": return '\u255C'; 4974 case "boxUL": return '\u255D'; 4975 case "boxvR": return '\u255E'; 4976 case "boxVr": return '\u255F'; 4977 case "boxVR": return '\u2560'; 4978 case "boxvL": return '\u2561'; 4979 case "boxVl": return '\u2562'; 4980 case "boxVL": return '\u2563'; 4981 case "boxHd": return '\u2564'; 4982 case "boxhD": return 
'\u2565'; 4983 case "boxHD": return '\u2566'; 4984 case "boxHu": return '\u2567'; 4985 case "boxhU": return '\u2568'; 4986 case "boxHU": return '\u2569'; 4987 case "boxvH": return '\u256A'; 4988 case "boxVh": return '\u256B'; 4989 case "boxVH": return '\u256C'; 4990 case "uhblk": return '\u2580'; 4991 case "lhblk": return '\u2584'; 4992 case "block": return '\u2588'; 4993 case "blk14": return '\u2591'; 4994 case "blk12": return '\u2592'; 4995 case "blk34": return '\u2593'; 4996 case "squ": case "square": case "Square": return '\u25A1'; 4997 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 4998 case "EmptyVerySmallSquare": return '\u25AB'; 4999 case "rect": return '\u25AD'; 5000 case "marker": return '\u25AE'; 5001 case "fltns": return '\u25B1'; 5002 case "xutri": case "bigtriangleup": return '\u25B3'; 5003 case "utrif": case "blacktriangle": return '\u25B4'; 5004 case "utri": case "triangle": return '\u25B5'; 5005 case "rtrif": case "blacktriangleright": return '\u25B8'; 5006 case "rtri": case "triangleright": return '\u25B9'; 5007 case "xdtri": case "bigtriangledown": return '\u25BD'; 5008 case "dtrif": case "blacktriangledown": return '\u25BE'; 5009 case "dtri": case "triangledown": return '\u25BF'; 5010 case "ltrif": case "blacktriangleleft": return '\u25C2'; 5011 case "ltri": case "triangleleft": return '\u25C3'; 5012 case "loz": case "lozenge": return '\u25CA'; 5013 case "cir": return '\u25CB'; 5014 case "tridot": return '\u25EC'; 5015 case "xcirc": case "bigcirc": return '\u25EF'; 5016 case "ultri": return '\u25F8'; 5017 case "urtri": return '\u25F9'; 5018 case "lltri": return '\u25FA'; 5019 case "EmptySmallSquare": return '\u25FB'; 5020 case "FilledSmallSquare": return '\u25FC'; 5021 case "starf": case "bigstar": return '\u2605'; 5022 case "star": return '\u2606'; 5023 case "phone": return '\u260E'; 5024 case "female": return '\u2640'; 5025 case "male": return '\u2642'; 5026 case "spades": case "spadesuit": 
return '\u2660'; 5027 case "clubs": case "clubsuit": return '\u2663'; 5028 case "hearts": case "heartsuit": return '\u2665'; 5029 case "diams": case "diamondsuit": return '\u2666'; 5030 case "sung": return '\u266A'; 5031 case "flat": return '\u266D'; 5032 case "natur": case "natural": return '\u266E'; 5033 case "sharp": return '\u266F'; 5034 case "check": case "checkmark": return '\u2713'; 5035 case "cross": return '\u2717'; 5036 case "malt": case "maltese": return '\u2720'; 5037 case "sext": return '\u2736'; 5038 case "VerticalSeparator": return '\u2758'; 5039 case "lbbrk": return '\u2772'; 5040 case "rbbrk": return '\u2773'; 5041 case "bsolhsub": return '\u27C8'; 5042 case "suphsol": return '\u27C9'; 5043 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5044 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5045 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5046 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5047 case "Lang": return '\u27EA'; 5048 case "Rang": return '\u27EB'; 5049 case "loang": return '\u27EC'; 5050 case "roang": return '\u27ED'; 5051 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5052 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5053 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5054 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5055 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5056 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5057 case "xmap": case "longmapsto": return '\u27FC'; 5058 case "dzigrarr": return '\u27FF'; 5059 case "nvlArr": return '\u2902'; 5060 case "nvrArr": return '\u2903'; 5061 case "nvHarr": return '\u2904'; 5062 case "Map": return '\u2905'; 5063 case "lbarr": return '\u290C'; 5064 case "rbarr": case "bkarow": return '\u290D'; 5065 case "lBarr": 
return '\u290E'; 5066 case "rBarr": case "dbkarow": return '\u290F'; 5067 case "RBarr": case "drbkarow": return '\u2910'; 5068 case "DDotrahd": return '\u2911'; 5069 case "UpArrowBar": return '\u2912'; 5070 case "DownArrowBar": return '\u2913'; 5071 case "Rarrtl": return '\u2916'; 5072 case "latail": return '\u2919'; 5073 case "ratail": return '\u291A'; 5074 case "lAtail": return '\u291B'; 5075 case "rAtail": return '\u291C'; 5076 case "larrfs": return '\u291D'; 5077 case "rarrfs": return '\u291E'; 5078 case "larrbfs": return '\u291F'; 5079 case "rarrbfs": return '\u2920'; 5080 case "nwarhk": return '\u2923'; 5081 case "nearhk": return '\u2924'; 5082 case "searhk": case "hksearow": return '\u2925'; 5083 case "swarhk": case "hkswarow": return '\u2926'; 5084 case "nwnear": return '\u2927'; 5085 case "nesear": case "toea": return '\u2928'; 5086 case "seswar": case "tosa": return '\u2929'; 5087 case "swnwar": return '\u292A'; 5088 case "rarrc": return '\u2933'; 5089 case "cudarrr": return '\u2935'; 5090 case "ldca": return '\u2936'; 5091 case "rdca": return '\u2937'; 5092 case "cudarrl": return '\u2938'; 5093 case "larrpl": return '\u2939'; 5094 case "curarrm": return '\u293C'; 5095 case "cularrp": return '\u293D'; 5096 case "rarrpl": return '\u2945'; 5097 case "harrcir": return '\u2948'; 5098 case "Uarrocir": return '\u2949'; 5099 case "lurdshar": return '\u294A'; 5100 case "ldrushar": return '\u294B'; 5101 case "LeftRightVector": return '\u294E'; 5102 case "RightUpDownVector": return '\u294F'; 5103 case "DownLeftRightVector": return '\u2950'; 5104 case "LeftUpDownVector": return '\u2951'; 5105 case "LeftVectorBar": return '\u2952'; 5106 case "RightVectorBar": return '\u2953'; 5107 case "RightUpVectorBar": return '\u2954'; 5108 case "RightDownVectorBar": return '\u2955'; 5109 case "DownLeftVectorBar": return '\u2956'; 5110 case "DownRightVectorBar": return '\u2957'; 5111 case "LeftUpVectorBar": return '\u2958'; 5112 case "LeftDownVectorBar": return '\u2959'; 5113 case 
"LeftTeeVector": return '\u295A'; 5114 case "RightTeeVector": return '\u295B'; 5115 case "RightUpTeeVector": return '\u295C'; 5116 case "RightDownTeeVector": return '\u295D'; 5117 case "DownLeftTeeVector": return '\u295E'; 5118 case "DownRightTeeVector": return '\u295F'; 5119 case "LeftUpTeeVector": return '\u2960'; 5120 case "LeftDownTeeVector": return '\u2961'; 5121 case "lHar": return '\u2962'; 5122 case "uHar": return '\u2963'; 5123 case "rHar": return '\u2964'; 5124 case "dHar": return '\u2965'; 5125 case "luruhar": return '\u2966'; 5126 case "ldrdhar": return '\u2967'; 5127 case "ruluhar": return '\u2968'; 5128 case "rdldhar": return '\u2969'; 5129 case "lharul": return '\u296A'; 5130 case "llhard": return '\u296B'; 5131 case "rharul": return '\u296C'; 5132 case "lrhard": return '\u296D'; 5133 case "udhar": case "UpEquilibrium": return '\u296E'; 5134 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5135 case "RoundImplies": return '\u2970'; 5136 case "erarr": return '\u2971'; 5137 case "simrarr": return '\u2972'; 5138 case "larrsim": return '\u2973'; 5139 case "rarrsim": return '\u2974'; 5140 case "rarrap": return '\u2975'; 5141 case "ltlarr": return '\u2976'; 5142 case "gtrarr": return '\u2978'; 5143 case "subrarr": return '\u2979'; 5144 case "suplarr": return '\u297B'; 5145 case "lfisht": return '\u297C'; 5146 case "rfisht": return '\u297D'; 5147 case "ufisht": return '\u297E'; 5148 case "dfisht": return '\u297F'; 5149 case "lopar": return '\u2985'; 5150 case "ropar": return '\u2986'; 5151 case "lbrke": return '\u298B'; 5152 case "rbrke": return '\u298C'; 5153 case "lbrkslu": return '\u298D'; 5154 case "rbrksld": return '\u298E'; 5155 case "lbrksld": return '\u298F'; 5156 case "rbrkslu": return '\u2990'; 5157 case "langd": return '\u2991'; 5158 case "rangd": return '\u2992'; 5159 case "lparlt": return '\u2993'; 5160 case "rpargt": return '\u2994'; 5161 case "gtlPar": return '\u2995'; 5162 case "ltrPar": return '\u2996'; 5163 case "vzigzag": 
return '\u299A'; 5164 case "vangrt": return '\u299C'; 5165 case "angrtvbd": return '\u299D'; 5166 case "ange": return '\u29A4'; 5167 case "range": return '\u29A5'; 5168 case "dwangle": return '\u29A6'; 5169 case "uwangle": return '\u29A7'; 5170 case "angmsdaa": return '\u29A8'; 5171 case "angmsdab": return '\u29A9'; 5172 case "angmsdac": return '\u29AA'; 5173 case "angmsdad": return '\u29AB'; 5174 case "angmsdae": return '\u29AC'; 5175 case "angmsdaf": return '\u29AD'; 5176 case "angmsdag": return '\u29AE'; 5177 case "angmsdah": return '\u29AF'; 5178 case "bemptyv": return '\u29B0'; 5179 case "demptyv": return '\u29B1'; 5180 case "cemptyv": return '\u29B2'; 5181 case "raemptyv": return '\u29B3'; 5182 case "laemptyv": return '\u29B4'; 5183 case "ohbar": return '\u29B5'; 5184 case "omid": return '\u29B6'; 5185 case "opar": return '\u29B7'; 5186 case "operp": return '\u29B9'; 5187 case "olcross": return '\u29BB'; 5188 case "odsold": return '\u29BC'; 5189 case "olcir": return '\u29BE'; 5190 case "ofcir": return '\u29BF'; 5191 case "olt": return '\u29C0'; 5192 case "ogt": return '\u29C1'; 5193 case "cirscir": return '\u29C2'; 5194 case "cirE": return '\u29C3'; 5195 case "solb": return '\u29C4'; 5196 case "bsolb": return '\u29C5'; 5197 case "boxbox": return '\u29C9'; 5198 case "trisb": return '\u29CD'; 5199 case "rtriltri": return '\u29CE'; 5200 case "LeftTriangleBar": return '\u29CF'; 5201 case "RightTriangleBar": return '\u29D0'; 5202 case "iinfin": return '\u29DC'; 5203 case "infintie": return '\u29DD'; 5204 case "nvinfin": return '\u29DE'; 5205 case "eparsl": return '\u29E3'; 5206 case "smeparsl": return '\u29E4'; 5207 case "eqvparsl": return '\u29E5'; 5208 case "lozf": case "blacklozenge": return '\u29EB'; 5209 case "RuleDelayed": return '\u29F4'; 5210 case "dsol": return '\u29F6'; 5211 case "xodot": case "bigodot": return '\u2A00'; 5212 case "xoplus": case "bigoplus": return '\u2A01'; 5213 case "xotime": case "bigotimes": return '\u2A02'; 5214 case "xuplus": case 
"biguplus": return '\u2A04'; 5215 case "xsqcup": case "bigsqcup": return '\u2A06'; 5216 case "qint": case "iiiint": return '\u2A0C'; 5217 case "fpartint": return '\u2A0D'; 5218 case "cirfnint": return '\u2A10'; 5219 case "awint": return '\u2A11'; 5220 case "rppolint": return '\u2A12'; 5221 case "scpolint": return '\u2A13'; 5222 case "npolint": return '\u2A14'; 5223 case "pointint": return '\u2A15'; 5224 case "quatint": return '\u2A16'; 5225 case "intlarhk": return '\u2A17'; 5226 case "pluscir": return '\u2A22'; 5227 case "plusacir": return '\u2A23'; 5228 case "simplus": return '\u2A24'; 5229 case "plusdu": return '\u2A25'; 5230 case "plussim": return '\u2A26'; 5231 case "plustwo": return '\u2A27'; 5232 case "mcomma": return '\u2A29'; 5233 case "minusdu": return '\u2A2A'; 5234 case "loplus": return '\u2A2D'; 5235 case "roplus": return '\u2A2E'; 5236 case "Cross": return '\u2A2F'; 5237 case "timesd": return '\u2A30'; 5238 case "timesbar": return '\u2A31'; 5239 case "smashp": return '\u2A33'; 5240 case "lotimes": return '\u2A34'; 5241 case "rotimes": return '\u2A35'; 5242 case "otimesas": return '\u2A36'; 5243 case "Otimes": return '\u2A37'; 5244 case "odiv": return '\u2A38'; 5245 case "triplus": return '\u2A39'; 5246 case "triminus": return '\u2A3A'; 5247 case "tritime": return '\u2A3B'; 5248 case "iprod": case "intprod": return '\u2A3C'; 5249 case "amalg": return '\u2A3F'; 5250 case "capdot": return '\u2A40'; 5251 case "ncup": return '\u2A42'; 5252 case "ncap": return '\u2A43'; 5253 case "capand": return '\u2A44'; 5254 case "cupor": return '\u2A45'; 5255 case "cupcap": return '\u2A46'; 5256 case "capcup": return '\u2A47'; 5257 case "cupbrcap": return '\u2A48'; 5258 case "capbrcup": return '\u2A49'; 5259 case "cupcup": return '\u2A4A'; 5260 case "capcap": return '\u2A4B'; 5261 case "ccups": return '\u2A4C'; 5262 case "ccaps": return '\u2A4D'; 5263 case "ccupssm": return '\u2A50'; 5264 case "And": return '\u2A53'; 5265 case "Or": return '\u2A54'; 5266 case "andand": 
return '\u2A55'; 5267 case "oror": return '\u2A56'; 5268 case "orslope": return '\u2A57'; 5269 case "andslope": return '\u2A58'; 5270 case "andv": return '\u2A5A'; 5271 case "orv": return '\u2A5B'; 5272 case "andd": return '\u2A5C'; 5273 case "ord": return '\u2A5D'; 5274 case "wedbar": return '\u2A5F'; 5275 case "sdote": return '\u2A66'; 5276 case "simdot": return '\u2A6A'; 5277 case "congdot": return '\u2A6D'; 5278 case "easter": return '\u2A6E'; 5279 case "apacir": return '\u2A6F'; 5280 case "apE": return '\u2A70'; 5281 case "eplus": return '\u2A71'; 5282 case "pluse": return '\u2A72'; 5283 case "Esim": return '\u2A73'; 5284 case "Colone": return '\u2A74'; 5285 case "Equal": return '\u2A75'; 5286 case "eDDot": case "ddotseq": return '\u2A77'; 5287 case "equivDD": return '\u2A78'; 5288 case "ltcir": return '\u2A79'; 5289 case "gtcir": return '\u2A7A'; 5290 case "ltquest": return '\u2A7B'; 5291 case "gtquest": return '\u2A7C'; 5292 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5293 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5294 case "lesdot": return '\u2A7F'; 5295 case "gesdot": return '\u2A80'; 5296 case "lesdoto": return '\u2A81'; 5297 case "gesdoto": return '\u2A82'; 5298 case "lesdotor": return '\u2A83'; 5299 case "gesdotol": return '\u2A84'; 5300 case "lap": case "lessapprox": return '\u2A85'; 5301 case "gap": case "gtrapprox": return '\u2A86'; 5302 case "lne": case "lneq": return '\u2A87'; 5303 case "gne": case "gneq": return '\u2A88'; 5304 case "lnap": case "lnapprox": return '\u2A89'; 5305 case "gnap": case "gnapprox": return '\u2A8A'; 5306 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5307 case "gEl": case "gtreqqless": return '\u2A8C'; 5308 case "lsime": return '\u2A8D'; 5309 case "gsime": return '\u2A8E'; 5310 case "lsimg": return '\u2A8F'; 5311 case "gsiml": return '\u2A90'; 5312 case "lgE": return '\u2A91'; 5313 case "glE": return '\u2A92'; 5314 case "lesges": return '\u2A93'; 5315 case "gesles": 
return '\u2A94'; 5316 case "els": case "eqslantless": return '\u2A95'; 5317 case "egs": case "eqslantgtr": return '\u2A96'; 5318 case "elsdot": return '\u2A97'; 5319 case "egsdot": return '\u2A98'; 5320 case "el": return '\u2A99'; 5321 case "eg": return '\u2A9A'; 5322 case "siml": return '\u2A9D'; 5323 case "simg": return '\u2A9E'; 5324 case "simlE": return '\u2A9F'; 5325 case "simgE": return '\u2AA0'; 5326 case "LessLess": return '\u2AA1'; 5327 case "GreaterGreater": return '\u2AA2'; 5328 case "glj": return '\u2AA4'; 5329 case "gla": return '\u2AA5'; 5330 case "ltcc": return '\u2AA6'; 5331 case "gtcc": return '\u2AA7'; 5332 case "lescc": return '\u2AA8'; 5333 case "gescc": return '\u2AA9'; 5334 case "smt": return '\u2AAA'; 5335 case "lat": return '\u2AAB'; 5336 case "smte": return '\u2AAC'; 5337 case "late": return '\u2AAD'; 5338 case "bumpE": return '\u2AAE'; 5339 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5340 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5341 case "prE": return '\u2AB3'; 5342 case "scE": return '\u2AB4'; 5343 case "prnE": case "precneqq": return '\u2AB5'; 5344 case "scnE": case "succneqq": return '\u2AB6'; 5345 case "prap": case "precapprox": return '\u2AB7'; 5346 case "scap": case "succapprox": return '\u2AB8'; 5347 case "prnap": case "precnapprox": return '\u2AB9'; 5348 case "scnap": case "succnapprox": return '\u2ABA'; 5349 case "Pr": return '\u2ABB'; 5350 case "Sc": return '\u2ABC'; 5351 case "subdot": return '\u2ABD'; 5352 case "supdot": return '\u2ABE'; 5353 case "subplus": return '\u2ABF'; 5354 case "supplus": return '\u2AC0'; 5355 case "submult": return '\u2AC1'; 5356 case "supmult": return '\u2AC2'; 5357 case "subedot": return '\u2AC3'; 5358 case "supedot": return '\u2AC4'; 5359 case "subE": case "subseteqq": return '\u2AC5'; 5360 case "supE": case "supseteqq": return '\u2AC6'; 5361 case "subsim": return '\u2AC7'; 5362 case "supsim": return '\u2AC8'; 5363 case "subnE": case "subsetneqq": 
return '\u2ACB'; 5364 case "supnE": case "supsetneqq": return '\u2ACC'; 5365 case "csub": return '\u2ACF'; 5366 case "csup": return '\u2AD0'; 5367 case "csube": return '\u2AD1'; 5368 case "csupe": return '\u2AD2'; 5369 case "subsup": return '\u2AD3'; 5370 case "supsub": return '\u2AD4'; 5371 case "subsub": return '\u2AD5'; 5372 case "supsup": return '\u2AD6'; 5373 case "suphsub": return '\u2AD7'; 5374 case "supdsub": return '\u2AD8'; 5375 case "forkv": return '\u2AD9'; 5376 case "topfork": return '\u2ADA'; 5377 case "mlcp": return '\u2ADB'; 5378 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5379 case "Vdashl": return '\u2AE6'; 5380 case "Barv": return '\u2AE7'; 5381 case "vBar": return '\u2AE8'; 5382 case "vBarv": return '\u2AE9'; 5383 case "Vbar": return '\u2AEB'; 5384 case "Not": return '\u2AEC'; 5385 case "bNot": return '\u2AED'; 5386 case "rnmid": return '\u2AEE'; 5387 case "cirmid": return '\u2AEF'; 5388 case "midcir": return '\u2AF0'; 5389 case "topcir": return '\u2AF1'; 5390 case "nhpar": return '\u2AF2'; 5391 case "parsim": return '\u2AF3'; 5392 case "parsl": return '\u2AFD'; 5393 case "fflig": return '\uFB00'; 5394 case "filig": return '\uFB01'; 5395 case "fllig": return '\uFB02'; 5396 case "ffilig": return '\uFB03'; 5397 case "ffllig": return '\uFB04'; 5398 case "Ascr": return '\U0001D49C'; 5399 case "Cscr": return '\U0001D49E'; 5400 case "Dscr": return '\U0001D49F'; 5401 case "Gscr": return '\U0001D4A2'; 5402 case "Jscr": return '\U0001D4A5'; 5403 case "Kscr": return '\U0001D4A6'; 5404 case "Nscr": return '\U0001D4A9'; 5405 case "Oscr": return '\U0001D4AA'; 5406 case "Pscr": return '\U0001D4AB'; 5407 case "Qscr": return '\U0001D4AC'; 5408 case "Sscr": return '\U0001D4AE'; 5409 case "Tscr": return '\U0001D4AF'; 5410 case "Uscr": return '\U0001D4B0'; 5411 case "Vscr": return '\U0001D4B1'; 5412 case "Wscr": return '\U0001D4B2'; 5413 case "Xscr": return '\U0001D4B3'; 5414 case "Yscr": return '\U0001D4B4'; 5415 case "Zscr": return '\U0001D4B5'; 5416 
case "ascr": return '\U0001D4B6'; 5417 case "bscr": return '\U0001D4B7'; 5418 case "cscr": return '\U0001D4B8'; 5419 case "dscr": return '\U0001D4B9'; 5420 case "fscr": return '\U0001D4BB'; 5421 case "hscr": return '\U0001D4BD'; 5422 case "iscr": return '\U0001D4BE'; 5423 case "jscr": return '\U0001D4BF'; 5424 case "kscr": return '\U0001D4C0'; 5425 case "lscr": return '\U0001D4C1'; 5426 case "mscr": return '\U0001D4C2'; 5427 case "nscr": return '\U0001D4C3'; 5428 case "pscr": return '\U0001D4C5'; 5429 case "qscr": return '\U0001D4C6'; 5430 case "rscr": return '\U0001D4C7'; 5431 case "sscr": return '\U0001D4C8'; 5432 case "tscr": return '\U0001D4C9'; 5433 case "uscr": return '\U0001D4CA'; 5434 case "vscr": return '\U0001D4CB'; 5435 case "wscr": return '\U0001D4CC'; 5436 case "xscr": return '\U0001D4CD'; 5437 case "yscr": return '\U0001D4CE'; 5438 case "zscr": return '\U0001D4CF'; 5439 case "Afr": return '\U0001D504'; 5440 case "Bfr": return '\U0001D505'; 5441 case "Dfr": return '\U0001D507'; 5442 case "Efr": return '\U0001D508'; 5443 case "Ffr": return '\U0001D509'; 5444 case "Gfr": return '\U0001D50A'; 5445 case "Jfr": return '\U0001D50D'; 5446 case "Kfr": return '\U0001D50E'; 5447 case "Lfr": return '\U0001D50F'; 5448 case "Mfr": return '\U0001D510'; 5449 case "Nfr": return '\U0001D511'; 5450 case "Ofr": return '\U0001D512'; 5451 case "Pfr": return '\U0001D513'; 5452 case "Qfr": return '\U0001D514'; 5453 case "Sfr": return '\U0001D516'; 5454 case "Tfr": return '\U0001D517'; 5455 case "Ufr": return '\U0001D518'; 5456 case "Vfr": return '\U0001D519'; 5457 case "Wfr": return '\U0001D51A'; 5458 case "Xfr": return '\U0001D51B'; 5459 case "Yfr": return '\U0001D51C'; 5460 case "afr": return '\U0001D51E'; 5461 case "bfr": return '\U0001D51F'; 5462 case "cfr": return '\U0001D520'; 5463 case "dfr": return '\U0001D521'; 5464 case "efr": return '\U0001D522'; 5465 case "ffr": return '\U0001D523'; 5466 case "gfr": return '\U0001D524'; 5467 case "hfr": return '\U0001D525'; 5468 
case "ifr": return '\U0001D526'; 5469 case "jfr": return '\U0001D527'; 5470 case "kfr": return '\U0001D528'; 5471 case "lfr": return '\U0001D529'; 5472 case "mfr": return '\U0001D52A'; 5473 case "nfr": return '\U0001D52B'; 5474 case "ofr": return '\U0001D52C'; 5475 case "pfr": return '\U0001D52D'; 5476 case "qfr": return '\U0001D52E'; 5477 case "rfr": return '\U0001D52F'; 5478 case "sfr": return '\U0001D530'; 5479 case "tfr": return '\U0001D531'; 5480 case "ufr": return '\U0001D532'; 5481 case "vfr": return '\U0001D533'; 5482 case "wfr": return '\U0001D534'; 5483 case "xfr": return '\U0001D535'; 5484 case "yfr": return '\U0001D536'; 5485 case "zfr": return '\U0001D537'; 5486 case "Aopf": return '\U0001D538'; 5487 case "Bopf": return '\U0001D539'; 5488 case "Dopf": return '\U0001D53B'; 5489 case "Eopf": return '\U0001D53C'; 5490 case "Fopf": return '\U0001D53D'; 5491 case "Gopf": return '\U0001D53E'; 5492 case "Iopf": return '\U0001D540'; 5493 case "Jopf": return '\U0001D541'; 5494 case "Kopf": return '\U0001D542'; 5495 case "Lopf": return '\U0001D543'; 5496 case "Mopf": return '\U0001D544'; 5497 case "Oopf": return '\U0001D546'; 5498 case "Sopf": return '\U0001D54A'; 5499 case "Topf": return '\U0001D54B'; 5500 case "Uopf": return '\U0001D54C'; 5501 case "Vopf": return '\U0001D54D'; 5502 case "Wopf": return '\U0001D54E'; 5503 case "Xopf": return '\U0001D54F'; 5504 case "Yopf": return '\U0001D550'; 5505 case "aopf": return '\U0001D552'; 5506 case "bopf": return '\U0001D553'; 5507 case "copf": return '\U0001D554'; 5508 case "dopf": return '\U0001D555'; 5509 case "eopf": return '\U0001D556'; 5510 case "fopf": return '\U0001D557'; 5511 case "gopf": return '\U0001D558'; 5512 case "hopf": return '\U0001D559'; 5513 case "iopf": return '\U0001D55A'; 5514 case "jopf": return '\U0001D55B'; 5515 case "kopf": return '\U0001D55C'; 5516 case "lopf": return '\U0001D55D'; 5517 case "mopf": return '\U0001D55E'; 5518 case "nopf": return '\U0001D55F'; 5519 case "oopf": return 
'\U0001D560';
		case "popf": return '\U0001D561';
		case "qopf": return '\U0001D562';
		case "ropf": return '\U0001D563';
		case "sopf": return '\U0001D564';
		case "topf": return '\U0001D565';
		case "uopf": return '\U0001D566';
		case "vopf": return '\U0001D567';
		case "wopf": return '\U0001D568';
		case "xopf": return '\U0001D569';
		case "yopf": return '\U0001D56A';
		case "zopf": return '\U0001D56B';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] >  '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///     data = the raw text, possibly containing `&name;` / `&#123;` / `&#x7b;` references
///     strict = when true, an entity that is not terminated by `;` raises an exception
///              instead of being copied through as literal text
///
/// Returns: `data` itself (same slice) when it contains no `&`; otherwise a newly built
///          string with each `&...;` run replaced by whatever [parseEntity] returns for it.
///
/// Throws: `Exception` in strict mode when an entity is interrupted by another `&`, by a
///         space, by the end of the input, or when it grows past the maximum entity length.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// How many characters may follow the '&' before we stop believing that it
	// starts an entity. This used to be 9, which made every longer name that
	// parseEntity knows (e.g. "blacktriangleright", "DoubleLongLeftRightArrow")
	// impossible to decode, and an error in strict mode. The longest named
	// reference in the HTML standard is 31 characters plus the ';'.
	enum maxEntityAttemptLength = 33;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	// the '&' itself plus at most maxEntityAttemptLength characters after it
	dchar[maxEntityAttemptLength + 1] entityBeingTried;
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0;

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else
					a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];

				// tryingEntity is still true; slot 0 already holds the '&' that
				// starts the next attempt, so only the counters are reset
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0; // restarting on this
			} else
			if(ch == ';') {
				// properly terminated: hand the whole "&...;" run to parseEntity
				tryingEntity = false;
				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					// not an entity after all; keep the text exactly as it was
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
				}
			} else {
				if(entityAttemptIndex >= maxEntityAttemptLength) {
					// too long to be any entity; stop here so the buffer cannot overflow
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				tryingEntity = true;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Common base of the non-tag nodes below. It refuses children and reports
/// the node type 100.
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Returns 100 for every special node.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node whose `source` text is written to the output exactly as stored.
/// Group: implementations
class RawSource : SpecialElement {
	/// Stores `s` as the source and names the node "#raw".
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// Returns the result of `toString()` for this node.
	override string nodeValue() const {
		return this.toString();
	}

	/// Appends `source` to `where` without any escaping and returns `source`.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	// the pretty printer arguments are ignored; the text is returned untouched
	override string toPrettyString(bool, int, string) const {
		return source;
	}


	// `deep` is unused: the copy only carries the document and the source text
	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	/// The text that is written out verbatim.
string source;
}

/// Shared implementation for server-side code nodes: the stored `source` is
/// written back wrapped in `<` and `>`.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	// `type` becomes the tag name with a leading '#'
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		this.tagName = "#" ~ type;
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, the source and `>` to `where`; returns the part that was just appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		const begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	// the pretty printer arguments are ignored
	override string toPrettyString(bool, int, string) const {
		string text;
		text ~= "<";
		text ~= source;
		text ~= ">";
		return text;
	}

	/// The code between the angle brackets.
	string source;
}

/// Server-side code node whose tag name is "#php".
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		this.source = s;
	}

	// `deep` is unused: there are no children to copy
	override PhpCode cloneNode(bool deep) {
		auto copy = new PhpCode(parentDocument, source);
		return copy;
	}
}

/// Server-side code node whose tag name is "#asp".
/// Group: implementations
class AspCode : ServerSideCode {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		this.source = s;
	}

	// `deep` is unused: there are no children to copy
	override AspCode cloneNode(bool deep) {
		auto copy = new AspCode(parentDocument, source);
		return copy;
	}
}

/// A `<!...>` instruction; the node is named "#bpi".
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		this.tagName = "#bpi";
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	// `deep` is unused: there are no children to copy
	override BangInstruction cloneNode(bool deep) {
		auto copy = new BangInstruction(parentDocument, source);
		return copy;
	}

	/// Appends `<!`, the source and `>` to `where`; returns the part that was just appended.
override string writeToAppender(Appender!string where = appender!string()) const {
		const begin = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	// the pretty printer arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text between `<!` and `>`.
	string source;
}

/// An instruction written back as `<` + source + `>`; the node is named "#qpi".
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		this.tagName = "#qpi";
	}

	// `deep` is unused: there are no children to copy
	override QuestionInstruction cloneNode(bool deep) {
		auto copy = new QuestionInstruction(parentDocument, source);
		return copy;
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, the source and `>` to `where`; returns the part that was just appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		const begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	// the pretty printer arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text between `<` and `>`.
	string source;
}

/// A `<!-- ... -->` comment; the node is named "#comment".
/// Group: implementations
class HtmlComment : SpecialElement {
	/// Stores `s` as the comment text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		this.tagName = "#comment";
	}

	// `deep` is unused: there are no children to copy
	override HtmlComment cloneNode(bool deep) {
		auto copy = new HtmlComment(parentDocument, source);
		return copy;
	}

	/// The stored comment text.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<!--`, the source and `-->` to `where`; returns the part that was just appended.
override string writeToAppender(Appender!string where = appender!string()) const {
		const begin = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[begin .. $];
	}

	// the pretty printer arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<!--" ~ source ~ "-->";
	}


	/// The text between `<!--` and `-->`.
	string source;
}




/// A run of text; the node is named "#text" and keeps its decoded text in `contents`.
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a text node holding `e` as its contents.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		this.contents = e;
		this.tagName = "#text";
	}

	/// Same as above, with a null parent document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Builds a text node from text that still contains html entities.
	/// Decoding is strict only when a document is given and it is not `loose`.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto node = new TextNode(_parentDocument, "");
		const bool strict = _parentDocument !is null && !_parentDocument.loose;
		node.contents = htmlEntitiesDecode(html, strict);
		return node;
	}

	/// Returns a new text node with the same document and contents; `deep` is unused.
	override @property TextNode cloneNode(bool deep) {
		return new TextNode(parentDocument, contents);
	}

	/// The contents, exactly as stored (not entity-encoded).
	override string nodeValue() const {
		return contents; //toString();
	}

	/// Returns `NodeType.Text`.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Entity-encodes the contents into `where` and returns the encoded text,
	/// or "" when the contents are empty.
override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/// Returns the entity-encoded text laid out for the pretty printer.
	///
	/// Whitespace runs are collapsed to a single space unless the parent is a
	/// `pre` element; the text is stripped unless the parent is a `p` element.
	/// Each line of the result after the first is indented with
	/// `indentationLevel` copies of `indentWith`, wrapped in `<!--` `-->` when
	/// `insertComments` is set.
	override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			// starting out "after whitespace" drops leading whitespace when we are indented
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				// NOTE(review): toPrettyStringIndent is defined elsewhere; presumably it
				// emits the line break and indentation prefix for the first line - confirm
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// use the caller's indentation string here as well; this used to be a
				// hard-coded "\t", which ignored a custom indentWith on continuation lines
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

/// An `a` element with helpers for reading and editing the query string of its href.
/// Group: implementations
class Link : Element {

	/// Creates an empty `a` element belonging to the given document.
6002 this(Document _parentDocument) { 6003 super(_parentDocument); 6004 this.tagName = "a"; 6005 } 6006 6007 6008 ///. 6009 this(string href, string text) { 6010 super("a"); 6011 setAttribute("href", href); 6012 innerText = text; 6013 } 6014 /+ 6015 /// Returns everything in the href EXCEPT the query string 6016 @property string targetSansQuery() { 6017 6018 } 6019 6020 ///. 6021 @property string domainName() { 6022 6023 } 6024 6025 ///. 6026 @property string path 6027 +/ 6028 /// This gets a variable from the URL's query string. 6029 string getValue(string name) { 6030 auto vars = variablesHash(); 6031 if(name in vars) 6032 return vars[name]; 6033 return null; 6034 } 6035 6036 private string[string] variablesHash() { 6037 string href = getAttribute("href"); 6038 if(href is null) 6039 return null; 6040 6041 auto ques = href.indexOf("?"); 6042 string str = ""; 6043 if(ques != -1) { 6044 str = href[ques+1..$]; 6045 6046 auto fragment = str.indexOf("#"); 6047 if(fragment != -1) 6048 str = str[0..fragment]; 6049 } 6050 6051 string[] variables = str.split("&"); 6052 6053 string[string] hash; 6054 6055 foreach(var; variables) { 6056 auto index = var.indexOf("="); 6057 if(index == -1) 6058 hash[var] = ""; 6059 else { 6060 hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]); 6061 } 6062 } 6063 6064 return hash; 6065 } 6066 6067 ///. 
6068 /*private*/ void updateQueryString(string[string] vars) { 6069 string href = getAttribute("href"); 6070 6071 auto question = href.indexOf("?"); 6072 if(question != -1) 6073 href = href[0..question]; 6074 6075 string frag = ""; 6076 auto fragment = href.indexOf("#"); 6077 if(fragment != -1) { 6078 frag = href[fragment..$]; 6079 href = href[0..fragment]; 6080 } 6081 6082 string query = "?"; 6083 bool first = true; 6084 foreach(name, value; vars) { 6085 if(!first) 6086 query ~= "&"; 6087 else 6088 first = false; 6089 6090 query ~= encodeComponent(name); 6091 if(value.length) 6092 query ~= "=" ~ encodeComponent(value); 6093 } 6094 6095 if(query != "?") 6096 href ~= query; 6097 6098 href ~= frag; 6099 6100 setAttribute("href", href); 6101 } 6102 6103 /// Sets or adds the variable with the given name to the given value 6104 /// It automatically URI encodes the values and takes care of the ? and &. 6105 override void setValue(string name, string variable) { 6106 auto vars = variablesHash(); 6107 vars[name] = variable; 6108 6109 updateQueryString(vars); 6110 } 6111 6112 /// Removes the given variable from the query string 6113 void removeValue(string name) { 6114 auto vars = variablesHash(); 6115 vars.remove(name); 6116 6117 updateQueryString(vars); 6118 } 6119 6120 /* 6121 ///. 6122 override string toString() { 6123 6124 } 6125 6126 ///. 6127 override string getAttribute(string name) { 6128 if(name == "href") { 6129 6130 } else 6131 return super.getAttribute(name); 6132 } 6133 */ 6134 } 6135 6136 ///. 6137 /// Group: implementations 6138 class Form : Element { 6139 6140 ///. 
6141 this(Document _parentDocument) { 6142 super(_parentDocument); 6143 tagName = "form"; 6144 } 6145 6146 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 6147 auto t = this.querySelector("fieldset div"); 6148 if(t is null) 6149 return super.addField(label, name, type, fieldOptions); 6150 else 6151 return t.addField(label, name, type, fieldOptions); 6152 } 6153 6154 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 6155 auto type = "text"; 6156 auto t = this.querySelector("fieldset div"); 6157 if(t is null) 6158 return super.addField(label, name, type, fieldOptions); 6159 else 6160 return t.addField(label, name, type, fieldOptions); 6161 } 6162 6163 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 6164 auto t = this.querySelector("fieldset div"); 6165 if(t is null) 6166 return super.addField(label, name, options, fieldOptions); 6167 else 6168 return t.addField(label, name, options, fieldOptions); 6169 } 6170 6171 override void setValue(string field, string value) { 6172 setValue(field, value, true); 6173 } 6174 6175 // FIXME: doesn't handle arrays; multiple fields can have the same name 6176 6177 /// Set's the form field's value. For input boxes, this sets the value attribute. For 6178 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 6179 /// the checked/selected attribute from all, and adds it to the one matching the value. 6180 /// For checkboxes, if the value is non-null and not empty, it checks the box. 6181 6182 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 6183 /// Otherwise, it makes a new input with type=hidden to keep the value. 
6184 void setValue(string field, string value, bool makeNew) { 6185 auto eles = getField(field); 6186 if(eles.length == 0) { 6187 if(makeNew) { 6188 addInput(field, value); 6189 return; 6190 } else 6191 throw new Exception("form field does not exist"); 6192 } 6193 6194 if(eles.length == 1) { 6195 auto e = eles[0]; 6196 switch(e.tagName) { 6197 default: assert(0); 6198 case "textarea": 6199 e.innerText = value; 6200 break; 6201 case "input": 6202 string type = e.getAttribute("type"); 6203 if(type is null) { 6204 e.value = value; 6205 return; 6206 } 6207 switch(type) { 6208 case "checkbox": 6209 case "radio": 6210 if(value.length && value != "false") 6211 e.setAttribute("checked", "checked"); 6212 else 6213 e.removeAttribute("checked"); 6214 break; 6215 default: 6216 e.value = value; 6217 return; 6218 } 6219 break; 6220 case "select": 6221 bool found = false; 6222 foreach(child; e.tree) { 6223 if(child.tagName != "option") 6224 continue; 6225 string val = child.getAttribute("value"); 6226 if(val is null) 6227 val = child.innerText; 6228 if(val == value) { 6229 child.setAttribute("selected", "selected"); 6230 found = true; 6231 } else 6232 child.removeAttribute("selected"); 6233 } 6234 6235 if(!found) { 6236 e.addChild("option", value) 6237 .setAttribute("selected", "selected"); 6238 } 6239 break; 6240 } 6241 } else { 6242 // assume radio boxes 6243 foreach(e; eles) { 6244 string val = e.getAttribute("value"); 6245 //if(val is null) 6246 // throw new Exception("don't know what to do with radio boxes with null value"); 6247 if(val == value) 6248 e.setAttribute("checked", "checked"); 6249 else 6250 e.removeAttribute("checked"); 6251 } 6252 } 6253 } 6254 6255 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 6256 /// it makes no attempt to find and modify existing elements in the form to the new values. 
6257 void addValueArray(string key, string[] arrayOfValues) { 6258 foreach(arr; arrayOfValues) 6259 addChild("input", key, arr); 6260 } 6261 6262 /// Gets the value of the field; what would be given if it submitted right now. (so 6263 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 6264 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 6265 string getValue(string field) { 6266 auto eles = getField(field); 6267 if(eles.length == 0) 6268 return ""; 6269 if(eles.length == 1) { 6270 auto e = eles[0]; 6271 switch(e.tagName) { 6272 default: assert(0); 6273 case "input": 6274 if(e.type == "checkbox") { 6275 if(e.checked) 6276 return e.value.length ? e.value : "checked"; 6277 return ""; 6278 } else 6279 return e.value; 6280 case "textarea": 6281 return e.innerText; 6282 case "select": 6283 foreach(child; e.tree) { 6284 if(child.tagName != "option") 6285 continue; 6286 if(child.selected) 6287 return child.value; 6288 } 6289 break; 6290 } 6291 } else { 6292 // assuming radio 6293 foreach(e; eles) { 6294 if(e.checked) 6295 return e.value; 6296 } 6297 } 6298 6299 return ""; 6300 } 6301 6302 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 6303 ///. 
6304 string getPostableData() { 6305 bool[string] namesDone; 6306 6307 string ret; 6308 bool outputted = false; 6309 6310 foreach(e; getElementsBySelector("[name]")) { 6311 if(e.name in namesDone) 6312 continue; 6313 6314 if(outputted) 6315 ret ~= "&"; 6316 else 6317 outputted = true; 6318 6319 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 6320 6321 namesDone[e.name] = true; 6322 } 6323 6324 return ret; 6325 } 6326 6327 /// Gets the actual elements with the given name 6328 Element[] getField(string name) { 6329 Element[] ret; 6330 foreach(e; tree) { 6331 if(e.name == name) 6332 ret ~= e; 6333 } 6334 return ret; 6335 } 6336 6337 /// Grabs the <label> with the given for tag, if there is one. 6338 Element getLabel(string forId) { 6339 foreach(e; tree) 6340 if(e.tagName == "label" && e.getAttribute("for") == forId) 6341 return e; 6342 return null; 6343 } 6344 6345 /// Adds a new INPUT field to the end of the form with the given attributes. 6346 Element addInput(string name, string value, string type = "hidden") { 6347 auto e = new Element(parentDocument, "input", null, true); 6348 e.name = name; 6349 e.value = value; 6350 e.type = type; 6351 6352 appendChild(e); 6353 6354 return e; 6355 } 6356 6357 /// Removes the given field from the form. It finds the element and knocks it right out. 6358 void removeField(string name) { 6359 foreach(e; getField(name)) 6360 e.parentNode.removeChild(e); 6361 } 6362 6363 /+ 6364 /// Returns all form members. 6365 @property Element[] elements() { 6366 6367 } 6368 6369 ///. 
6370 string opDispatch(string name)(string v = null) 6371 // filter things that should actually be attributes on the form 6372 if( name != "method" && name != "action" && name != "enctype" 6373 && name != "style" && name != "name" && name != "id" && name != "class") 6374 { 6375 6376 } 6377 +/ 6378 /+ 6379 void submit() { 6380 // take its elements and submit them through http 6381 } 6382 +/ 6383 } 6384 6385 import std.conv; 6386 6387 ///. 6388 /// Group: implementations 6389 class Table : Element { 6390 6391 ///. 6392 this(Document _parentDocument) { 6393 super(_parentDocument); 6394 tagName = "table"; 6395 } 6396 6397 /// Creates an element with the given type and content. 6398 Element th(T)(T t) { 6399 Element e; 6400 if(parentDocument !is null) 6401 e = parentDocument.createElement("th"); 6402 else 6403 e = Element.make("th"); 6404 static if(is(T == Html)) 6405 e.innerHTML = t; 6406 else 6407 e.innerText = to!string(t); 6408 return e; 6409 } 6410 6411 /// ditto 6412 Element td(T)(T t) { 6413 Element e; 6414 if(parentDocument !is null) 6415 e = parentDocument.createElement("td"); 6416 else 6417 e = Element.make("td"); 6418 static if(is(T == Html)) 6419 e.innerHTML = t; 6420 else 6421 e.innerText = to!string(t); 6422 return e; 6423 } 6424 6425 /// . 6426 Element appendHeaderRow(T...)(T t) { 6427 return appendRowInternal("th", "thead", t); 6428 } 6429 6430 /// . 6431 Element appendFooterRow(T...)(T t) { 6432 return appendRowInternal("td", "tfoot", t); 6433 } 6434 6435 /// . 6436 Element appendRow(T...)(T t) { 6437 return appendRowInternal("td", "tbody", t); 6438 } 6439 6440 void addColumnClasses(string[] classes...) 
{ 6441 auto grid = getGrid(); 6442 foreach(row; grid) 6443 foreach(i, cl; classes) { 6444 if(cl.length) 6445 if(i < row.length) 6446 row[i].addClass(cl); 6447 } 6448 } 6449 6450 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 6451 Element row = Element.make("tr"); 6452 6453 foreach(e; t) { 6454 static if(is(typeof(e) : Element)) { 6455 if(e.tagName == "td" || e.tagName == "th") 6456 row.appendChild(e); 6457 else { 6458 Element a = Element.make(innerType); 6459 6460 a.appendChild(e); 6461 6462 row.appendChild(a); 6463 } 6464 } else static if(is(typeof(e) == Html)) { 6465 Element a = Element.make(innerType); 6466 a.innerHTML = e.source; 6467 row.appendChild(a); 6468 } else static if(is(typeof(e) == Element[])) { 6469 Element a = Element.make(innerType); 6470 foreach(ele; e) 6471 a.appendChild(ele); 6472 row.appendChild(a); 6473 } else static if(is(typeof(e) == string[])) { 6474 foreach(ele; e) { 6475 Element a = Element.make(innerType); 6476 a.innerText = to!string(ele); 6477 row.appendChild(a); 6478 } 6479 } else { 6480 Element a = Element.make(innerType); 6481 a.innerText = to!string(e); 6482 row.appendChild(a); 6483 } 6484 } 6485 6486 foreach(e; children) { 6487 if(e.tagName == findType) { 6488 e.appendChild(row); 6489 return row; 6490 } 6491 } 6492 6493 // the type was not found if we are here... let's add it so it is well-formed 6494 auto lol = this.addChild(findType); 6495 lol.appendChild(row); 6496 6497 return row; 6498 } 6499 6500 ///. 6501 Element captionElement() { 6502 Element cap; 6503 foreach(c; children) { 6504 if(c.tagName == "caption") { 6505 cap = c; 6506 break; 6507 } 6508 } 6509 6510 if(cap is null) { 6511 cap = Element.make("caption"); 6512 appendChild(cap); 6513 } 6514 6515 return cap; 6516 } 6517 6518 ///. 6519 @property string caption() { 6520 return captionElement().innerText; 6521 } 6522 6523 ///. 
6524 @property void caption(string text) { 6525 captionElement().innerText = text; 6526 } 6527 6528 /// Gets the logical layout of the table as a rectangular grid of 6529 /// cells. It considers rowspan and colspan. A cell with a large 6530 /// span is represented in the grid by being referenced several times. 6531 /// The tablePortition parameter can get just a <thead>, <tbody>, or 6532 /// <tfoot> portion if you pass one. 6533 /// 6534 /// Note: the rectangular grid might include null cells. 6535 /// 6536 /// This is kinda expensive so you should call once when you want the grid, 6537 /// then do lookups on the returned array. 6538 TableCell[][] getGrid(Element tablePortition = null) 6539 in { 6540 if(tablePortition is null) 6541 assert(tablePortition is null); 6542 else { 6543 assert(tablePortition !is null); 6544 assert(tablePortition.parentNode is this); 6545 assert( 6546 tablePortition.tagName == "tbody" 6547 || 6548 tablePortition.tagName == "tfoot" 6549 || 6550 tablePortition.tagName == "thead" 6551 ); 6552 } 6553 } 6554 body { 6555 if(tablePortition is null) 6556 tablePortition = this; 6557 6558 TableCell[][] ret; 6559 6560 // FIXME: will also return rows of sub tables! 6561 auto rows = tablePortition.getElementsByTagName("tr"); 6562 ret.length = rows.length; 6563 6564 int maxLength = 0; 6565 6566 int insertCell(int row, int position, TableCell cell) { 6567 if(row >= ret.length) 6568 return position; // not supposed to happen - a rowspan is prolly too big. 6569 6570 if(position == -1) { 6571 position++; 6572 foreach(item; ret[row]) { 6573 if(item is null) 6574 break; 6575 position++; 6576 } 6577 } 6578 6579 if(position < ret[row].length) 6580 ret[row][position] = cell; 6581 else 6582 foreach(i; ret[row].length .. 
position + 1) { 6583 if(i == position) 6584 ret[row] ~= cell; 6585 else 6586 ret[row] ~= null; 6587 } 6588 return position; 6589 } 6590 6591 foreach(i, rowElement; rows) { 6592 auto row = cast(TableRow) rowElement; 6593 assert(row !is null); 6594 assert(i < ret.length); 6595 6596 int position = 0; 6597 foreach(cellElement; rowElement.childNodes) { 6598 auto cell = cast(TableCell) cellElement; 6599 if(cell is null) 6600 continue; 6601 6602 // FIXME: colspan == 0 or rowspan == 0 6603 // is supposed to mean fill in the rest of 6604 // the table, not skip it 6605 foreach(int j; 0 .. cell.colspan) { 6606 foreach(int k; 0 .. cell.rowspan) 6607 // if the first row, always append. 6608 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 6609 position++; 6610 } 6611 } 6612 6613 if(ret[i].length > maxLength) 6614 maxLength = cast(int) ret[i].length; 6615 } 6616 6617 // want to ensure it's rectangular 6618 foreach(ref r; ret) { 6619 foreach(i; r.length .. maxLength) 6620 r ~= null; 6621 } 6622 6623 return ret; 6624 } 6625 } 6626 6627 /// Represents a table row element - a <tr> 6628 /// Group: implementations 6629 class TableRow : Element { 6630 ///. 6631 this(Document _parentDocument) { 6632 super(_parentDocument); 6633 tagName = "tr"; 6634 } 6635 6636 // FIXME: the standard says there should be a lot more in here, 6637 // but meh, I never use it and it's a pain to implement. 6638 } 6639 6640 /// Represents anything that can be a table cell - <td> or <th> html. 6641 /// Group: implementations 6642 class TableCell : Element { 6643 ///. 
6644 this(Document _parentDocument, string _tagName) { 6645 super(_parentDocument, _tagName); 6646 } 6647 6648 @property int rowspan() const { 6649 int ret = 1; 6650 auto it = getAttribute("rowspan"); 6651 if(it.length) 6652 ret = to!int(it); 6653 return ret; 6654 } 6655 6656 @property int colspan() const { 6657 int ret = 1; 6658 auto it = getAttribute("colspan"); 6659 if(it.length) 6660 ret = to!int(it); 6661 return ret; 6662 } 6663 6664 @property int rowspan(int i) { 6665 setAttribute("rowspan", to!string(i)); 6666 return i; 6667 } 6668 6669 @property int colspan(int i) { 6670 setAttribute("colspan", to!string(i)); 6671 return i; 6672 } 6673 6674 } 6675 6676 6677 ///. 6678 /// Group: implementations 6679 class MarkupException : Exception { 6680 6681 ///. 6682 this(string message, string file = __FILE__, size_t line = __LINE__) { 6683 super(message, file, line); 6684 } 6685 } 6686 6687 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6688 /// Group: implementations 6689 class ElementNotFoundException : Exception { 6690 6691 /// type == kind of element you were looking for and search == a selector describing the search. 6692 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6693 this.searchContext = searchContext; 6694 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6695 } 6696 6697 Element searchContext; 6698 } 6699 6700 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6701 /// 6702 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6703 /// Group: core_functionality 6704 struct Html { 6705 /// This string holds the actual html. Use it to retrieve the contents. 
6706 string source; 6707 } 6708 6709 // for the observers 6710 enum DomMutationOperations { 6711 setAttribute, 6712 removeAttribute, 6713 appendChild, // tagname, attributes[], innerHTML 6714 insertBefore, 6715 truncateChildren, 6716 removeChild, 6717 appendHtml, 6718 replaceHtml, 6719 appendText, 6720 replaceText, 6721 replaceTextOnly 6722 } 6723 6724 // and for observers too 6725 struct DomMutationEvent { 6726 DomMutationOperations operation; 6727 Element target; 6728 Element related; // what this means differs with the operation 6729 Element related2; 6730 string relatedString; 6731 string relatedString2; 6732 } 6733 6734 6735 private immutable static string[] selfClosedElements = [ 6736 // html 4 6737 "img", "hr", "input", "br", "col", "link", "meta", 6738 // html 5 6739 "source" ]; 6740 6741 private immutable static string[] inlineElements = [ 6742 "span", "strong", "em", "b", "i", "a" 6743 ]; 6744 6745 6746 static import std.conv; 6747 6748 ///. 6749 int intFromHex(string hex) { 6750 int place = 1; 6751 int value = 0; 6752 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6753 int v; 6754 char q = hex[a]; 6755 if( q >= '0' && q <= '9') 6756 v = q - '0'; 6757 else if (q >= 'a' && q <= 'f') 6758 v = q - 'a' + 10; 6759 else throw new Exception("Illegal hex character: " ~ q); 6760 6761 value += v * place; 6762 6763 place *= 16; 6764 } 6765 6766 return value; 6767 } 6768 6769 6770 // CSS selector handling 6771 6772 // EXTENSIONS 6773 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6774 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6775 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 6776 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 6777 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 6778 6779 6780 6781 // CSS4 
draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it. 6782 // That might be useful to implement, though I do have parent selectors too. 6783 6784 ///. 6785 static immutable string[] selectorTokens = [ 6786 // It is important that the 2 character possibilities go first here for accurate lexing 6787 "~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard 6788 "::", ">>", 6789 "<<", // my any-parent extension (reciprocal of whitespace) 6790 // " - ", // previous-sibling extension (whitespace required to disambiguate tag-names) 6791 ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")" 6792 ]; // other is white space or a name. 6793 6794 ///. 6795 sizediff_t idToken(string str, sizediff_t position) { 6796 sizediff_t tid = -1; 6797 char c = str[position]; 6798 foreach(a, token; selectorTokens) 6799 6800 if(c == token[0]) { 6801 if(token.length > 1) { 6802 if(position + 1 >= str.length || str[position+1] != token[1]) 6803 continue; // not this token 6804 } 6805 tid = a; 6806 break; 6807 } 6808 return tid; 6809 } 6810 6811 ///. 6812 // look, ma, no phobos! 6813 // new lexer by ketmar 6814 string[] lexSelector (string selstr) { 6815 6816 static sizediff_t idToken (string str, size_t stpos) { 6817 char c = str[stpos]; 6818 foreach (sizediff_t tidx, immutable token; selectorTokens) { 6819 if (c == token[0]) { 6820 if (token.length > 1) { 6821 assert(token.length == 2, token); // we don't have 3-char tokens yet 6822 if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue; 6823 } 6824 return tidx; 6825 } 6826 } 6827 return -1; 6828 } 6829 6830 // skip spaces and comments 6831 static string removeLeadingBlanks (string str) { 6832 size_t curpos = 0; 6833 while (curpos < str.length) { 6834 immutable char ch = str[curpos]; 6835 // this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares! 
6836 if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') { 6837 // comment 6838 curpos += 2; 6839 while (curpos < str.length) { 6840 if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') { 6841 curpos += 2; 6842 break; 6843 } 6844 ++curpos; 6845 } 6846 } else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below. 6847 ++curpos; 6848 6849 // FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While 6850 // "foo bar" is clear, and can only have one meaning, consider ".foo .bar". 6851 // That is not the same as ".foo.bar". If the space is stripped, important 6852 // information is lost, despite the tokens being separatable anyway. 6853 // 6854 // The parser really needs to be aware of the presence of a space. 6855 } else { 6856 break; 6857 } 6858 } 6859 return str[curpos..$]; 6860 } 6861 6862 static bool isBlankAt() (string str, size_t pos) { 6863 // we should consider unicode spaces too, but... unicode sux anyway. 6864 return 6865 (pos < str.length && // in string 6866 (str[pos] <= 32 || // space 6867 (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment 6868 } 6869 6870 string[] tokens; 6871 // lexx it! 6872 while ((selstr = removeLeadingBlanks(selstr)).length > 0) { 6873 if(selstr[0] == '\"' || selstr[0] == '\'') { 6874 auto end = selstr[0]; 6875 auto pos = 1; 6876 bool escaping; 6877 while(pos < selstr.length && !escaping && selstr[pos] != end) { 6878 if(escaping) 6879 escaping = false; 6880 else if(selstr[pos] == '\\') 6881 escaping = true; 6882 pos++; 6883 } 6884 6885 // FIXME: do better unescaping 6886 tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`); 6887 if(pos+1 >= selstr.length) 6888 assert(0, selstr); 6889 selstr = selstr[pos + 1.. 
$]; 6890 continue; 6891 } 6892 6893 6894 // no tokens starts with escape 6895 immutable tid = idToken(selstr, 0); 6896 if (tid >= 0) { 6897 // special token 6898 tokens ~= selectorTokens[tid]; // it's funnier this way 6899 selstr = selstr[selectorTokens[tid].length..$]; 6900 continue; 6901 } 6902 // from start to space or special token 6903 size_t escapePos = size_t.max; 6904 size_t curpos = 0; // i can has chizburger^w escape at the start 6905 while (curpos < selstr.length) { 6906 if (selstr[curpos] == '\\') { 6907 // this is escape, just skip it and next char 6908 if (escapePos == size_t.max) escapePos = curpos; 6909 curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length); 6910 } else { 6911 if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break; 6912 ++curpos; 6913 } 6914 } 6915 // identifier 6916 if (escapePos != size_t.max) { 6917 // i hate it when it happens 6918 string id = selstr[0..escapePos]; 6919 while (escapePos < curpos) { 6920 if (curpos-escapePos < 2) break; 6921 id ~= selstr[escapePos+1]; // escaped char 6922 escapePos += 2; 6923 immutable stp = escapePos; 6924 while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos; 6925 if (escapePos > stp) id ~= selstr[stp..escapePos]; 6926 } 6927 if (id.length > 0) tokens ~= id; 6928 } else { 6929 tokens ~= selstr[0..curpos]; 6930 } 6931 selstr = selstr[curpos..$]; 6932 } 6933 return tokens; 6934 } 6935 version(unittest_domd_lexer) unittest { 6936 assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]); 6937 assert(lexSelector(r"div/**/. 
id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
}

/// One compound selector (tag, attribute and pseudo-class filters) together with
/// the combinator (`separation`) that relates it to the part before it.
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True if nothing but `separation` differs from a default-initialized part.
	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	/// Renders the part back into selector syntax, combinator first.
	string toString() {
		string ret;
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= " > "; break;
			case 2: ret ~= " + "; break;
			case 3: ret ~= " ~ "; break;
			case 4: ret ~= " < "; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]";

		foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")";
		foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")";

		foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")";
		foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")";

		foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")";
		foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")";
		foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(firstOfType) ret ~= ":first-of-type";
		if(lastOfType) ret ~= ":last-of-type";
		if(emptyElement) ret ~= ":empty";
		if(whitespaceOnly) ret ~= ":whitespace-only";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/// Tests the element against every filter of this part; `separation` is not
	/// looked at here. Returns false for null and for nodes whose nodeType is not 1.
	bool matchElement(Element e) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					// only the first sibling of this type counts. Passing this test
					// must not end the function: the remaining filters still apply.
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					// same as above, looking from the end
					if(c !is e)
						return false;
					break;
				}
			}
		}
		/+
		if(scopeElement) {
			if(e !is this_)
				return false;
		}
		+/
		if(emptyElement) {
			if(e.children.length)
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// note: i is the zero-based index among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/// The parsed argument of an :nth-*() pseudo-class: `multiplier`*n + `adder`,
/// optionally followed by "of <selector>" (kept unparsed in `of`).
struct ParsedNth {
	int multiplier;
	int adder;

	string of;

	/// Parses "odd", "even", a plain number, or the an+b form ("2n+1", "n", "-n+3", ...).
	/// Throws: Exception if something other than +, - or "of" follows the n.
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even is 2n+0; an adder of 1 here made it identical to odd
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length >= 2 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// a bare sign in front of the n is a coefficient of -1 / +1
				n = text[0] == '-' ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/// True if `e` is in `elements` (after filtering by `of`, if set) at a
	/// one-based position equal to multiplier*n + adder for some integer n >= 0.
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;
		foreach(ele; elements) {
			if(of.length) {
				auto sel = Selector(of);
				if(!sel.matchesElement(ele))
					continue;
			}
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		idx -= adder;
		if(multiplier) {
			// divisibility alone would also accept negative n, letting e.g. 2n+5
			// match positions 1 and 3
			if(idx % multiplier == 0 && idx / multiplier >= 0)
				return true;
		} else {
			return idx == 0;
		}
		return false;
	}

	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Reads an optionally signed decimal integer off the front of text.
	// Returns 0 (consuming only the sign, if any) when no digits are there.
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
///.
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
			// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
7337 if(part.matchElement(e)) { 7338 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7339 } 7340 } 7341 break; 7342 case 1: // children 7343 foreach(e; start.childNodes) { 7344 if(part.matchElement(e)) { 7345 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7346 } 7347 } 7348 break; 7349 case 2: // next-sibling 7350 auto e = start.nextSibling("*"); 7351 if(part.matchElement(e)) 7352 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7353 break; 7354 case 3: // younger sibling 7355 auto tmp = start.parentNode; 7356 if(tmp !is null) { 7357 sizediff_t pos = -1; 7358 auto children = tmp.childElements; 7359 foreach(i, child; children) { 7360 if(child is start) { 7361 pos = i; 7362 break; 7363 } 7364 } 7365 assert(pos != -1); 7366 foreach(e; children[pos+1..$]) { 7367 if(part.matchElement(e)) 7368 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7369 } 7370 } 7371 break; 7372 case 4: // immediate parent node, an extension of mine to walk back up the tree 7373 auto e = start.parentNode; 7374 if(part.matchElement(e)) { 7375 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7376 } 7377 /* 7378 Example of usefulness: 7379 7380 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 7381 7382 table th < tr 7383 7384 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 7385 */ 7386 break; 7387 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 7388 /* 7389 Like with the < operator, this is best used to find some parent of a particular known element. 7390 7391 Say you have an anchor inside a 7392 */ 7393 } 7394 7395 return ret; 7396 } 7397 7398 /++ 7399 Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing. 
7400 7401 See_Also: 7402 $(LIST 7403 * [Element.querySelector] 7404 * [Element.querySelectorAll] 7405 * [Element.matches] 7406 * [Element.closest] 7407 * [Document.querySelector] 7408 * [Document.querySelectorAll] 7409 ) 7410 +/ 7411 /// Group: core_functionality 7412 struct Selector { 7413 SelectorComponent[] components; 7414 string original; 7415 /++ 7416 Parses the selector string and constructs the usable structure. 7417 +/ 7418 this(string cssSelector) { 7419 components = parseSelectorString(cssSelector); 7420 original = cssSelector; 7421 } 7422 7423 /++ 7424 Returns true if the given element matches this selector, 7425 considered relative to an arbitrary element. 7426 7427 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 7428 with [std.algorithm.iteration.filter]: 7429 7430 --- 7431 Selector sel = Selector("foo > bar"); 7432 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 7433 --- 7434 +/ 7435 bool matchesElement(Element e, Element relativeTo = null) { 7436 foreach(component; components) 7437 if(component.matchElement(e, relativeTo)) 7438 return true; 7439 7440 return false; 7441 } 7442 7443 /++ 7444 Reciprocal of [Element.querySelectorAll] 7445 +/ 7446 Element[] getMatchingElements(Element start) { 7447 Element[] ret; 7448 foreach(component; components) 7449 ret ~= getElementsBySelectorParts(start, component.parts); 7450 return removeDuplicates(ret); 7451 } 7452 7453 /++ 7454 Like [getMatchingElements], but returns a lazy range. Be careful 7455 about mutating the dom as you iterate through this. 
7456 +/ 7457 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 7458 import std.algorithm.iteration; 7459 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 7460 } 7461 7462 7463 /// Returns the string this was built from 7464 string toString() { 7465 return original; 7466 } 7467 7468 /++ 7469 Returns a string from the parsed result 7470 7471 7472 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 7473 +/ 7474 string parsedToString() { 7475 string ret; 7476 7477 foreach(idx, component; components) { 7478 if(idx) ret ~= ", "; 7479 ret ~= component.toString(); 7480 } 7481 7482 return ret; 7483 } 7484 } 7485 7486 ///. 7487 struct SelectorComponent { 7488 ///. 7489 SelectorPart[] parts; 7490 7491 ///. 7492 string toString() { 7493 string ret; 7494 foreach(part; parts) 7495 ret ~= part.toString(); 7496 return ret; 7497 } 7498 7499 // USEFUL 7500 ///. 7501 Element[] getElements(Element start) { 7502 return removeDuplicates(getElementsBySelectorParts(start, parts)); 7503 } 7504 7505 // USEFUL (but not implemented) 7506 /// If relativeTo == null, it assumes the root of the parent document. 7507 bool matchElement(Element e, Element relativeTo = null) { 7508 if(e is null) return false; 7509 Element where = e; 7510 int lastSeparation = -1; 7511 7512 auto lparts = parts; 7513 7514 if(parts.length && parts[0].separation > 0) { 7515 // if it starts with a non-trivial separator, inject 7516 // a "*" matcher to act as a root. for cases like document.querySelector("> body") 7517 // which implies html 7518 7519 // there is probably a MUCH better way to do this. 
7520 auto dummy = SelectorPart.init; 7521 dummy.tagNameFilter = "*"; 7522 dummy.separation = 0; 7523 lparts = dummy ~ lparts; 7524 } 7525 7526 foreach(part; retro(lparts)) { 7527 7528 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 7529 // writeln(parts); 7530 7531 if(lastSeparation == -1) { 7532 if(!part.matchElement(where)) 7533 return false; 7534 } else if(lastSeparation == 0) { // generic parent 7535 // need to go up the whole chain 7536 where = where.parentNode; 7537 7538 while(where !is null) { 7539 if(part.matchElement(where)) 7540 break; 7541 7542 if(where is relativeTo) 7543 return false; 7544 7545 where = where.parentNode; 7546 } 7547 7548 if(where is null) 7549 return false; 7550 } else if(lastSeparation == 1) { // the > operator 7551 where = where.parentNode; 7552 7553 if(!part.matchElement(where)) 7554 return false; 7555 } else if(lastSeparation == 2) { // the + operator 7556 //writeln("WHERE", where, " ", part); 7557 where = where.previousSibling("*"); 7558 7559 if(!part.matchElement(where)) 7560 return false; 7561 } else if(lastSeparation == 3) { // the ~ operator 7562 where = where.previousSibling("*"); 7563 while(where !is null) { 7564 if(part.matchElement(where)) 7565 break; 7566 7567 if(where is relativeTo) 7568 return false; 7569 7570 where = where.previousSibling("*"); 7571 } 7572 7573 if(where is null) 7574 return false; 7575 } else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 7576 // FIXME 7577 } 7578 7579 lastSeparation = part.separation; 7580 7581 if(where is relativeTo) 7582 return false; // at end of line, if we aren't done by now, the match fails 7583 } 7584 return true; // if we got here, it is a success 7585 } 7586 7587 // the string should NOT have commas. Use parseSelectorString for that instead 7588 ///. 7589 static SelectorComponent fromString(string selector) { 7590 return parseSelector(lexSelector(selector)); 7591 } 7592 } 7593 7594 ///. 
/++
	Lexes a selector string and splits it on the commas that are not inside
	parentheses, parsing each piece into a [SelectorComponent].

	Params:
		selector = the full selector text, commas allowed
		caseSensitiveTags = forwarded to [parseSelector]

	Returns:
		one component per non-empty comma-separated piece
+/
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		// a comma inside parens, like in :is(a, b), does not end the component
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/++
	Turns the tokens of one comma-free selector into a [SelectorComponent]
	using a small state machine.

	Params:
		tokens = the lexed tokens of a single selector (no top-level commas)
		caseSensitiveTags = if false, tag name tokens are lowercased before being stored

	Throws:
		Exception("parse error") if a functional pseudo-class such as
		`:not` or `:nth-child` is not followed by `(`.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	// stores the part being built, if anything was put in it, and starts a new one
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// returns the concatenated tokens between the parens that follow
		// the current token, without consuming them; the
		// SkippingFunctionalSelector state steps over them afterwards
		string readFunctionalSelector() {
			string collected;
			// the bounds check turns a selector ending in e.g. ":not" into
			// the same parse error as a missing paren instead of a range violation
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				collected ~= t;
			}

			return collected;
		}

		// tid stays -1 unless the token is one of the known selector operators
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							assert(0, token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
						// extensions not implemented
						//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// readFunctionalSelector already captured the argument text;
				// here we only step over its tokens until the parens balance
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	// store whatever part was still being built when the tokens ran out
	commit();

	return s;
}

///.
/// Returns the elements of `input` in their original order, keeping only the first occurrence of each.
Element[] removeDuplicates(Element[] input) {
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Builds the style from one rule.

		Params:
			rule = the selector text the declarations came from; empty for an inline style
			content = the `name: value;` declarations. Pieces without a colon are skipped,
			          and `!important` is currently stripped from the values.
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		// foreach iterates over the slice as it is right now, so the
		// properties appended by the expansion are not themselves expanded
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Not implemented yet: currently returns a zeroed specificity for every rule.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// The specificity either as four separate bytes or as the single `score` that overlays them.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/++
		Property access by camelCase name: `style.marginLeft` reads the
		value, `style.marginLeft = "1px"` sets it with inline specificity.
	+/
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);

		// setValue takes a Specificity, and a bare integer does not
		// implicitly convert to the union, so build one explicitly
		Specificity inlineSpecificity;
		inlineSpecificity.score = 0x02000000; /* inline specificity */
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name
	/// Returns the value of the first property with that name, or null if there is none.
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, provided the new specificity is at least that of
		the value already stored. Shorthand properties are expanded into
		their longhand forms as well.

		takes dash style name

		Params:
			name = the dash-style property name
			value = the new value; an `!important` marker is stripped from it
			newSpecificity = the specificity of the rule providing the value
			explicit = false when the value was inferred from a shorthand

		Returns:
			the value with `!important` removed, whether or not it was stored
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties)
			if(property.name == name) {
				if(newSpecificity.score >= property.specificity.score) {
					property.givenExplicitly = explicit;
					// store the new value first so the shorthand expansion
					// below sees it rather than the value being replaced
					property.value = value;
					// remember who won, so a weaker rule cannot override it later
					property.specificity = newSpecificity;

					// expand from a copy: the expansion can append to
					// `properties`, which may move the array out from under the ref
					Property updated = property;
					expandShortForm(updated, newSpecificity);
					return value;
				} else {
					if(name == "display")
					{}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
					return value; // do nothing - the specificity is too low
				}
			}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must not be reported as explicit
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	/// Expands a one to four value shorthand into its -top, -right, -bottom and -left properties.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on any run of whitespace so doubled spaces or tabs do not produce empty parts
		auto parts = value.split();
		switch(parts.length) {
			case 1:
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2:
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3:
				// three values are: top, left-and-right, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4:
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// empty or too many values: not a valid shorthand, so leave
				// the longhands alone instead of aborting on bad input
			break;
		}
	}

	/// If `p` is a shorthand this knows about, sets the corresponding longhand properties too.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Writes the explicitly given properties back out as css; wrapped in `rule { }` if there is an originating rule.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred shit

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` in `url("...")`. The url is inserted as-is, without any escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	///.
	CssStyle[] rules;

	/++
		Parses css source text into `rules`, one [CssStyle] per `selector { ... }`
		block. Comments are dropped and `@` rules are skipped.
	+/
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two strings is being collected right now
		string* currentThing = &currentRule;
		foreach(c; source) {
			handle: switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was a plain slash, not a comment opener; put it back
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4: // inside an @ rule
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside the braces of an @ rule
					if(c == '}')
						state = 0; // skipping font face probably
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` on top, moving to a bigger heap buffer when the current one is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			// double while small, then grow by a fixed 4096 slots at a time
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True if there is nothing on the stack.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently at.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the next unvisited child if there is one, otherwise back up through the parents.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			// this element's children are exhausted
			if(stack.empty())
				isEmpty = true;
			else {
				// resume the parent where it left off
				current = stack.pop();
				goto more;
			}
		} else {
			// descend into the next child, remembering where we were
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	/// True once the whole tree has been walked.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element plus the index of the child last descended into (-1 = none yet)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
// Locates the first occurrence of `needle` within `haystack`.
// Returns the offset of the match, or -1 when std.algorithm.find comes back with an empty range.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	auto found = std.algorithm.find(haystack, needle);
	if(found.length == 0)
		return -1;
	// `found` is the tail of haystack beginning at the match, so the length difference is the offset
	return haystack.length - found.length;
}

// Returns a new array made of arr[0 .. position + 1], then `what`, then the rest of `arr`.
// Neither argument is modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	// array concatenation always allocates, so the result never aliases the inputs
	return arr[0 .. position + 1] ~ what ~ arr[position + 1 .. $];
}

// Linear search: true if any element of `arr` compares equal (==) to `item`.
package bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

// Copies every key/value pair of `arr` into a fresh associative array.
private string[string] aadup(in string[string] arr) {
	string[string] ret;
	foreach(k, v; arr)
		ret[k] = v;
	return ret;
}

// dom event support, if you want to use it

/// used for DOM events
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	/// Creates an event called `eventName` whose [srcElement] (also reachable as `target`) is `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	/// Set by [preventDefault]; checked before default handlers are run.
	bool defaultPrevented;
	/// Set by [stopPropagation]; checked after each element's handlers during [dispatch].
	bool propagationStopped;
	/// The name used to look up handlers on each element.
	string eventName;

	/// The element the event was created for.
	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	/// [dispatch] sets this to false for the capture pass and true for the bubble pass.
	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		if(eventName in e.bubblingEventHandlers)
		foreach(handler; e.bubblingEventHandlers[eventName])
			handler(e, this);

		if(!defaultPrevented)
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the element using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the target; each following entry is the previous one's parentNode
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			chain ~= curr;
			curr = curr.parentNode;
		}

		isBubbling = false;

		// capture pass: outermost ancestor first, target last
		foreach(e; chain.retro()) {
			if(eventName in e.capturingEventHandlers)
			foreach(handler; e.capturingEventHandlers[eventName])
				handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		// bubble pass: target first, outermost ancestor last; skipped entirely if capture stopped propagation
		isBubbling = true;
		if(!propagationStopped)
		foreach(e; chain) {
			if(eventName in e.bubblingEventHandlers)
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);

			if(propagationStopped)
				break;
		}

		// defaultPrevented is tested once, before the loop; every element in the chain with a default handler then gets it called
		if(!defaultPrevented)
		foreach(e; chain) {
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
		}
	}
}

struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// A default-initialized set of options.
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	/// Options with only [isRequired] set.
	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	/// Options with [pattern] set to `pattern` and [isRequired] set to `required`.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Reads the `required`, `pattern` and `placeholder` attributes of `e`, leaving a field at its default when the attribute is absent.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Writes the options that are set (isRequired true, non-empty pattern, non-empty placeholder) onto `e` and returns `e`.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		// Returns the next piece of text from getMoreHelper, or null when no helper was given.
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		// Asks hasMoreHelper whether more text is available; false when no helper was given.
		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a string that is already fully in memory.
		this(string d) {
			this.data = d;
		}

		/// Wraps a pair of delegates that supply text on demand; the first piece is fetched right away if `hasMoreHelper` reports one.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();

			// a stray stdout.flush() used to sit here; building a text stream has no business touching the console
		}

		/// Length of the text buffered so far, after possibly pulling in one more piece.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Returns the byte at `idx` and records it as the furthest index read if it is.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Returns data[start .. end] and records `end` as the furthest index read if it is.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice goes through [length], so it can trigger a fetch too.
		final size_t opDollar() {
			return length();
		}

		/// Appends `s` to this stream's buffer and returns this same object; note that the left operand itself is modified.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Appends `s` to this stream's buffer.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Replaces the buffer with `rhs`.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		// everything fetched so far
		string data;

		// highest index handed to opIndex / opSlice; length() uses it to decide when to fetch more
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

// Hands `obj` and `name` to arsd.database's fillData along with a callback that calls form.setValue(k, v).
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((k, v) => form.setValue(k, v), obj, name);
}


/+
/+
Syntax:

Tag: tagname#id.class
Tree: Tag(Children, comma, separated...)
Children: Tree or Variable
Variable: $varname with optional |funcname following.
If a variable has a tree after it, it breaks the variable down:
	* if array, foreach it does the tree
	* if struct, it breaks down the member variables

stolen from georgy on irc, see: https://github.com/georgy7/stringplate
+/
struct Stringplate {
	/++

	+/
	this(string s) {

	}

	/++

	+/
	Element expand(T...)(T vars) {
		return null;
	}
}
///
unittest {
	auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))");
	assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`);
}
+/

// True when every child is either a text node with non-blank content, or an element
// whose tag is listed in inlineElements and whose own children pass the same check.
bool allAreInlineHtml(const(Element)[] children) {
	foreach(node; children) {
		const visibleText = node.nodeType == NodeType.Text && node.nodeValue.strip.length;
		if(visibleText)
			continue; // cool

		const inlineSubtree = node.tagName.isInArray(inlineElements) && allAreInlineHtml(node.children);
		if(!inlineSubtree)
			return false; // prolly block
	}
	return true;
}

// Only space, carriage return, line feed and tab count as white here.
private bool isSimpleWhite(dchar c) {
	switch(c) {
		case ' ', '\r', '\n', '\t':
			return true;
		default:
			return false;
	}
}

unittest {
	// Test for issue #120
	string markup = `<html>
	<body>
		<P>AN
		<P>bubbles</P>
		<P>giggles</P>
	</body>
</html>`;
	auto parsed = new Document();
	parsed.parseUtf8(markup, false, false);
	auto rendered = parsed.toString();
	assert(
		rendered.indexOf("bubbles") < rendered.indexOf("giggles"),
		"paragraph order incorrect:\n" ~ rendered);
}

unittest {
	// test for suncarpet email dec 24 2019
	// arbitrary id asduiwh
	auto doc = new Document("<html>
		<head>
			<meta charset=\"utf-8\"></meta>
			<title>Element.querySelector Test</title>
		</head>
		<body>
			<div id=\"foo\">
				<div>Foo</div>
				<div>Bar</div>
			</div>
		</body>
		</html>");

	// descendant and child combinators, from the document and from an element
	assert(doc.querySelectorAll("div div").length == 2);
	assert(doc.querySelector("div").querySelectorAll("div").length == 2);
	assert(doc.querySelectorAll("> html").length == 0);
	assert(doc.querySelector("head").querySelectorAll("> title").length == 1);
	assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1);


	assert(doc.root.matches("html"));
	assert(!doc.root.matches("nothtml"));
	assert(doc.querySelector("#foo > div").matches("div"));
	assert(doc.querySelector("body > #foo").matches("#foo"));

	assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root!
	assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does
	assert(doc.querySelectorAll(" > body").length == 1); //  should mean the same thing
	assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this
	assert(doc.root.querySelectorAll(" > html").length == 0); // but not this

	// also confirming the querySelector works via the mdn definition
	auto foo = doc.requireSelector("#foo");
	assert(foo.querySelector("#foo > div") !is null);
	assert(foo.querySelector("body #foo > div") !is null);

	// this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope.
	// the new css :scope thing is designed to bring this in. and meh idk if i even care.
	//assert(foo.querySelectorAll("#foo > div").length == 2);
}

unittest {
	// based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example
	auto document = new Document(`<article>
  <div id="div-01">Here is div-01
    <div id="div-02">Here is div-02
      <div id="div-03">Here is div-03</div>
    </div>
  </div>
</article>`, true, true);

	auto innermost = document.getElementById("div-03");
	assert(innermost.closest("#div-02").id == "div-02");
	assert(innermost.closest("div div").id == "div-03");
	assert(innermost.closest("article > div").id == "div-01");
	assert(innermost.closest(":not(div)").tagName == "article");

	assert(innermost.closest("p") is null);
	assert(innermost.closest("p, div") is innermost);
}

unittest {
	// https://developer.mozilla.org/en-US/docs/Web/CSS/:is
	auto document = new Document(`<test>
	<div class="foo"><p>cool</p><span>bar</span></div>
	<main><p>two</p></main>
</test>`);

	assert(document.querySelectorAll(":is(.foo, main) p").length == 2);
	assert(document.querySelector("div:where(.foo)") !is null);
}

unittest {
	immutable string html = q{
<root>
<div class="roundedbox">
 <table>
  <caption class="boxheader">Recent Reviews</caption>
  <tr>
   <th>Game</th>
   <th>User</th>
   <th>Rating</th>
   <th>Created</th>
  </tr>

  <tr>
   <td>June 13, 2020 15:10</td>
   <td><a href="/reviews/8833">[Show]</a></td>
  </tr>

  <tr>
   <td>June 13, 2020 15:02</td>
   <td><a href="/reviews/8832">[Show]</a></td>
  </tr>

  <tr>
   <td>June 13, 2020 14:41</td>
   <td><a href="/reviews/8831">[Show]</a></td>
  </tr>
 </table>
</div>
</root>
};

	auto doc = new Document(cast(string)html);

	// the same selector is run once from the root element and once from the document
	enum reviewLink = `div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`;

	// this should select the second table row, but...
	auto rd = doc.root.querySelector(reviewLink);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");

	rd = doc.querySelector(reviewLink);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");
}

/*
Copyright: Adam D. Ruppe, 2010 - 2020
License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others

        Copyright Adam D. Ruppe 2010-2020.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
        http://www.boost.org/LICENSE_1_0.txt)
*/