// FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it.
// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary


// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch (not meant for runtime use):
// answers whether `name` is one of the attributes we allow to be
// accessed directly, without going through .attr
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource, DomParent {
	inout(Document) asDocument() inout { return this; }
	inout(Element) asElement() inout { return null; }

	/// Convenience method for web scraping. Requires [arsd.http2] to be
	/// included in the build as well as [arsd.characterencodings].
static Document fromUrl()(string url, bool strictMode = false) {
	// imported locally (and the function is a template) so arsd.http2
	// is only needed by programs that actually call this
	import arsd.http2;

	auto http = new HttpClient();
	auto response = http.navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();

	auto document = new Document();
	auto markup = cast(string) response.content;

	if(!strictMode) {
		// loose mode: let the parser work out the encoding from the data
		document.parseGarbage(markup);
	} else {
		// strict mode: case-sensitive, strict, using the charset the server declared
		document.parse(markup, true, true, response.contentTypeCharset);
	}

	return document;
}

/// Builds a document by handing `data` (which must be UTF-8) to [parseUtf8];
/// `caseSensitive` and `strict` are forwarded unchanged.
this(string data, bool caseSensitive = false, bool strict = false) {
	parseUtf8(data, caseSensitive, strict);
}

/**
	Creates an empty document. It has *nothing* in it at all.
*/
this() {

}

/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
/// can chain it.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
/// you could put in some kind of custom filter function tho.
ElementCollection opIndex(string selector) {
	// start from a collection holding just the root, then narrow it by the selector
	auto fromRoot = ElementCollection(this.root);
	return fromRoot[selector];
}

string _contentType = "text/html; charset=utf-8";

/// If you're using this for some other kind of XML, you can
/// set the content type here.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data.
/// Otherwise, if you don't set it, it assumes text/html; charset=utf-8.
@property string contentType(string mimeType) {
	// store the new value and hand it straight back
	return _contentType = mimeType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
@property string filename() const { return null; }

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return _contentType;
}

/// implementing the FileResource interface; it calls toString.
override immutable(ubyte)[] getData() const {
	auto serialized = this.toString();
	return cast(immutable(ubyte)[]) serialized;
}


/*
// Concatenates any consecutive text nodes
void normalize() {

}
*/

/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
/// Call this before calling parse().
///
/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	// one accept-everything callback, shared by every hook
	bool delegate(string) keepIt = (string) => true;

	parseSawComment = keepIt;
	parseSawAspCode = keepIt;
	parseSawPhpCode = keepIt;
	parseSawQuestionInstruction = keepIt;
	parseSawBangInstruction = keepIt;
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;

/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;

/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)
///
/// NOTE: this makes no attempt at added security.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	this.parse(data, false, false, null);
}

/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data) {
	auto stream = toUtf8Stream(data);
	parseStream(stream, true, true);
}

/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// They will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	parseStream(toUtf8Stream(data), caseSensitive, strict);
}

// Converts `rawdata` to UTF-8 and wraps it in a Utf8Stream for the parser.
//
// rawdata      = the document bytes, in whatever encoding they arrived in
// dataEncoding = name of that encoding, or null to sniff it from the data
// strict       = if true, an undeterminable encoding throws MarkupException and
//                conversion errors propagate; if false, Windows 1252 is used
//                as the fallback in both situations
//
// this is a template so we get lazy import behavior
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;
	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// skip an opening quote of either kind. The bounds check matters:
					// "charset=" may be the very last thing in the data, and indexing
					// past the end would be a range violation.
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
						idx++;
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// read till a quote or about 12 chars, whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length
					&& dataAsBytes[end] != '"'
					&& dataAsBytes[end] != '\''
					&& end - idx < 12)
					end++;

				// an empty name tells us nothing; leave dataEncoding null so the
				// "don't know" handling below kicks in
				if(end > idx)
					dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
		// if we really don't know by here, it means we already tried UTF-8,
		// looked for utf 16 and 32 byte order marks, and looked for xml or meta
		// tags... let's assume it's Windows-1252, since that's probably the most
		// common aside from utf that wouldn't be labeled.

		dataEncoding = "Windows 1252";
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// NOTE(review): in non-strict mode the name was lower-cased and stripped of
	// dashes above (e.g. "utf8"), so it can never equal "UTF-8" here and the data
	// always goes through convertToUtf8 - presumably harmless; confirm.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or bogus encoding name: fall back to the usual suspect
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	return toUtf8Stream(data);
}

// Wraps already-UTF-8 data in whatever Utf8Stream is: the string itself
// when Utf8Stream is just an alias of string, a new stream object otherwise.
private
Utf8Stream toUtf8Stream(in string rawdata) {
	string data = rawdata;
	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/++
	List of elements that can be assumed to be self-closed
	in this document.
The default for a Document is a hard-coded
	list of ones appropriate for HTML. For [XmlDocument], it defaults
	to empty. You can modify this after construction but before parsing.

	History:
		Added February 8, 2021 (included in dub release 9.2)
+/
string[] selfClosedElements = htmlSelfClosedElements;

/++
	List of elements that are considered inline for pretty printing.
	The default for a Document is hard-coded to something appropriate
	for HTML. For [XmlDocument], it defaults to empty. You can modify
	this after construction but before parsing.

	History:
		Added June 21, 2021 (included in dub release 10.1)
+/
string[] inlineElements = htmlInlineElements;

/**
	Take XMLish data and try to make the DOM tree out of it.

	The goal isn't to be perfect, but to just be good enough to
	approximate Javascript's behavior.

	If strict, it throws on something that doesn't make sense.
	(Examples: mismatched tags. It doesn't validate!)
	If not strict, it tries to recover anyway, and only throws
	when something is REALLY unworkable.

	If strict is false, it uses a magic list of tags that needn't
	be closed. If you are writing a document specifically for this,
	try to avoid such - use self closed tags at least. Easier to parse.

	The dataEncoding argument can be used to pass a specific
	charset encoding for automatic conversion. If null (which is NOT
	the default!), it tries to determine from the data itself,
	using the xml prolog or meta tags. If it still cannot tell,
	strict mode throws and non-strict mode assumes Windows 1252.

	If the encoding it ends up with is wrong, it can throw on non-ascii
	characters!


	Note that it previously assumed the data was encoded as UTF-8, which
	is why the dataEncoding argument defaults to that.

	So it shouldn't break backward compatibility.

	But, if you want the best behavior on wild data - figuring it out from the document
	instead of assuming - you'll probably want to change that argument to null.

	This is a template so it lazily imports arsd.characterencodings, which is required
	to fix up data encodings.

	If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
	dependency. If it is data from the Internet though, a random website, the encoding
	is often a lie. This function, if dataEncoding == null, can correct for that, or
	you can try parseGarbage. In those cases, arsd.characterencodings is required to
	compile.
*/
void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
	// sort out the encoding first, then hand the UTF-8 stream to the real parser
	parseStream(handleDataEncoding(rawdata, dataEncoding, strict), caseSensitive, strict);
}

// note: this work best in strict mode, unless data is just a simple string wrapper
void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
	// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
	// of my big app.

	assert(data !is null);

	// go through character by character.
	// if you see a <, consider it a tag.
	// name goes until the first non tagname character
	// then see if it self closes or has an attribute

	// if not in a tag, anything not a tag is a big text
	// node child. It ends as soon as it sees a <

	// Whitespace in text or attributes is preserved, but not between attributes

	// & and friends are converted when I know them, left the same otherwise


	// this it should already be done correctly..
so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 447 //validate(data); // it *must* be UTF-8 for this to work correctly 448 449 sizediff_t pos = 0; 450 451 clear(); 452 453 loose = !caseSensitive; 454 455 bool sawImproperNesting = false; 456 bool paragraphHackfixRequired = false; 457 458 int getLineNumber(sizediff_t p) { 459 int line = 1; 460 foreach(c; data[0..p]) 461 if(c == '\n') 462 line++; 463 return line; 464 } 465 466 void parseError(string message) { 467 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 468 } 469 470 bool eatWhitespace() { 471 bool ateAny = false; 472 while(pos < data.length && data[pos].isSimpleWhite) { 473 pos++; 474 ateAny = true; 475 } 476 return ateAny; 477 } 478 479 string readTagName() { 480 // remember to include : for namespaces 481 // basically just keep going until >, /, or whitespace 482 auto start = pos; 483 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 484 { 485 pos++; 486 if(pos == data.length) { 487 if(strict) 488 throw new Exception("tag name incomplete when file ended"); 489 else 490 break; 491 } 492 } 493 494 if(!caseSensitive) 495 return toLower(data[start..pos]); 496 else 497 return data[start..pos]; 498 } 499 500 string readAttributeName() { 501 // remember to include : for namespaces 502 // basically just keep going until >, /, or whitespace 503 auto start = pos; 504 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 505 { 506 if(data[pos] == '<') { 507 if(strict) 508 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 509 else 510 break; // e.g. <a href="something" <img src="poo" /></a>. 
The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 511 } 512 pos++; 513 if(pos == data.length) { 514 if(strict) 515 throw new Exception("unterminated attribute name"); 516 else 517 break; 518 } 519 } 520 521 if(!caseSensitive) 522 return toLower(data[start..pos]); 523 else 524 return data[start..pos]; 525 } 526 527 string readAttributeValue() { 528 if(pos >= data.length) { 529 if(strict) 530 throw new Exception("no attribute value before end of file"); 531 else 532 return null; 533 } 534 switch(data[pos]) { 535 case '\'': 536 case '"': 537 auto started = pos; 538 char end = data[pos]; 539 pos++; 540 auto start = pos; 541 while(pos < data.length && data[pos] != end) 542 pos++; 543 if(strict && pos == data.length) 544 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 545 string v = htmlEntitiesDecode(data[start..pos], strict); 546 pos++; // skip over the end 547 return v; 548 default: 549 if(strict) 550 parseError("Attributes must be quoted"); 551 // read until whitespace or terminator (/> or >) 552 auto start = pos; 553 while( 554 pos < data.length && 555 data[pos] != '>' && 556 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 557 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 558 !data[pos].isSimpleWhite) 559 pos++; 560 561 string v = htmlEntitiesDecode(data[start..pos], strict); 562 // don't skip the end - we'll need it later 563 return v; 564 } 565 } 566 567 TextNode readTextNode() { 568 auto start = pos; 569 while(pos < data.length && data[pos] != '<') { 570 pos++; 571 } 572 573 return TextNode.fromUndecodedString(this, data[start..pos]); 574 } 575 576 // this is obsolete! 
577 RawSource readCDataNode() { 578 auto start = pos; 579 while(pos < data.length && data[pos] != '<') { 580 pos++; 581 } 582 583 return new RawSource(this, data[start..pos]); 584 } 585 586 587 struct Ele { 588 int type; // element or closing tag or nothing 589 /* 590 type == 0 means regular node, self-closed (element is valid) 591 type == 1 means closing tag (payload is the tag name, element may be valid) 592 type == 2 means you should ignore it completely 593 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 594 type == 4 means the document was totally empty 595 */ 596 Element element; // for type == 0 or type == 3 597 string payload; // for type == 1 598 } 599 // recursively read a tag 600 Ele readElement(string[] parentChain = null) { 601 // FIXME: this is the slowest function in this module, by far, even in strict mode. 602 // Loose mode should perform decently, but strict mode is the important one. 
603 if(!strict && parentChain is null) 604 parentChain = []; 605 606 static string[] recentAutoClosedTags; 607 608 if(pos >= data.length) 609 { 610 if(strict) { 611 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 612 } else { 613 if(parentChain.length) 614 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 615 else 616 return Ele(4); // signal emptiness upstream 617 } 618 } 619 620 if(data[pos] != '<') { 621 return Ele(0, readTextNode(), null); 622 } 623 624 enforce(data[pos] == '<'); 625 pos++; 626 if(pos == data.length) { 627 if(strict) 628 throw new MarkupException("Found trailing < at end of file"); 629 // if not strict, we'll just skip the switch 630 } else 631 switch(data[pos]) { 632 // I don't care about these, so I just want to skip them 633 case '!': // might be a comment, a doctype, or a special instruction 634 pos++; 635 636 // FIXME: we should store these in the tree too 637 // though I like having it stripped out tbh. 638 639 if(pos == data.length) { 640 if(strict) 641 throw new MarkupException("<! opened at end of file"); 642 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 643 // comment 644 pos += 2; 645 646 // FIXME: technically, a comment is anything 647 // between -- and -- inside a <!> block. 
648 // so in <!-- test -- lol> , the " lol" is NOT a comment 649 // and should probably be handled differently in here, but for now 650 // I'll just keep running until --> since that's the common way 651 652 auto commentStart = pos; 653 while(pos+3 < data.length && data[pos..pos+3] != "-->") 654 pos++; 655 656 auto end = commentStart; 657 658 if(pos + 3 >= data.length) { 659 if(strict) 660 throw new MarkupException("unclosed comment"); 661 end = data.length; 662 pos = data.length; 663 } else { 664 end = pos; 665 assert(data[pos] == '-'); 666 pos++; 667 assert(data[pos] == '-'); 668 pos++; 669 assert(data[pos] == '>'); 670 pos++; 671 } 672 673 if(parseSawComment !is null) 674 if(parseSawComment(data[commentStart .. end])) { 675 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 676 } 677 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 678 pos += 7; 679 680 auto cdataStart = pos; 681 682 ptrdiff_t end = -1; 683 typeof(end) cdataEnd; 684 685 if(pos < data.length) { 686 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 687 end = data[pos .. $].indexOf("]]>"); 688 } 689 690 if(end == -1) { 691 if(strict) 692 throw new MarkupException("Unclosed CDATA section"); 693 end = pos; 694 cdataEnd = pos; 695 } else { 696 cdataEnd = pos + end; 697 pos = cdataEnd + 3; 698 } 699 700 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 701 } else { 702 auto start = pos; 703 while(pos < data.length && data[pos] != '>') 704 pos++; 705 706 auto bangEnds = pos; 707 if(pos == data.length) { 708 if(strict) 709 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 710 } else pos++; // skipping the > 711 712 if(parseSawBangInstruction !is null) 713 if(parseSawBangInstruction(data[start .. bangEnds])) { 714 // FIXME: these should be able to modify the parser state, 715 // doing things like adding entities, somehow. 716 717 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 718 } 719 } 720 721 /* 722 if(pos < data.length && data[pos] == '>') 723 pos++; // skip the > 724 else 725 assert(!strict); 726 */ 727 break; 728 case '%': 729 case '?': 730 /* 731 Here's what we want to support: 732 733 <% asp code %> 734 <%= asp code %> 735 <?php php code ?> 736 <?= php code ?> 737 738 The contents don't really matter, just if it opens with 739 one of the above for, it ends on the two char terminator. 740 741 <?something> 742 this is NOT php code 743 because I've seen this in the wild: <?EM-dummyText> 744 745 This could be php with shorttags which would be cut off 746 prematurely because if(a >) - that > counts as the close 747 of the tag, but since dom.d can't tell the difference 748 between that and the <?EM> real world example, it will 749 not try to look for the ?> ending. 750 751 The difference between this and the asp/php stuff is that it 752 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 753 on >. 754 */ 755 756 char end = data[pos]; 757 auto started = pos; 758 bool isAsp = end == '%'; 759 int currentIndex = 0; 760 bool isPhp = false; 761 bool isEqualTag = false; 762 int phpCount = 0; 763 764 more: 765 pos++; // skip the start 766 if(pos == data.length) { 767 if(strict) 768 throw new MarkupException("Unclosed <"~end~" by end of file"); 769 } else { 770 currentIndex++; 771 if(currentIndex == 1 && data[pos] == '=') { 772 if(!isAsp) 773 isPhp = true; 774 isEqualTag = true; 775 goto more; 776 } 777 if(currentIndex == 1 && data[pos] == 'p') 778 phpCount++; 779 if(currentIndex == 2 && data[pos] == 'h') 780 phpCount++; 781 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 782 isPhp = true; 783 784 if(data[pos] == '>') { 785 if((isAsp || isPhp) && data[pos - 1] != end) 786 goto more; 787 // otherwise we're done 788 } else 789 goto more; 790 } 791 792 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 793 auto code = data[started .. 
pos]; 794 795 796 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 797 if(pos < data.length) 798 pos++; // get past the > 799 800 if(isAsp && parseSawAspCode !is null) { 801 if(parseSawAspCode(code)) { 802 return Ele(3, new AspCode(this, code), null); 803 } 804 } else if(isPhp && parseSawPhpCode !is null) { 805 if(parseSawPhpCode(code)) { 806 return Ele(3, new PhpCode(this, code), null); 807 } 808 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 809 if(parseSawQuestionInstruction(code)) { 810 return Ele(3, new QuestionInstruction(this, code), null); 811 } 812 } 813 break; 814 case '/': // closing an element 815 pos++; // skip the start 816 auto p = pos; 817 while(pos < data.length && data[pos] != '>') 818 pos++; 819 //writefln("</%s>", data[p..pos]); 820 if(pos == data.length && data[pos-1] != '>') { 821 if(strict) 822 throw new MarkupException("File ended before closing tag had a required >"); 823 else 824 data ~= ">"; // just hack it in 825 } 826 pos++; // skip the '>' 827 828 string tname = data[p..pos-1]; 829 if(!caseSensitive) 830 tname = tname.toLower(); 831 832 return Ele(1, null, tname); // closing tag reports itself here 833 case ' ': // assume it isn't a real element... 834 if(strict) { 835 parseError("bad markup - improperly placed <"); 836 assert(0); // parseError always throws 837 } else 838 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 839 default: 840 841 if(!strict) { 842 // what about something that kinda looks like a tag, but isn't? 843 auto nextTag = data[pos .. $].indexOf("<"); 844 auto closeTag = data[pos .. $].indexOf(">"); 845 if(closeTag != -1 && nextTag != -1) 846 if(nextTag < closeTag) { 847 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 848 849 auto equal = data[pos .. 
$].indexOf("=\""); 850 if(equal != -1 && equal < closeTag) { 851 // this MIGHT be ok, soldier on 852 } else { 853 // definitely no good, this must be a (horribly distorted) text node 854 pos++; // skip the < we're on - don't want text node to end prematurely 855 auto node = readTextNode(); 856 node.contents = "<" ~ node.contents; // put this back 857 return Ele(0, node, null); 858 } 859 } 860 } 861 862 string tagName = readTagName(); 863 string[string] attributes; 864 865 Ele addTag(bool selfClosed) { 866 if(selfClosed) 867 pos++; 868 else { 869 if(!strict) 870 if(tagName.isInArray(selfClosedElements)) 871 // these are de-facto self closed 872 selfClosed = true; 873 } 874 875 import std.algorithm.comparison; 876 877 if(strict) { 878 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)])); 879 } else { 880 // if we got here, it's probably because a slash was in an 881 // unquoted attribute - don't trust the selfClosed value 882 if(!selfClosed) 883 selfClosed = tagName.isInArray(selfClosedElements); 884 885 while(pos < data.length && data[pos] != '>') 886 pos++; 887 888 if(pos >= data.length) { 889 // the tag never closed 890 assert(data.length != 0); 891 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 
892 } 893 } 894 895 auto whereThisTagStarted = pos; // for better error messages 896 897 pos++; 898 899 auto e = createElement(tagName); 900 e.attributes = attributes; 901 version(dom_node_indexes) { 902 if(e.dataset.nodeIndex.length == 0) 903 e.dataset.nodeIndex = to!string(&(e.attributes)); 904 } 905 e.selfClosed = selfClosed; 906 e.parseAttributes(); 907 908 909 // HACK to handle script and style as a raw data section as it is in HTML browsers 910 if(tagName == "script" || tagName == "style") { 911 if(!selfClosed) { 912 string closer = "</" ~ tagName ~ ">"; 913 ptrdiff_t ending; 914 if(pos >= data.length) 915 ending = -1; 916 else 917 ending = indexOf(data[pos..$], closer); 918 919 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 920 /* 921 if(loose && ending == -1 && pos < data.length) 922 ending = indexOf(data[pos..$], closer.toUpper()); 923 */ 924 if(ending == -1) { 925 if(strict) 926 throw new Exception("tag " ~ tagName ~ " never closed"); 927 else { 928 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 929 if(pos < data.length) { 930 e = new TextNode(this, data[pos .. $]); 931 pos = data.length; 932 } 933 } 934 } else { 935 ending += pos; 936 e.innerRawSource = data[pos..ending]; 937 pos = ending + closer.length; 938 } 939 } 940 return Ele(0, e, null); 941 } 942 943 bool closed = selfClosed; 944 945 void considerHtmlParagraphHack(Element n) { 946 assert(!strict); 947 if(e.tagName == "p" && e.tagName == n.tagName) { 948 // html lets you write <p> para 1 <p> para 1 949 // but in the dom tree, they should be siblings, not children. 
950 paragraphHackfixRequired = true; 951 } 952 } 953 954 //writef("<%s>", tagName); 955 while(!closed) { 956 Ele n; 957 if(strict) 958 n = readElement(); 959 else 960 n = readElement(parentChain ~ tagName); 961 962 if(n.type == 4) return n; // the document is empty 963 964 if(n.type == 3 && n.element !is null) { 965 // special node, append if possible 966 if(e !is null) 967 e.appendChild(n.element); 968 else 969 piecesBeforeRoot ~= n.element; 970 } else if(n.type == 0) { 971 if(!strict) 972 considerHtmlParagraphHack(n.element); 973 e.appendChild(n.element); 974 } else if(n.type == 1) { 975 bool found = false; 976 if(n.payload != tagName) { 977 if(strict) 978 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 979 else { 980 sawImproperNesting = true; 981 // this is so we don't drop several levels of awful markup 982 if(n.element) { 983 if(!strict) 984 considerHtmlParagraphHack(n.element); 985 e.appendChild(n.element); 986 n.element = null; 987 } 988 989 // is the element open somewhere up the chain? 990 foreach(i, parent; parentChain) 991 if(parent == n.payload) { 992 recentAutoClosedTags ~= tagName; 993 // just rotating it so we don't inadvertently break stuff with vile crap 994 if(recentAutoClosedTags.length > 4) 995 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 996 997 n.element = e; 998 return n; 999 } 1000 1001 // if not, this is a text node; we can't fix it up... 
1002 1003 // If it's already in the tree somewhere, assume it is closed by algorithm 1004 // and we shouldn't output it - odds are the user just flipped a couple tags 1005 foreach(ele; e.tree) { 1006 if(ele.tagName == n.payload) { 1007 found = true; 1008 break; 1009 } 1010 } 1011 1012 foreach(ele; recentAutoClosedTags) { 1013 if(ele == n.payload) { 1014 found = true; 1015 break; 1016 } 1017 } 1018 1019 if(!found) // if not found in the tree though, it's probably just text 1020 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1021 } 1022 } else { 1023 if(n.element) { 1024 if(!strict) 1025 considerHtmlParagraphHack(n.element); 1026 e.appendChild(n.element); 1027 } 1028 } 1029 1030 if(n.payload == tagName) // in strict mode, this is always true 1031 closed = true; 1032 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1033 } 1034 //writef("</%s>\n", tagName); 1035 return Ele(0, e, null); 1036 } 1037 1038 // if a tag was opened but not closed by end of file, we can arrive here 1039 if(!strict && pos >= data.length) 1040 return addTag(false); 1041 //else if(strict) assert(0); // should be caught before 1042 1043 switch(data[pos]) { 1044 default: assert(0); 1045 case '/': // self closing tag 1046 return addTag(true); 1047 case '>': 1048 return addTag(false); 1049 case ' ': 1050 case '\t': 1051 case '\n': 1052 case '\r': 1053 // there might be attributes... 1054 moreAttributes: 1055 eatWhitespace(); 1056 1057 // same deal as above the switch.... 
1058 if(!strict && pos >= data.length) 1059 return addTag(false); 1060 1061 if(strict && pos >= data.length) 1062 throw new MarkupException("tag open, didn't find > before end of file"); 1063 1064 switch(data[pos]) { 1065 case '/': // self closing tag 1066 return addTag(true); 1067 case '>': // closed tag; open -- we now read the contents 1068 return addTag(false); 1069 default: // it is an attribute 1070 string attrName = readAttributeName(); 1071 string attrValue = attrName; 1072 1073 bool ateAny = eatWhitespace(); 1074 if(strict && ateAny) 1075 throw new MarkupException("inappropriate whitespace after attribute name"); 1076 1077 if(pos >= data.length) { 1078 if(strict) 1079 assert(0, "this should have thrown in readAttributeName"); 1080 else { 1081 data ~= ">"; 1082 goto blankValue; 1083 } 1084 } 1085 if(data[pos] == '=') { 1086 pos++; 1087 1088 ateAny = eatWhitespace(); 1089 // the spec actually allows this! 1090 //if(strict && ateAny) 1091 //throw new MarkupException("inappropriate whitespace after attribute equals"); 1092 1093 attrValue = readAttributeValue(); 1094 1095 eatWhitespace(); 1096 } 1097 1098 blankValue: 1099 1100 if(strict && attrName in attributes) 1101 throw new MarkupException("Repeated attribute: " ~ attrName); 1102 1103 if(attrName.strip().length) 1104 attributes[attrName] = attrValue; 1105 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1106 1107 if(!strict && pos < data.length && data[pos] == '<') { 1108 // this is the broken tag that doesn't have a > at the end 1109 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1110 // let's insert one as a hack 1111 goto case '>'; 1112 } 1113 1114 goto moreAttributes; 1115 } 1116 } 1117 } 1118 1119 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1120 //assert(0); 1121 } 1122 1123 eatWhitespace(); 1124 Ele r; 1125 do { 1126 r = readElement(); // there SHOULD only be one element... 
1127 1128 if(r.type == 3 && r.element !is null) 1129 piecesBeforeRoot ~= r.element; 1130 1131 if(r.type == 4) 1132 break; // the document is completely empty... 1133 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1134 1135 root = r.element; 1136 root.parent_ = this; 1137 1138 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1139 while(r.type != 4) { 1140 r = readElement(); 1141 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1142 if(r.element !is null) 1143 piecesAfterRoot ~= r.element; 1144 } 1145 } 1146 1147 if(root is null) 1148 { 1149 if(strict) 1150 assert(0, "empty document should be impossible in strict mode"); 1151 else 1152 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1153 } 1154 1155 if(paragraphHackfixRequired) { 1156 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1157 1158 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1159 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1160 1161 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1162 // Kind of inefficient because we can't detect when we recurse back out of a node. 1163 Element[Element] insertLocations; 1164 auto iterator = root.tree; 1165 foreach(ele; iterator) { 1166 if(ele.parentNode is null) 1167 continue; 1168 1169 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1170 auto shouldBePreviousSibling = ele.parentNode; 1171 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
1172 if (auto p = holder in insertLocations) { 1173 shouldBePreviousSibling = *p; 1174 assert(shouldBePreviousSibling.parentNode is holder); 1175 } 1176 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1177 insertLocations[holder] = ele; 1178 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1179 } 1180 } 1181 } 1182 } 1183 1184 /* end massive parse function */ 1185 1186 /// Gets the <title> element's innerText, if one exists 1187 @property string title() { 1188 bool doesItMatch(Element e) { 1189 return (e.tagName == "title"); 1190 } 1191 1192 auto e = findFirst(&doesItMatch); 1193 if(e) 1194 return e.innerText(); 1195 return ""; 1196 } 1197 1198 /// Sets the title of the page, creating a <title> element if needed. 1199 @property void title(string t) { 1200 bool doesItMatch(Element e) { 1201 return (e.tagName == "title"); 1202 } 1203 1204 auto e = findFirst(&doesItMatch); 1205 1206 if(!e) { 1207 e = createElement("title"); 1208 auto heads = getElementsByTagName("head"); 1209 if(heads.length) 1210 heads[0].appendChild(e); 1211 } 1212 1213 if(e) 1214 e.innerText = t; 1215 } 1216 1217 // FIXME: would it work to alias root this; ???? might be a good idea 1218 /// These functions all forward to the root element. See the documentation in the Element class. 
Element getElementById(string id) {
	// Pure forwarder: the lookup itself is done by the root element.
	return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	// A failed dynamic cast and "no match at all" are reported the same way.
	auto e = cast(SomeElementType) querySelector(selector);
	if(e is null)
		throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
	return e;
}

/// ditto
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	// `file` and `line` are accepted for signature parity with requireSelector; they are not used here.
	auto e = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(e);
}

/// ditto
@scriptable
Element querySelector(string selector) {
	// see comment below on Document.querySelectorAll
	auto s = documentSelector(selector);
	// Only the first lazily produced match is wanted.
	foreach(e; s.getMatchingElementsLazy(this.root))
		return e;
	return null;
}

/// ditto
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto s = documentSelector(selector);
	return s.getMatchingElements(this.root);
}

/// ditto
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// ditto
@scriptable
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/// ditto
@scriptable
Element[] getElementsByClassName(string tag) {
	return root.getElementsByClassName(tag);
}

/++
	Returns the first element, in depth-first document order starting at the
	root, whose `tagName` equals `tag`, or `null` if there is none (or the
	document has no root).

	When the document is [loose], `tag` is lower-cased before comparing.
+/
Element getFirstElementByTagName(string tag) {
	// FIXME: btw, this could just be a lazy range......
	if(loose)
		tag = tag.toLower();
	bool doesItMatch(Element e) {
		return e.tagName == tag;
	}
	return findFirst(&doesItMatch);
}

/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// Returns the `content` of the `<meta>` tag inside `<head>` identified by `name`,
/// or `null` if there is no such tag.
///
/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon
string getMeta(string name) {
	string thing;
	auto e = findMetaElement(name, thing);
	if(e is null)
		return null;
	return e.content;
}

/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
///
/// If no matching `<meta>` exists, one is added to `<head>`; `requireSelector("head")`
/// throws if the document has no `<head>`.
void setMeta(string name, string value) {
	string thing;
	auto e = findMetaElement(name, thing);
	if(e is null) {
		e = requireSelector("head").addChild("meta");
		e.setAttribute(thing, name);
	}

	e.content = value;
}

/// Returns every `<form>` element in the document.
Form[] forms() {
	// NOTE(review): this is an array reinterpret cast, not a per-element dynamic cast;
	// it presumably relies on every "form" element having been created as a Form
	// (Element.make does so) - confirm for elements constructed by other means.
	return cast(Form[]) getElementsByTagName("form");
}

/// Creates a new, unattached `<form>` element.
Form createForm()
	out(ret) {
		assert(ret !is null);
	}
do {
	return cast(Form) createElement("form");
}

/// Creates a new, unattached element through [Element.make], using this document's
/// self-closed element list. In [loose] mode the name is lower-cased first.
Element createElement(string name) {
	if(loose)
		name = name.toLower();

	return Element.make(name, null, null, selfClosedElements);
}

/// Creates a new [DocumentFragment] constructed with this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new [TextNode] holding `content`, constructed with this document.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Depth-first, pre-order search from the root. Returns the first element for which
/// `doesItMatch` returns true, or `null` if none does or the document has no root.
Element findFirst(bool delegate(Element) doesItMatch) {
	if(root is null)
		return null;
	Element result;

	// Returns true as soon as a match has been stored in `result`, which
	// short-circuits the remaining traversal.
	bool goThroughElement(Element e) {
		if(doesItMatch(e)) {
			result = e;
			return true;
		}

		foreach(child; e.children) {
			if(goThroughElement(child))
				return true;
		}

		return false;
	}

	goThroughElement(root);

	return result;
}

/// Drops the root element and resets [loose] to false.
void clear() {
	root = null;
	loose = false;
}

/// Overrides the prolog returned by [prolog]; once called, the explicitly set text always wins.
void setProlog(string d) {
	_prolog = d;
	prologWasSet = true;
}

// Shared by querySelector and querySelectorAll: parses `selector` and, for every
// component whose first part has separation 0, rewrites that separation to -1.
// Per the note in querySelectorAll this is what lets the root element itself match.
// NOTE(review): the meaning of the 0 / -1 separation codes is defined by Selector,
// which is outside this section - confirm there.
private Selector documentSelector(string selector) {
	auto s = Selector(selector);//, !loose);
	foreach(ref comp; s.components)
		if(comp.parts.length && comp.parts[0].separation == 0)
			comp.parts[0].separation = -1;
	return s;
}

// Shared by getMeta and setMeta: picks the identifying attribute ("property" when
// `name` contains a colon, "name" otherwise), reports it through `keyAttribute`,
// and returns the matching <meta> under <head>, or null.
private Element findMetaElement(string name, out string keyAttribute) {
	keyAttribute = name.indexOf(":") == -1 ? "name" : "property";
	return querySelector("head meta["~keyAttribute~"="~name~"]");
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/++
	The text written before the root element by [toString].

	Returns the built-in or user-set prolog when [setProlog] was called or when
	nothing was kept in [piecesBeforeRoot]; otherwise returns each kept piece's
	`toString` followed by a newline.
+/
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

/// Serializes the document: the [prolog] followed by the root element.
/// A document without a root (for example after [clear]) yields just the prolog.
override string toString() const {
	// clear() sets root to null; don't dereference it in that state
	if(root is null)
		return prolog;
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.

	The arguments are passed unchanged to the root's and to each
	[piecesAfterRoot] entry's `toPrettyString`. A null root is skipped.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	import std.string;
	string s = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	// clear() sets root to null; don't dereference it in that state
	if(root !is null)
		s ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
	foreach(a; piecesAfterRoot)
		s ~= a.toPrettyString(insertComments, indentationLevel, indentWith);
	return s;
}

/// The root element of the document; null after [clear].
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
// When set, createElement and getFirstElementByTagName lower-case the tag name they
// are given; clear() resets it to false.
bool loose;



// what follows are for mutation events that you can observe
// Every delegate in this array is called, in order, by dispatchMutationEvent.
void delegate(DomMutationEvent)[] eventObservers;

// Calls each registered observer with `e`, in registration order.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(o; eventObservers)
		o(e);
}
}

// Implemented by both Document and Element so that a node's parent link can refer
// to either; exactly one of the two accessors returns non-null in the
// implementations visible here.
interface DomParent {
	inout(Document) asDocument() inout;
	inout(Element) asElement() inout;
}

/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element : DomParent {
	// An Element is never a Document...
	inout(Document) asDocument() inout { return null; }
	// ...and is always itself when viewed as an Element.
	inout(Element) asElement() inout { return this; }

	/// Returns a collection of elements by selector.
	/// See: [Document.opIndex]
	ElementCollection opIndex(string selector) {
		// Wrap this element in a collection, then let the collection apply the selector.
		auto e = ElementCollection(this);
		return e[selector];
	}

	/++
		Returns the child node with the particular index.

		Be aware that child nodes include text nodes, including
		whitespace-only nodes.

		Returns `null`, rather than failing, when `index` is out of range.
	+/
	Element opIndex(size_t index) {
		if(index >= children.length)
			return null;
		return this.children[index];
	}

	/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
	out(ret) {
		assert(ret !is null);
	}
do {
	// A missing element and a failed dynamic cast both come out as null here.
	if(auto match = cast(SomeElementType) getElementById(id))
		return match;

	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
}

/// Like [requireElementById], but the element is located with a selector
/// (via `querySelector`) instead of an id. Throws [ElementNotFoundException]
/// when nothing matches or the match is not a `SomeElementType`.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
	out(ret) {
		assert(ret !is null);
	}
do {
	if(auto match = cast(SomeElementType) querySelector(selector))
		return match;

	throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
}


/++
	Non-throwing counterpart of [requireSelector]: the result of `querySelector`,
	dynamically cast to `SomeElementType`, is wrapped in a [MaybeNullElement]
	whether or not it is null.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	return MaybeNullElement!SomeElementType(cast(SomeElementType) querySelector(selector));
}



/// The class attribute split on single spaces.
@property string[] classes() {
	return className().split(" ");
}

/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
@scriptable
Element addClass(string c) {
	// Only touch the attribute when the class is not already present.
	if(!hasClass(c)) {
		string current = getAttribute("class");
		setAttribute("class", current.length == 0 ? c : current ~ " " ~ c);
	}

	return this;
}

/// Takes one class name out of the class attribute, leaving the others in their
/// original order. Returns `this` for chaining.
@scriptable
Element removeClass(string c) {
	if(hasClass(c)) {
		string remaining;
		foreach(existing; classes) {
			if(existing != c) {
				// single space between the names that are kept
				if(remaining.length)
					remaining ~= " ";
				remaining ~= existing;
			}
		}

		className = remaining.strip();
	}

	return this;
}

/// Tells whether `c` is one of the space-separated names in the class attribute.
bool hasClass(string c) {
	string all = className;

	// Cheap rejection: if the text does not occur anywhere, it cannot be a class.
	if(all.indexOf(c) == -1)
		return false;

	// It occurs somewhere; make sure it is a whole name and not a substring of one.
	foreach(candidate; all.split(" ")) {
		if(candidate == c)
			return true;
	}

	return false;
}


/* *******************************
	  DOM Mutation
*********************************/
/// Shortcut for building an element with [Element.make] and appending it here.
/// What `childInfo` and `childInfo2` mean depends on the tag (for example, the
/// src of an <img> rather than inner text). Returns the result of `appendChild`.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
	in {
		assert(tagName !is null);
	}
	out(e) {
		//assert(e.parentNode is this);
		//assert(e.parentDocument is this.parentDocument);
	}
do {
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	return appendChild(Element.make(tagName, childInfo, childInfo2));
}

/// Builds an element with [Element.make] and inserts it into this element's parent
/// directly after this element. Returns the result of the parent's `insertAfter`.
///
/// Together with addChild and parentNode, this lets you build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
	in {
		assert(tagName !is null);
		assert(parentNode !is null);
	}
	out(added) {
		assert(added.parentNode is this.parentNode);
		assert(added.parentDocument is this.parentDocument);
	}
do {
	auto sibling = Element.make(tagName, childInfo, childInfo2);
	return parentNode.insertAfter(this, sibling);
}

/// Inserts an existing element into this element's parent directly after this element.
Element addSibling(Element e) {
	auto holder = parentNode;
	return holder.insertAfter(this, e);
}

/// Same as `appendChild(e)`.
Element addChild(Element e) {
	return appendChild(e);
}

/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
///              or div.addChildren("Hello, ", user.name, "!");
///
/// Arguments whose type converts to [Element] are appended with `appendChild`;
/// string arguments are appended with `appendText`. Any other argument type is
/// rejected at compile time.

/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
void addChildren(T...)(T t) {
	import std.conv : to;
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value, not a type, so the type tests have to go through
		// typeof(item); testing `item` directly is always false and made every
		// call fall through to the static assert.
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Creates a `tagName` element via [Element.make] (passing `info2` as its second
/// info argument), puts `firstChild` inside it, appends it to this element and
/// returns the new element.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
	assert(firstChild !is null);
}
out(ret) {
	assert(ret !is null);
	assert(ret.parentNode is this);
	assert(firstChild.parentNode is ret);

	assert(ret.parentDocument is this.parentDocument);
	//assert(firstChild.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, "", info2);
	e.appendChild(firstChild);
	this.appendChild(e);
	return e;
}

/// Creates a `tagName` element via [Element.make] (passing `info2` as its second
/// info argument), appends it to this element, then sets its `innerHTML` from
/// `innerHtml.source`. Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
	assert(ret !is null);
	// parentNode skips DocumentFragment parents, hence the exemption when `this` is one
	assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(ret.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, "", info2);
	// attach first, then parse the html into it
	this.appendChild(e);
	e.innerHTML = innerHtml.source;
	return e;
}


/// Appends each element of `children`, in order, with `appendChild`.
void appendChildren(Element[] children) {
	foreach(ele; children)
		appendChild(ele);
}

///.
void reparent(Element newParent)
	in {
		assert(newParent !is null);
		assert(parentNode !is null);
	}
	out {
		assert(this.parentNode is newParent);
		//assert(isInArray(this, newParent.children));
	}
do {
	// Moves this element: detach from the current parent, then append to the new one.
	parentNode.removeChild(this);
	newParent.appendChild(this);
}

/**
	Strips this tag out of the document, putting its inner html
	as children of the parent.

	For example, given: `<p>hello <b>there</b></p>`, if you
	call `stripOut` on the `b` element, you'll be left with
	`<p>hello there</p>`.

	The idea here is to make it easy to get rid of garbage
	markup you aren't interested in.

	Requires a parent (checked by the in contract). Afterwards this
	element has no parent and no children.
*/
void stripOut()
	in {
		assert(parentNode !is null);
	}
	out {
		assert(parentNode is null);
		assert(children.length == 0);
	}
do {
	foreach(c; children)
		c.parentNode = null; // remove the parent
	// With children, they take this element's place in the parent; without any,
	// this element is simply removed.
	if(children.length)
		parentNode.replaceChild(this, this.children);
	else
		parentNode.removeChild(this);
	this.children.length = 0; // we reparented them all above
}

/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
/// if the element already isn't in a tree, it does nothing.
/// Always returns `this`.
Element removeFromTree()
	in {

	}
	out(var) {
		assert(this.parentNode is null);
		assert(var is this);
	}
do {
	if(this.parentNode is null)
		return this;

	this.parentNode.removeChild(this);

	return this;
}

/++
	Wraps this element inside the given element.
	It's like `this.replaceWith(what); what.appendChild(this);`

	Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
	you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.

	Returns `what`.
+/
Element wrapIn(Element what)
	in {
		assert(what !is null);
	}
	out(ret) {
		assert(this.parentNode is what);
		assert(ret is what);
	}
do {
	// `what` first takes this element's slot in the tree, then adopts this element.
	this.replaceWith(what);
	what.appendChild(this);

	return what;
}

/// Replaces this element with something else in the tree.
/// `e` is first detached from wherever it currently is; `e` is returned.
Element replaceWith(Element e)
in {
	assert(this.parentNode !is null);
}
do {
	e.removeFromTree();
	this.parentNode.replaceChild(this, e);
	return e;
}

/**
	Splits the className into an array of each class given

	The split is on single space characters.
*/
string[] classNames() const {
	return className().split(" ");
}

/**
	Fetches the first consecutive text nodes concatenated together.


	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string s;
	foreach(child; children) {
		// the first non-text child ends the run
		if(child.nodeType != NodeType.Text)
			break;

		s ~= child.nodeValue();
	}
	return s;
}


/**
	Returns the text directly under this element.


	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
	past child tags. So, `<example>some <b>bold</b> text</example>`
	will return `some  text` because it only gets the text, skipping non-text children.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string ret;
	foreach(e; children) {
		if(e.nodeType == NodeType.Text)
			ret ~= e.nodeValue();
	}

	return ret;
}

/**
	Sets the direct text, without modifying other child nodes.


	Unlike [innerText], this does *not* remove existing elements in the element.

	It only replaces the first text node it sees.

	If there are no text nodes, it calls [appendText].

	So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
	foreach(e; children) {
		if(e.nodeType == NodeType.Text) {
			// NOTE(review): assumes every child reporting NodeType.Text really is a
			// TextNode; the cast result is not checked for null.
			auto it = cast(TextNode) e;
			it.contents = text;
			return;
		}
	}

	appendText(text);
}

// do nothing, this is primarily a virtual hook
// for links and forms
void setValue(string field, string value) { }


// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
//
// Packs the arguments into a DomMutationEvent targeting this element and hands it
// to the owning document's dispatchMutationEvent. Does nothing when this element
// has no parent document.
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
	if(parentDocument is null) return;
	DomMutationEvent me;
	me.operation = operation;
	me.target = this;
	me.relatedString = s1;
	me.relatedString2 = s2;
	me.related = r;
	me.related2 = r2;
	parentDocument.dispatchMutationEvent(me);
}

// putting all the members up front

// this ought to be private. don't use it directly.
Element[] children;

/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;

/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;

/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;

// The owner of this node: either an Element or a Document (see DomParent), or null
// when detached. Read through parentNode / parentDocument.
private DomParent parent_;

/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
1945 @property inout(Document) parentDocument() inout { 1946 if(this.parent_ is null) 1947 return null; 1948 auto p = cast() this.parent_.asElement; 1949 auto prev = cast() this; 1950 while(p) { 1951 prev = p; 1952 if(p.parent_ is null) 1953 return null; 1954 p = cast() p.parent_.asElement; 1955 } 1956 return cast(inout) prev.parent_.asDocument; 1957 } 1958 1959 deprecated @property void parentDocument(Document doc) { 1960 parent_ = doc; 1961 } 1962 1963 ///. 1964 inout(Element) parentNode() inout { 1965 if(parent_ is null) 1966 return null; 1967 1968 auto p = parent_.asElement; 1969 1970 if(cast(DocumentFragment) p) 1971 return p.parent_.asElement; 1972 1973 return p; 1974 } 1975 1976 //protected 1977 Element parentNode(Element e) { 1978 parent_ = e; 1979 return e; 1980 } 1981 1982 // these are here for event handlers. Don't forget that this library never fires events. 1983 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 1984 1985 version(dom_with_events) { 1986 EventHandler[][string] bubblingEventHandlers; 1987 EventHandler[][string] capturingEventHandlers; 1988 EventHandler[string] defaultEventHandlers; 1989 1990 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 1991 if(event.length > 2 && event[0..2] == "on") 1992 event = event[2 .. $]; 1993 1994 if(useCapture) 1995 capturingEventHandlers[event] ~= handler; 1996 else 1997 bubblingEventHandlers[event] ~= handler; 1998 } 1999 } 2000 2001 2002 // and now methods 2003 2004 /++ 2005 Convenience function to try to do the right thing for HTML. This is the main way I create elements. 2006 2007 History: 2008 On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private 2009 immutable global list for HTML. It still defaults to the same list, but you can change it now via 2010 the parameter. 
2011 +/ 2012 static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2013 bool selfClosed = tagName.isInArray(selfClosedElements); 2014 2015 Element e; 2016 // want to create the right kind of object for the given tag... 2017 switch(tagName) { 2018 case "#text": 2019 e = new TextNode(null, childInfo); 2020 return e; 2021 // break; 2022 case "table": 2023 e = new Table(null); 2024 break; 2025 case "a": 2026 e = new Link(null); 2027 break; 2028 case "form": 2029 e = new Form(null); 2030 break; 2031 case "tr": 2032 e = new TableRow(null); 2033 break; 2034 case "td", "th": 2035 e = new TableCell(null, tagName); 2036 break; 2037 default: 2038 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 2039 } 2040 2041 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 2042 e.tagName = tagName; 2043 e.selfClosed = selfClosed; 2044 2045 if(childInfo !is null) 2046 switch(tagName) { 2047 /* html5 convenience tags */ 2048 case "audio": 2049 if(childInfo.length) 2050 e.addChild("source", childInfo); 2051 if(childInfo2 !is null) 2052 e.appendText(childInfo2); 2053 break; 2054 case "source": 2055 e.src = childInfo; 2056 if(childInfo2 !is null) 2057 e.type = childInfo2; 2058 break; 2059 /* regular html 4 stuff */ 2060 case "img": 2061 e.src = childInfo; 2062 if(childInfo2 !is null) 2063 e.alt = childInfo2; 2064 break; 2065 case "link": 2066 e.href = childInfo; 2067 if(childInfo2 !is null) 2068 e.rel = childInfo2; 2069 break; 2070 case "option": 2071 e.innerText = childInfo; 2072 if(childInfo2 !is null) 2073 e.value = childInfo2; 2074 break; 2075 case "input": 2076 e.type = "hidden"; 2077 e.name = childInfo; 2078 if(childInfo2 !is null) 2079 e.value = childInfo2; 2080 break; 2081 case "button": 2082 e.innerText = childInfo; 2083 if(childInfo2 !is null) 2084 e.type = childInfo2; 2085 break; 2086 
case "a": 2087 e.innerText = childInfo; 2088 if(childInfo2 !is null) 2089 e.href = childInfo2; 2090 break; 2091 case "script": 2092 case "style": 2093 e.innerRawSource = childInfo; 2094 break; 2095 case "meta": 2096 e.name = childInfo; 2097 if(childInfo2 !is null) 2098 e.content = childInfo2; 2099 break; 2100 /* generically, assume we were passed text and perhaps class */ 2101 default: 2102 e.innerText = childInfo; 2103 if(childInfo2.length) 2104 e.className = childInfo2; 2105 } 2106 2107 return e; 2108 } 2109 2110 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2111 // FIXME: childInfo2 is ignored when info1 is null 2112 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2113 m.innerHTML = innerHtml.source; 2114 return m; 2115 } 2116 2117 static Element make(string tagName, Element child, string childInfo2 = null) { 2118 auto m = Element.make(tagName, cast(string) null, childInfo2); 2119 m.appendChild(child); 2120 return m; 2121 } 2122 2123 2124 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2125 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2126 tagName = _tagName; 2127 if(_attributes !is null) 2128 attributes = _attributes; 2129 selfClosed = _selfClosed; 2130 2131 version(dom_node_indexes) 2132 this.dataset.nodeIndex = to!string(&(this.attributes)); 2133 2134 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2135 } 2136 2137 /++ 2138 Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2139 Note also that without a parent document, elements are always in strict, case-sensitive mode. 2140 2141 History: 2142 On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as 2143 before: using the hard-coded list of HTML elements, but it can now be overridden. 
If you use [Document.createElement], it will use the list set for the current document. Otherwise, you can pass
something here if you like.
+/
this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	selfClosed = tagName.isInArray(selfClosedElements);

	// this is meant to reserve some memory. It makes a small, but consistent improvement.
	//children.length = 8;
	//children.length = 0;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}

// Bare constructor: sets no tag name or attributes; the argument is not used by the body.
private this(Document _parentDocument) {
	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}


/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
/// Text nodes count as children here as well.
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/++
	UNTESTED

	The next node you would encounter if you were reading the source top to bottom:
	the first child if there is one, otherwise the next sibling, otherwise the next
	sibling of the nearest ancestor that has one.

	Returns: that node, or `null` when this is the last node in its tree.
+/
Element nextInSource() {
	auto n = firstChild;
	if(n is null)
		n = nextSibling();
	if(n is null) {
		// climb until some ancestor has a following sibling
		auto p = this.parentNode;
		while(p !is null && n is null) {
			n = p.nextSibling;
			// without this step the loop never ends when `p` has no next sibling
			p = p.parentNode;
		}
	}

	return n;
}

/++
	UNTESTED

	The node you would encounter just before this one if you were reading the source
	top to bottom; the counterpart of [nextInSource].

	That is the deepest last descendant of the previous sibling, or the parent when
	this is the first child.

	Returns: that node, or `null` when this element has neither a previous sibling nor a parent.
+/
Element previousInSource() {
	auto p = previousSibling;
	if(p is null)
		return parentNode; // first child (or detached): only the parent's opening tag precedes us

	// everything inside the previous sibling is read after it but before us, so descend to its very last node
	while(p.lastChild !is null)
		p = p.lastChild;
	return p;
}

/// Returns the closest preceding sibling that is not a text node, or null if there is none.
@property Element previousElementSibling() {
	return previousSibling("*");
}

/++
	Returns the closest sibling that comes before this element in its parent's child list.

	Params:
		tagName = `null` (the default) accepts any node; `"*"` accepts any node that is not a
			text node; any other value must equal the sibling's `tagName`.

	Returns: the matching sibling, or `null` if this element has no parent or nothing matches.
+/
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ps = null;
	// walk forward and remember the latest match seen before reaching ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		if(tagName == "*" && e.nodeType != NodeType.Text) {
			ps = e;
		} else if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

/// Returns the closest following sibling that is not a text node, or null if there is none.
@property Element nextElementSibling() {
	return nextSibling("*");
}

/++
	Returns the closest sibling that comes after this element in its parent's child list.

	Params:
		tagName = `null` (the default) accepts any node; `"*"` accepts any node that is not a
			text node; any other value must equal the sibling's `tagName`.

	Returns: the matching sibling, or `null` if this element has no parent or nothing matches.
+/
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have walked past ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe) {
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}


/++
	Gets the nearest node, going up the chain, with the given tagName.

	When `tagName` is null and `T` is [Form], [Table] or [Link], the tag name defaults to
	`"form"`, `"table"` or `"a"` respectively. With a null tag name and any other `T`, the
	immediate parent is the only candidate.

	Returns: the ancestor found. For `T == Element` this may be `null`.

	Throws: [ElementNotFoundException] when `T` is not `Element` and no ancestor was found,
	or the one found is not a `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was a comparison (`==`), which left tagName null for Link
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/// Returns the first element yielded by `tree` whose `id` attribute equals the given id, or null if none does.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(candidate; tree) {
		if(candidate.id != id)
			continue;
		return candidate;
	}
	return null;
}

/++
	Returns a child element that matches the given `selector`.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.

	Tip: to use namespaces, escape the colon in the name:

	---
	element.querySelector(`ns\:tag`); // the backticks are raw strings then the backslash is interpreted by querySelector
	---

	Returns: the first element yielded by `tree` that the selector matches, or `null` if none does.
+/
@scriptable
Element querySelector(string selector) {
	// parse once, then test every candidate against the same parsed selector
	auto parsed = Selector(selector);
	foreach(candidate; tree) {
		if(!parsed.matchesElement(candidate))
			continue;
		return candidate;
	}
	return null;
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
	auto found = getElementsBySelector(selector);
	return found;
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
		bool caseSensitiveTags = true;
		if(parentDocument && parentDocument.loose)
			caseSensitiveTags = false;
	+/

	Selector s = Selector(selector);
	return s.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	// parse the selector once rather than once per ancestor
	Selector s = Selector(selector);
	for(Element e = this; e !is null; e = e.parentNode) {
		if(s.matchesElement(e))
			return e;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector?
and given a selector, give me all the elements
*/
Element[] getElementsBySelector(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.

	// tags are matched case-sensitively unless we belong to a document in loose mode
	bool caseSensitiveTags = !(parentDocument && parentDocument.loose);

	// results of the individual comma-separated selectors are concatenated in order
	Element[] matched;
	foreach(part; parseSelectorString(selector, caseSensitiveTags)) {
		matched ~= part.getElements(this);
	}
	return matched;
}

/// Returns the elements matched by the selector `"." ~ cn`.
Element[] getElementsByClassName(string cn) {
	// is this correct?
	string classSelector = "." ~ cn;
	return getElementsBySelector(classSelector);
}

/// Returns every element yielded by `tree` whose tag name equals `tag`.
/// The requested name is lower-cased first when the parent document is in loose mode.
Element[] getElementsByTagName(string tag) {
	if(parentDocument && parentDocument.loose)
		tag = tag.toLower();

	Element[] matched;
	foreach(candidate; tree) {
		if(candidate.tagName != tag)
			continue;
		matched ~= candidate;
	}
	return matched;
}


/* *******************************
          Attributes
*********************************/

/**
	Gets the given attribute value, or null if the
	attribute is not set.

	Note that the returned string is decoded, so it no longer contains any xml entities.

	The name is lower-cased before the lookup when the parent document is in loose mode.
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(auto found = name in attributes)
		return *found;
	return null;
}

/**
	Sets an attribute.
Returns this for easy chaining

	The name is lower-cased first when the parent document is in loose mode.

	For `href` and `src`, any leading `vbscript:` or `javascript:` scheme (compared
	ASCII case-insensitively, after trimming surrounding whitespace) is removed from
	the value. Stripping repeats until none is left, so a nested prefix such as
	`javascript:javascript:` cannot survive. Values without such a prefix are stored
	untouched.
*/
@scriptable
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		// byte-wise, ASCII-only prefix test; `scheme` must be given in lower case
		static bool startsWithScheme(string s, string scheme) {
			if(s.length < scheme.length)
				return false;
			foreach(idx, char expected; scheme) {
				char actual = s[idx];
				if(actual >= 'A' && actual <= 'Z')
					actual += 'a' - 'A';
				if(actual != expected)
					return false;
			}
			return true;
		}

		for(;;) {
			// slice the same trimmed string the prefix was detected on, so offsets always line up
			auto trimmed = value.strip();
			if(startsWithScheme(trimmed, "vbscript:"))
				value = trimmed["vbscript:".length .. $];
			else if(startsWithScheme(trimmed, "javascript:"))
				value = trimmed["javascript:".length .. $];
			else
				break;
		}
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.

	The name is lower-cased first when the parent document is in loose mode.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(name in attributes)
		return true;
	else
		return false;
}

/**
	Removes the given attribute from the element.

	The name is lower-cased first when the parent document is in loose mode.
	The removeAttribute observer event is sent whether or not the attribute existed.

	Returns: this, for chaining.
*/
@scriptable
Element removeAttribute(string name)
	out(ret) {
		assert(ret is this);
	}
do {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	if(name in attributes)
		attributes.remove(name);

	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto c = getAttribute("class");
	if(c is null)
		return "";
	return c;
}

/// Sets the class attribute to `c`. Returns this for chaining.
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.
---
auto a = Element.make("a");
a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
string where = a.href; // same as a.getAttribute("href");
---

Only names accepted by `isConvenientAttribute` are available this way.

Passing `null` (the default) makes this a pure getter, so an attribute cannot be
set to `null` through this property.

Returns: the attribute's value after the optional assignment.
*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	if(v !is null)
		setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").

	Instantiating this overload is always a compile-time error carrying that advice.
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children.

	This is the `children` array itself, not a copy.
*/
@property const(Element[]) childNodes() const {
	return children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`

	A new `DataSet` wrapping this element is constructed on every call.
+/
@property DataSet dataset() {
	return DataSet(this);
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---

	A new `AttributeSet` wrapping this element is constructed on every call.
+/
@property AttributeSet attrs() {
	return AttributeSet(this);
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---

	A new `ElementStyle` wrapping this element is constructed on every call.
+/
@property ElementStyle style() {
	return ElementStyle(this);
}

/++
	This sets the style attribute with a string.

	Returns: the same kind of view the getter gives, obtained after the attribute was set.
+/
@property ElementStyle style(string s) {
	this.setAttribute("style", s);
	return this.style;
}

// Currently does nothing: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.

// the next few methods are for implementing interactive kind of things
// cache for computedStyle; filled on first access
private CssStyle _computedStyle;

/// Don't use this.
@property CssStyle computedStyle() {
	// built once, then served from the cache
	if(_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");

	/* legacy presentational html attributes are folded in as css declarations */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	// `text` only counts as a color on <body>
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// own offset plus that of every element along the offsetParent chain
		int total = offsetLeft;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// own offset plus that of every element along the offsetParent chain
		int total = offsetTop;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
do {
	// orphan every child first, then drop the whole list
	foreach(orphan; children) {
		orphan.parentNode = null;
	}
	children = null;
}

/// Inserts `e` into this element's parent, right after this element. Returns `e`.
/// History: added June 13, 2020
Element appendSibling(Element e) {
	auto host = parentNode;
	host.insertAfter(this, e);
	return e;
}

/// Inserts `e` into this element's parent, right before this element. Returns `e`.
/// History: added June 13, 2020
Element prependSibling(Element e) {
	auto host = parentNode;
	host.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
do {
	// moving, not copying: take it out of wherever it currently lives
	auto previousOwner = e.parentNode;
	if(previousOwner !is null)
		previousOwner.removeChild(e);

	// an element that holds children can no longer be self-closed
	selfClosed = false;

	// a fragment contributes its children rather than itself
	if(auto fragment = cast(DocumentFragment) e) {
		children ~= fragment.children;
	} else {
		children ~= e;
	}

	e.parentNode = this;

	/+
	foreach(item; e.tree)
		item.parentDocument = this.parentDocument;
	+/

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param.
/// If `where` is not found among the children, nothing is inserted; `what` is returned either way.
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		auto before = children[0 .. idx];
		auto after = children[idx .. $];

		if(auto fragment = cast(DocumentFragment) what) {
			// a fragment contributes its children rather than itself
			children = before ~ fragment.children ~ after;
			foreach(adopted; fragment.children)
				adopted.parentNode = this;
		} else {
			children = before ~ what ~ after;
		}

		what.parentNode = this;
		return what;
	}

	return what;
}

/++
	Inserts the given element `what` as a child of `this`, immediately after the existing child `where`.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		// split right behind `where`
		auto upToWhere = children[0 .. idx + 1];
		auto afterWhere = children[idx + 1 .. $];

		if(auto fragment = cast(DocumentFragment) what) {
			// a fragment contributes its children rather than itself
			children = upToWhere ~ what.children ~ afterWhere;
			foreach(adopted; fragment.children)
				adopted.parentNode = this;
		} else {
			children = upToWhere ~ what ~ afterWhere;
		}

		what.parentNode = this;
		return what;
	}

	// `where` was not among our children: nothing is inserted
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
do {
	foreach(idx, existing; this.children) {
		if(existing !is child)
			continue;

		existing.parentNode = null;
		this.children[idx] = replacement;
		replacement.parentNode = this;
		return child;
	}

	// the in contract promised `child` is ours
	assert(0);
}


/++
	Appends the given text to the node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
See_Also:
		[firstInnerText], [directText], [innerText], [appendChild]

	Returns: this element, for chaining.
+/
@scriptable
Element appendText(string text) {
	Element textNode = new TextNode(parentDocument, text);
	appendChild(textNode);
	return this;
}

/++
	Returns child elements which are of a tag type (excludes text, comments, etc.).


	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
+/
@property Element[] childElements(string tagName = null) {
	Element[] picked;
	foreach(node; children) {
		// only nodes reporting nodeType 1 qualify
		if(node.nodeType != 1)
			continue;
		if(tagName !is null && node.tagName != tagName)
			continue;
		picked ~= node;
	}
	return picked;
}

/++
	Appends the given html to the element, returning the elements appended


	This is similar to `element.innerHTML += "html string";` in Javascript.
+/
@scriptable
Element[] appendHtml(string html) {
	// parse inside a throwaway wrapper, then take over the wrapper's children
	auto parsed = new Document("<root>" ~ html ~ "</root>");
	auto wrapper = parsed.root;
	return stealChildren(wrapper);
}


/// Inserts `child` into this element's children, immediately after the existing child `where`.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		auto at = idx + 1; // splice point: right behind `where`
		if(cast(DocumentFragment) child !is null) {
			// a fragment contributes its children rather than itself
			children = children[0 .. at] ~ child.children ~ children[at .. $];
			//foreach(child; frag.children)
				//child.parentNode = this;
		} else {
			children = children[0 .. at] ~ child ~ children[at .. $];
		}
		child.parentNode = this;
		break;
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.

	Returns: the stolen children.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
do {
	// claim the children first
	foreach(taken; e.children)
		taken.parentNode = this;

	if(position !is null) {
		// splice them in right in front of `position`
		foreach(idx, existing; children) {
			if(existing !is position)
				continue;
			children = children[0 .. idx] ~ e.children ~ children[idx .. $];
			break;
		}
	} else {
		children ~= e.children;
	}

	// hand back the stolen list and leave the donor empty
	auto stolen = e.children[];
	e.children.length = 0;
	return stolen;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
do {
	auto fragment = cast(DocumentFragment) e;
	if(fragment is null) {
		children = e ~ children;
	} else {
		// a fragment contributes its children rather than itself
		children = e.children ~ children;
		foreach(adopted; fragment.children)
			adopted.parentNode = this;
	}
	e.parentNode = this;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Params:
		where = appender the children are written into; the returned string is the part of
			its data added by this call.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output begins so only the new part is returned
	auto begin = where.data.length;

	foreach(node; children) {
		assert(node !is null);
		node.writeToAppender(where);
	}

	return where.data[begin .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length)
		selfClosed = false;

	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// Detach the old children so they do not keep claiming this element as parent.
	// Done only after parsing, so a parse that throws leaves the old content in place.
	removeAllChildren();

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
	}

	// the temporary wrapper must not keep referencing nodes that now belong to us
	doc.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// the previous children are discarded with this node; detach them so they do not keep a stale parent
	removeAllChildren();

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
	}

	stripOut();

	return doc.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// detach the old children rather than just truncating the list, so they do not keep a stale parent
	removeAllChildren();
	auto rs = new RawSource(parentDocument, rawSource);
	children ~= rs;
	rs.parentNode = this;
}

/++
	Replaces the child `find` with `replace`.

	`replace` must not have a parent yet. Afterwards `find` is parentless.

	Returns: `replace`.

	Throws: `Exception` if `find` is not among this element's children.
+/
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(find.parentNode is this);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
do {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(i, existing; children) {
		if(existing is find) {
			replace.parentNode = this;
			existing.parentNode = null;
			children[i] = replace;
			return replace;
		}
	}

	throw new Exception("no such child ");// ~ find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString);
}

/**
	Replaces the given element with a whole group.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
do {
	// replacing with nothing is just a removal
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);

	// locate the slot holding `find`
	int at = -1;
	for(int i = 0; i < children.length; i++) {
		if(children[i] is find) {
			at = i;
			break;
		}
	}
	if(at == -1)
		throw new Exception("no such child");

	children[at].parentNode = null; // this element should now be dead

	// the first replacement takes over the slot, the rest are spliced in behind it
	children[at] = replace[0];
	foreach(incoming; replace)
		incoming.parentNode = this;

	children = .insertAfter(children, at, replace[1..$]);
}


/**
	Removes the given child from this list.

	Returns the removed element.

	Throws: `Exception` if `c` is not among this element's children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is c)
			continue;

		// rebuild the list without slot idx
		children = children[0 .. idx] ~ children[idx + 1 .. $];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
do {
	// return a copy, so later changes to our own list cannot affect the caller's
	Element[] oldChildren = children.dup;
	foreach(c; oldChildren)
		c.parentNode = null;

	children.length = 0;

	return oldChildren;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> &amp; code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like textContent.
*/
@scriptable
@property string innerText() const {
	string s;
	foreach(child; children) {
		// text nodes contribute their value, everything else its own inner text
		if(child.nodeType != NodeType.Text)
			s ~= child.innerText;
		else
			s ~= child.nodeValue();
	}
	return s;
}

/// Alias of [innerText].
alias textContent = innerText;

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.

	The previous children are detached (their parent becomes null), and the element
	is marked as not self-closed.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;
	// detach the old children instead of just overwriting the list, so they do not keep a stale parent
	removeAllChildren();
	Element e = new TextNode(parentDocument, text);
	children = [e];
	e.parentNode = this;
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
do {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	auto e = Element.make(this.tagName);
	e.attributes = this.attributes.aadup;
	e.selfClosed = this.selfClosed;

	if(deepClone) {
		foreach(child; children) {
			e.appendChild(child.cloneNode(true));
		}
	}


	return e;
}

/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
/// This base implementation returns an empty string.
string nodeValue() const {
	return "";
}

// should return int
/// Node type code. This base implementation returns 1.
@property int nodeType() const {
	return 1;
}


invariant () {
	// tag names never contain spaces
	debug assert(tagName.indexOf(" ") == -1);

	// commented cuz it gets into recursive pain and eff dat.
	/+
	if(children !is null)
	foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
		assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}
	+/

	/+
	// this isn't helping
	if(parent_ && parent_.asElement) {
		bool found = false;
		foreach(child; parent_.asElement.children)
			if(child is this)
				found = true;
		assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement)));
	}
	+/

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	return writeToAppender();
}

/++
	Builds the line break plus indentation that goes in front of a pretty-printed node.

	Params:
		insertComments = wrap the inserted whitespace in `<!--` and `-->`
		indentationLevel = how many times `indentWith` is repeated after the newline
		indentWith = the indentation unit; `null` disables indentation and yields `null`
+/
protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
	if(indentWith is null)
		return null;
	string s;

	if(insertComments) s ~= "<!--";
	s ~= "\n";
	foreach(indent; 0 .. indentationLevel)
		s ~= indentWith;
	if(insertComments) s ~= "-->";

	return s;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.
3387 +/ 3388 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 3389 3390 // first step is to concatenate any consecutive text nodes to simplify 3391 // the white space analysis. this changes the tree! but i'm allowed since 3392 // the comment always says it changes the comments 3393 // 3394 // actually i'm not allowed cuz it is const so i will cheat and lie 3395 /+ 3396 TextNode lastTextChild = null; 3397 for(int a = 0; a < this.children.length; a++) { 3398 auto child = this.children[a]; 3399 if(auto tn = cast(TextNode) child) { 3400 if(lastTextChild) { 3401 lastTextChild.contents ~= tn.contents; 3402 for(int b = a; b < this.children.length - 1; b++) 3403 this.children[b] = this.children[b + 1]; 3404 this.children = this.children[0 .. $-1]; 3405 } else { 3406 lastTextChild = tn; 3407 } 3408 } else { 3409 lastTextChild = null; 3410 } 3411 } 3412 +/ 3413 3414 auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements); 3415 3416 const(Element)[] children; 3417 3418 TextNode lastTextChild = null; 3419 for(int a = 0; a < this.children.length; a++) { 3420 auto child = this.children[a]; 3421 if(auto tn = cast(const(TextNode)) child) { 3422 if(lastTextChild !is null) { 3423 lastTextChild.contents ~= tn.contents; 3424 } else { 3425 lastTextChild = new TextNode(""); 3426 lastTextChild.parentNode = cast(Element) this; 3427 lastTextChild.contents ~= tn.contents; 3428 children ~= lastTextChild; 3429 } 3430 } else { 3431 lastTextChild = null; 3432 children ~= child; 3433 } 3434 } 3435 3436 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3437 3438 s ~= "<"; 3439 s ~= tagName; 3440 3441 // i sort these for consistent output. might be more legible 3442 // but especially it keeps it the same for diff purposes. 
3443 import std.algorithm : sort; 3444 auto keys = sort(attributes.keys); 3445 foreach(n; keys) { 3446 auto v = attributes[n]; 3447 s ~= " "; 3448 s ~= n; 3449 s ~= "=\""; 3450 s ~= htmlEntitiesEncode(v); 3451 s ~= "\""; 3452 } 3453 3454 if(selfClosed){ 3455 s ~= " />"; 3456 return s; 3457 } 3458 3459 s ~= ">"; 3460 3461 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 3462 // just keep them on the same line 3463 if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) { 3464 foreach(child; children) { 3465 s ~= child.toString();//toPrettyString(false, 0, null); 3466 } 3467 } else { 3468 foreach(child; children) { 3469 assert(child !is null); 3470 3471 s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith); 3472 } 3473 3474 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3475 } 3476 3477 s ~= "</"; 3478 s ~= tagName; 3479 s ~= ">"; 3480 3481 return s; 3482 } 3483 3484 /+ 3485 /// Writes out the opening tag only, if applicable. 3486 string writeTagOnly(Appender!string where = appender!string()) const { 3487 +/ 3488 3489 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3490 /// Note: the ordering of attributes in the string is undefined. 3491 /// Returns the string it creates. 3492 string writeToAppender(Appender!string where = appender!string()) const { 3493 assert(tagName !is null); 3494 3495 where.reserve((this.children.length + 1) * 512); 3496 3497 auto start = where.data.length; 3498 3499 where.put("<"); 3500 where.put(tagName); 3501 3502 import std.algorithm : sort; 3503 auto keys = sort(attributes.keys); 3504 foreach(n; keys) { 3505 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 
3506 //assert(v !is null); 3507 where.put(" "); 3508 where.put(n); 3509 where.put("=\""); 3510 htmlEntitiesEncode(v, where); 3511 where.put("\""); 3512 } 3513 3514 if(selfClosed){ 3515 where.put(" />"); 3516 return where.data[start .. $]; 3517 } 3518 3519 where.put('>'); 3520 3521 innerHTML(where); 3522 3523 where.put("</"); 3524 where.put(tagName); 3525 where.put('>'); 3526 3527 return where.data[start .. $]; 3528 } 3529 3530 /** 3531 Returns a lazy range of all its children, recursively. 3532 */ 3533 @property ElementStream tree() { 3534 return new ElementStream(this); 3535 } 3536 3537 // I moved these from Form because they are generally useful. 3538 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3539 /// Tags: HTML, HTML5 3540 // FIXME: add overloads for other label types... 3541 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3542 auto fs = this; 3543 auto i = fs.addChild("label"); 3544 3545 if(!(type == "checkbox" || type == "radio")) 3546 i.addChild("span", label); 3547 3548 Element input; 3549 if(type == "textarea") 3550 input = i.addChild("textarea"). 3551 setAttribute("name", name). 3552 setAttribute("rows", "6"); 3553 else 3554 input = i.addChild("input"). 3555 setAttribute("name", name). 3556 setAttribute("type", type); 3557 3558 if(type == "checkbox" || type == "radio") 3559 i.addChild("span", label); 3560 3561 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3562 fieldOptions.applyToElement(input); 3563 return i; 3564 } 3565 3566 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3567 auto fs = this; 3568 auto i = fs.addChild("label"); 3569 i.addChild(label); 3570 Element input; 3571 if(type == "textarea") 3572 input = i.addChild("textarea"). 
3573 setAttribute("name", name). 3574 setAttribute("rows", "6"); 3575 else 3576 input = i.addChild("input"). 3577 setAttribute("name", name). 3578 setAttribute("type", type); 3579 3580 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3581 fieldOptions.applyToElement(input); 3582 return i; 3583 } 3584 3585 Element addField(string label, string name, FormFieldOptions fieldOptions) { 3586 return addField(label, name, "text", fieldOptions); 3587 } 3588 3589 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 3590 auto fs = this; 3591 auto i = fs.addChild("label"); 3592 i.addChild("span", label); 3593 auto sel = i.addChild("select").setAttribute("name", name); 3594 3595 foreach(k, opt; options) 3596 sel.addChild("option", opt, k); 3597 3598 // FIXME: implement requirements somehow 3599 3600 return i; 3601 } 3602 3603 Element addSubmitButton(string label = null) { 3604 auto t = this; 3605 auto holder = t.addChild("div"); 3606 holder.addClass("submit-holder"); 3607 auto i = holder.addChild("input"); 3608 i.type = "submit"; 3609 if(label.length) 3610 i.value = label; 3611 return holder; 3612 } 3613 3614 } 3615 // computedStyle could argubaly be removed to bring size down 3616 //pragma(msg, __traits(classInstanceSize, Element)); 3617 //pragma(msg, Element.tupleof); 3618 3619 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 3620 /// Specializes Document for handling generic XML. 
/// (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	/// Parses `data` strictly, after switching the document over to xml conventions.
	this(string data) {
		// xml has no html-style lists of self closing or inline tags
		selfClosedElements = null;
		inlineElements = null;
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}




import std.string;

/* domconvenience follows { */

/// Returns the comment nodes whose text equals `txt`. The comparison ignores
/// case as well as leading and trailing whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	auto startingPoint = document.root;
	return findComments(startingPoint, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	auto wanted = txt.strip().toLower();
	Element[] matches;

	foreach(node; element.getElementsByTagName("#comment")) {
		auto normalized = node.nodeValue().strip().toLower();
		if(normalized != wanted)
			continue;
		matches ~= node;
	}

	return matches;
}

/// A wrapper around an element reference that lets calls pass through a null
/// without crashing. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards the call to the wrapped element, with a null check inserted first.
	/// On a null element, methods returning an element give a null wrapper, methods
	/// returning string give a null string and void methods do nothing. Any other
	/// return type is rejected at compile time.
	auto opDispatch(string method, T...)(T args) {
		alias Result = typeof(__traits(getMember, element, method)(args));
		static if(is(Result : Element)) {
			if(element is null)
				return MaybeNullElement!Result(null);
			return __traits(getMember, element, method)(args);
		}
		else static if(is(Result == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		}
		else static if(is(Result == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		}
		else {
			static assert(0);
		}
	}

	/// Lets the wrapper be used wherever the wrapped element type is expected.
	alias element this;
}

/++
	A list of elements that forwards method calls to each of its members.
+/
/// Group: implementations
struct ElementCollection {
	/// A collection holding just `e`.
	this(Element e) {
		this.elements = [e];
	}

	/// A collection of what `e.querySelectorAll(selector)` returns.
	this(Element e, string selector) {
		this.elements = e.querySelectorAll(selector);
	}

	/// A collection over an existing array (the array is not copied).
	this(Element[] e) {
		this.elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs the selector against every member and gathers all the results.
	ElementCollection opIndex(string selector) {
		Element[] gathered;
		foreach(e; elements)
			gathered ~= e.getElementsBySelector(selector);
		return ElementCollection(gathered);
	}

	/// The i-th member.
	Element opIndex(int i) {
		return this.elements[i];
	}

	/// Slicing hands out the underlying array, which is handy for passing the
	/// collection on to range algorithms or for looping over it.
	Element[] opSlice() {
		return this.elements;
	}

	/// Input range primitives, so foreach works on the collection directly
	void popFront() {
		this.elements = this.elements[1 .. $];
	}

	/// ditto
	Element front() {
		return this.elements[0];
	}

	/// ditto
	bool empty() {
		return this.elements.length == 0;
	}

	/++
		Evaluates `method` on every member and joins the resulting strings.
		The separator is appended after each member, the last one included.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string joined;
		foreach(e; elements) {
			joined ~= mixin("e." ~ method);
			joined ~= separator;
		}
		return joined;
	}

	/// Calls the named method with the given arguments on each [Element|element]
	/// in turn, then returns the collection itself to allow chaining.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements)
			mixin("e." ~ name)(t);
		return this;
	}

	/++
		Wraps every member with [Element.wrapIn]. Each member gets its own shallow clone of `what`.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(e; elements) {
			auto wrapper = what.cloneNode(false);
			e.wrapIn(wrapper);
		}

		return this;
	}

	/// Joins two collections into a new one.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		auto combined = this.elements ~ rhs.elements;
		return ElementCollection(combined);
	}
}


/// Adds opDispatch and index operators that forward to the `get` and `set`
/// functions of whatever type this is mixed into.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through get, `obj.name = v` (non-null v) writes through set.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		return (v !is null) ? set(name, v) : get(name);
	}

	/// `obj[key]` reads through get.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj[field] = value` writes through set.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	string* opBinary(string op)(string key) if(op == "in") {
		return key in fields;
	}
}

/// The proxy behind the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Writes the "data-" attribute for `name` (name goes through unCamelCase) and returns `value`.
	string set(string name, string value) {
		auto attributeName = "data-" ~ unCamelCase(name);
		_element.setAttribute(attributeName, value);
		return value;
	}

	/// Reads the "data-" attribute for `name` (name goes through unCamelCase).
	string get(string name) const {
		auto attributeName = "data-" ~ unCamelCase(name);
		return _element.getAttribute(attributeName);
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets attribute `name` on the element and hands `value` back.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Reads attribute `name` from the element.
	string get(string name) const {
		auto current = _element.getAttribute(name);
		return current;
	}

	///
	mixin JavascriptStyleDispatch!();
}



/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.
/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	/// Reference to the element's raw `style` attribute string. If the element has
	/// no style attribute yet, an empty one is created so there is something to
	/// reference.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css property and rewrites the style attribute from the parsed rules.
	/// `name` may be camelCased ("cssFloat" maps to "float"). An empty name is
	/// ignored. A null or empty value removes the rule. Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		r[name] = value;

		// build the new text locally so the attribute slot is only resolved once
		string css;
		foreach(k, v; r) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(css.length)
				css ~= " ";
			css ~= k ~ ": " ~ v ~ ";";
		}
		_attribute = css;

		_element.setAttribute("style", css); // this is to trigger the observer call

		return value;
	}

	/// Returns the value of one css property, or null if it is not in the style
	/// attribute. `name` may be camelCased ("cssFloat" maps to "float").
	string get(string name) const {
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		if(auto found = name in r)
			return *found;
		return null;
	}

	/// Parses the style attribute into a property-name => value map. The text is
	/// split on ";"; a piece without a ":" becomes a key with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(rule; _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				ret[rule] = "";
			else {
				auto name = rule[0 .. idx].strip();
				auto value = rule[idx + 1 .. $].strip();

				ret[name] = value;
			}
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name.
/// Only the ASCII capitals A-Z are touched; all other bytes are copied as they are.
string unCamelCase(string a) {
	string ret;
	foreach(char c; a) {
		if(c >= 'A' && c <= 'Z') {
			ret ~= '-';
			// ASCII lowercase by arithmetic; no temporary string needed
			ret ~= cast(char) (c - 'A' + 'a');
		} else
			ret ~= c;
	}
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName.
/// Dashes are dropped and the character following a dash is upper cased.
string camelCase(string a) {
	import std.uni : toUpper;

	string ret;
	bool justSawDash = false;
	// iterate by code point: upper casing a lone UTF-8 code unit is not valid
	// for characters outside of ASCII
	foreach(dchar c; a) {
		if(c == '-') {
			justSawDash = true;
		} else if(justSawDash) {
			justSawDash = false;
			ret ~= toUpper(c);
		} else
			ret ~= c;
	}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




/// Node type numbers, compared against [Element.nodeType]. Only the text node type is listed.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Throws an ElementNotFoundException if `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	auto ret = cast(T) e;
	if(ret is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return ret;
}


/// An element with the tag name "#fragment" that writes out only its children.
/// Group: core_functionality
class DocumentFragment : Element {
	/// Creates an empty fragment belonging to the given document.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// Writes the children only; the fragment itself has no tag in the output.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty prints each child at the fragment's own indentation level.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string s;
		foreach(child; children)
			s ~= child.toPrettyString(insertComments, indentationLevel, indentWith);
		return s;
	}

	// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	/+
	override Element parentNode(Element p) {
		this.parentNode = p;
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
	+/
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress this by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	import std.conv : to;

	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	// grab some extra space for the encoded entities, on top of what is already in the buffer
	output.reserve(start + data.length + 64);

	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &apos;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (!encodeNonAscii || (d < 128 && d > 0))
			output.put(d);
		else
			// numeric character reference for everything else (non ascii and NUL)
			output.put("&#" ~ to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
4180 /// Group: core_functionality 4181 dchar parseEntity(in dchar[] entity) { 4182 switch(entity[1..$-1]) { 4183 case "quot": 4184 return '"'; 4185 case "apos": 4186 return '\''; 4187 case "lt": 4188 return '<'; 4189 case "gt": 4190 return '>'; 4191 case "amp": 4192 return '&'; 4193 // the next are html rather than xml 4194 4195 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4196 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4197 case "Tab": return '\u0009'; 4198 case "NewLine": return '\u000A'; 4199 case "excl": return '\u0021'; 4200 case "QUOT": return '\u0022'; 4201 case "num": return '\u0023'; 4202 case "dollar": return '\u0024'; 4203 case "percnt": return '\u0025'; 4204 case "AMP": return '\u0026'; 4205 case "lpar": return '\u0028'; 4206 case "rpar": return '\u0029'; 4207 case "ast": case "midast": return '\u002A'; 4208 case "plus": return '\u002B'; 4209 case "comma": return '\u002C'; 4210 case "period": return '\u002E'; 4211 case "sol": return '\u002F'; 4212 case "colon": return '\u003A'; 4213 case "semi": return '\u003B'; 4214 case "LT": return '\u003C'; 4215 case "equals": return '\u003D'; 4216 case "GT": return '\u003E'; 4217 case "quest": return '\u003F'; 4218 case "commat": return '\u0040'; 4219 case "lsqb": case "lbrack": return '\u005B'; 4220 case "bsol": return '\u005C'; 4221 case "rsqb": case "rbrack": return '\u005D'; 4222 case "Hat": return '\u005E'; 4223 case "lowbar": case "UnderBar": return '\u005F'; 4224 case "grave": case "DiacriticalGrave": return '\u0060'; 4225 case "lcub": case "lbrace": return '\u007B'; 4226 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4227 case "rcub": case "rbrace": return '\u007D'; 4228 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4229 case "iexcl": return '\u00A1'; 4230 case "cent": return '\u00A2'; 4231 case "pound": return '\u00A3'; 4232 case "curren": return '\u00A4'; 4233 case "yen": return '\u00A5'; 4234 case 
"brvbar": return '\u00A6'; 4235 case "sect": return '\u00A7'; 4236 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4237 case "copy": case "COPY": return '\u00A9'; 4238 case "ordf": return '\u00AA'; 4239 case "laquo": return '\u00AB'; 4240 case "not": return '\u00AC'; 4241 case "shy": return '\u00AD'; 4242 case "reg": case "circledR": case "REG": return '\u00AE'; 4243 case "macr": case "strns": return '\u00AF'; 4244 case "deg": return '\u00B0'; 4245 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4246 case "sup2": return '\u00B2'; 4247 case "sup3": return '\u00B3'; 4248 case "acute": case "DiacriticalAcute": return '\u00B4'; 4249 case "micro": return '\u00B5'; 4250 case "para": return '\u00B6'; 4251 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4252 case "cedil": case "Cedilla": return '\u00B8'; 4253 case "sup1": return '\u00B9'; 4254 case "ordm": return '\u00BA'; 4255 case "raquo": return '\u00BB'; 4256 case "frac14": return '\u00BC'; 4257 case "frac12": case "half": return '\u00BD'; 4258 case "frac34": return '\u00BE'; 4259 case "iquest": return '\u00BF'; 4260 case "Agrave": return '\u00C0'; 4261 case "Aacute": return '\u00C1'; 4262 case "Acirc": return '\u00C2'; 4263 case "Atilde": return '\u00C3'; 4264 case "Auml": return '\u00C4'; 4265 case "Aring": case "angst": return '\u00C5'; 4266 case "AElig": return '\u00C6'; 4267 case "Ccedil": return '\u00C7'; 4268 case "Egrave": return '\u00C8'; 4269 case "Eacute": return '\u00C9'; 4270 case "Ecirc": return '\u00CA'; 4271 case "Euml": return '\u00CB'; 4272 case "Igrave": return '\u00CC'; 4273 case "Iacute": return '\u00CD'; 4274 case "Icirc": return '\u00CE'; 4275 case "Iuml": return '\u00CF'; 4276 case "ETH": return '\u00D0'; 4277 case "Ntilde": return '\u00D1'; 4278 case "Ograve": return '\u00D2'; 4279 case "Oacute": return '\u00D3'; 4280 case "Ocirc": return '\u00D4'; 4281 case "Otilde": return '\u00D5'; 4282 case "Ouml": return '\u00D6'; 4283 case "times": 
return '\u00D7'; 4284 case "Oslash": return '\u00D8'; 4285 case "Ugrave": return '\u00D9'; 4286 case "Uacute": return '\u00DA'; 4287 case "Ucirc": return '\u00DB'; 4288 case "Uuml": return '\u00DC'; 4289 case "Yacute": return '\u00DD'; 4290 case "THORN": return '\u00DE'; 4291 case "szlig": return '\u00DF'; 4292 case "agrave": return '\u00E0'; 4293 case "aacute": return '\u00E1'; 4294 case "acirc": return '\u00E2'; 4295 case "atilde": return '\u00E3'; 4296 case "auml": return '\u00E4'; 4297 case "aring": return '\u00E5'; 4298 case "aelig": return '\u00E6'; 4299 case "ccedil": return '\u00E7'; 4300 case "egrave": return '\u00E8'; 4301 case "eacute": return '\u00E9'; 4302 case "ecirc": return '\u00EA'; 4303 case "euml": return '\u00EB'; 4304 case "igrave": return '\u00EC'; 4305 case "iacute": return '\u00ED'; 4306 case "icirc": return '\u00EE'; 4307 case "iuml": return '\u00EF'; 4308 case "eth": return '\u00F0'; 4309 case "ntilde": return '\u00F1'; 4310 case "ograve": return '\u00F2'; 4311 case "oacute": return '\u00F3'; 4312 case "ocirc": return '\u00F4'; 4313 case "otilde": return '\u00F5'; 4314 case "ouml": return '\u00F6'; 4315 case "divide": case "div": return '\u00F7'; 4316 case "oslash": return '\u00F8'; 4317 case "ugrave": return '\u00F9'; 4318 case "uacute": return '\u00FA'; 4319 case "ucirc": return '\u00FB'; 4320 case "uuml": return '\u00FC'; 4321 case "yacute": return '\u00FD'; 4322 case "thorn": return '\u00FE'; 4323 case "yuml": return '\u00FF'; 4324 case "Amacr": return '\u0100'; 4325 case "amacr": return '\u0101'; 4326 case "Abreve": return '\u0102'; 4327 case "abreve": return '\u0103'; 4328 case "Aogon": return '\u0104'; 4329 case "aogon": return '\u0105'; 4330 case "Cacute": return '\u0106'; 4331 case "cacute": return '\u0107'; 4332 case "Ccirc": return '\u0108'; 4333 case "ccirc": return '\u0109'; 4334 case "Cdot": return '\u010A'; 4335 case "cdot": return '\u010B'; 4336 case "Ccaron": return '\u010C'; 4337 case "ccaron": return '\u010D'; 4338 case 
"Dcaron": return '\u010E'; 4339 case "dcaron": return '\u010F'; 4340 case "Dstrok": return '\u0110'; 4341 case "dstrok": return '\u0111'; 4342 case "Emacr": return '\u0112'; 4343 case "emacr": return '\u0113'; 4344 case "Edot": return '\u0116'; 4345 case "edot": return '\u0117'; 4346 case "Eogon": return '\u0118'; 4347 case "eogon": return '\u0119'; 4348 case "Ecaron": return '\u011A'; 4349 case "ecaron": return '\u011B'; 4350 case "Gcirc": return '\u011C'; 4351 case "gcirc": return '\u011D'; 4352 case "Gbreve": return '\u011E'; 4353 case "gbreve": return '\u011F'; 4354 case "Gdot": return '\u0120'; 4355 case "gdot": return '\u0121'; 4356 case "Gcedil": return '\u0122'; 4357 case "Hcirc": return '\u0124'; 4358 case "hcirc": return '\u0125'; 4359 case "Hstrok": return '\u0126'; 4360 case "hstrok": return '\u0127'; 4361 case "Itilde": return '\u0128'; 4362 case "itilde": return '\u0129'; 4363 case "Imacr": return '\u012A'; 4364 case "imacr": return '\u012B'; 4365 case "Iogon": return '\u012E'; 4366 case "iogon": return '\u012F'; 4367 case "Idot": return '\u0130'; 4368 case "imath": case "inodot": return '\u0131'; 4369 case "IJlig": return '\u0132'; 4370 case "ijlig": return '\u0133'; 4371 case "Jcirc": return '\u0134'; 4372 case "jcirc": return '\u0135'; 4373 case "Kcedil": return '\u0136'; 4374 case "kcedil": return '\u0137'; 4375 case "kgreen": return '\u0138'; 4376 case "Lacute": return '\u0139'; 4377 case "lacute": return '\u013A'; 4378 case "Lcedil": return '\u013B'; 4379 case "lcedil": return '\u013C'; 4380 case "Lcaron": return '\u013D'; 4381 case "lcaron": return '\u013E'; 4382 case "Lmidot": return '\u013F'; 4383 case "lmidot": return '\u0140'; 4384 case "Lstrok": return '\u0141'; 4385 case "lstrok": return '\u0142'; 4386 case "Nacute": return '\u0143'; 4387 case "nacute": return '\u0144'; 4388 case "Ncedil": return '\u0145'; 4389 case "ncedil": return '\u0146'; 4390 case "Ncaron": return '\u0147'; 4391 case "ncaron": return '\u0148'; 4392 case "napos": 
return '\u0149'; 4393 case "ENG": return '\u014A'; 4394 case "eng": return '\u014B'; 4395 case "Omacr": return '\u014C'; 4396 case "omacr": return '\u014D'; 4397 case "Odblac": return '\u0150'; 4398 case "odblac": return '\u0151'; 4399 case "OElig": return '\u0152'; 4400 case "oelig": return '\u0153'; 4401 case "Racute": return '\u0154'; 4402 case "racute": return '\u0155'; 4403 case "Rcedil": return '\u0156'; 4404 case "rcedil": return '\u0157'; 4405 case "Rcaron": return '\u0158'; 4406 case "rcaron": return '\u0159'; 4407 case "Sacute": return '\u015A'; 4408 case "sacute": return '\u015B'; 4409 case "Scirc": return '\u015C'; 4410 case "scirc": return '\u015D'; 4411 case "Scedil": return '\u015E'; 4412 case "scedil": return '\u015F'; 4413 case "Scaron": return '\u0160'; 4414 case "scaron": return '\u0161'; 4415 case "Tcedil": return '\u0162'; 4416 case "tcedil": return '\u0163'; 4417 case "Tcaron": return '\u0164'; 4418 case "tcaron": return '\u0165'; 4419 case "Tstrok": return '\u0166'; 4420 case "tstrok": return '\u0167'; 4421 case "Utilde": return '\u0168'; 4422 case "utilde": return '\u0169'; 4423 case "Umacr": return '\u016A'; 4424 case "umacr": return '\u016B'; 4425 case "Ubreve": return '\u016C'; 4426 case "ubreve": return '\u016D'; 4427 case "Uring": return '\u016E'; 4428 case "uring": return '\u016F'; 4429 case "Udblac": return '\u0170'; 4430 case "udblac": return '\u0171'; 4431 case "Uogon": return '\u0172'; 4432 case "uogon": return '\u0173'; 4433 case "Wcirc": return '\u0174'; 4434 case "wcirc": return '\u0175'; 4435 case "Ycirc": return '\u0176'; 4436 case "ycirc": return '\u0177'; 4437 case "Yuml": return '\u0178'; 4438 case "Zacute": return '\u0179'; 4439 case "zacute": return '\u017A'; 4440 case "Zdot": return '\u017B'; 4441 case "zdot": return '\u017C'; 4442 case "Zcaron": return '\u017D'; 4443 case "zcaron": return '\u017E'; 4444 case "fnof": return '\u0192'; 4445 case "imped": return '\u01B5'; 4446 case "gacute": return '\u01F5'; 4447 case 
"jmath": return '\u0237'; 4448 case "circ": return '\u02C6'; 4449 case "caron": case "Hacek": return '\u02C7'; 4450 case "breve": case "Breve": return '\u02D8'; 4451 case "dot": case "DiacriticalDot": return '\u02D9'; 4452 case "ring": return '\u02DA'; 4453 case "ogon": return '\u02DB'; 4454 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4455 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4456 case "DownBreve": return '\u0311'; 4457 case "Alpha": return '\u0391'; 4458 case "Beta": return '\u0392'; 4459 case "Gamma": return '\u0393'; 4460 case "Delta": return '\u0394'; 4461 case "Epsilon": return '\u0395'; 4462 case "Zeta": return '\u0396'; 4463 case "Eta": return '\u0397'; 4464 case "Theta": return '\u0398'; 4465 case "Iota": return '\u0399'; 4466 case "Kappa": return '\u039A'; 4467 case "Lambda": return '\u039B'; 4468 case "Mu": return '\u039C'; 4469 case "Nu": return '\u039D'; 4470 case "Xi": return '\u039E'; 4471 case "Omicron": return '\u039F'; 4472 case "Pi": return '\u03A0'; 4473 case "Rho": return '\u03A1'; 4474 case "Sigma": return '\u03A3'; 4475 case "Tau": return '\u03A4'; 4476 case "Upsilon": return '\u03A5'; 4477 case "Phi": return '\u03A6'; 4478 case "Chi": return '\u03A7'; 4479 case "Psi": return '\u03A8'; 4480 case "Omega": case "ohm": return '\u03A9'; 4481 case "alpha": return '\u03B1'; 4482 case "beta": return '\u03B2'; 4483 case "gamma": return '\u03B3'; 4484 case "delta": return '\u03B4'; 4485 case "epsi": case "epsilon": return '\u03B5'; 4486 case "zeta": return '\u03B6'; 4487 case "eta": return '\u03B7'; 4488 case "theta": return '\u03B8'; 4489 case "iota": return '\u03B9'; 4490 case "kappa": return '\u03BA'; 4491 case "lambda": return '\u03BB'; 4492 case "mu": return '\u03BC'; 4493 case "nu": return '\u03BD'; 4494 case "xi": return '\u03BE'; 4495 case "omicron": return '\u03BF'; 4496 case "pi": return '\u03C0'; 4497 case "rho": return '\u03C1'; 4498 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4499 
case "sigma": return '\u03C3'; 4500 case "tau": return '\u03C4'; 4501 case "upsi": case "upsilon": return '\u03C5'; 4502 case "phi": return '\u03C6'; 4503 case "chi": return '\u03C7'; 4504 case "psi": return '\u03C8'; 4505 case "omega": return '\u03C9'; 4506 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4507 case "Upsi": case "upsih": return '\u03D2'; 4508 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4509 case "piv": case "varpi": return '\u03D6'; 4510 case "Gammad": return '\u03DC'; 4511 case "gammad": case "digamma": return '\u03DD'; 4512 case "kappav": case "varkappa": return '\u03F0'; 4513 case "rhov": case "varrho": return '\u03F1'; 4514 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4515 case "bepsi": case "backepsilon": return '\u03F6'; 4516 case "IOcy": return '\u0401'; 4517 case "DJcy": return '\u0402'; 4518 case "GJcy": return '\u0403'; 4519 case "Jukcy": return '\u0404'; 4520 case "DScy": return '\u0405'; 4521 case "Iukcy": return '\u0406'; 4522 case "YIcy": return '\u0407'; 4523 case "Jsercy": return '\u0408'; 4524 case "LJcy": return '\u0409'; 4525 case "NJcy": return '\u040A'; 4526 case "TSHcy": return '\u040B'; 4527 case "KJcy": return '\u040C'; 4528 case "Ubrcy": return '\u040E'; 4529 case "DZcy": return '\u040F'; 4530 case "Acy": return '\u0410'; 4531 case "Bcy": return '\u0411'; 4532 case "Vcy": return '\u0412'; 4533 case "Gcy": return '\u0413'; 4534 case "Dcy": return '\u0414'; 4535 case "IEcy": return '\u0415'; 4536 case "ZHcy": return '\u0416'; 4537 case "Zcy": return '\u0417'; 4538 case "Icy": return '\u0418'; 4539 case "Jcy": return '\u0419'; 4540 case "Kcy": return '\u041A'; 4541 case "Lcy": return '\u041B'; 4542 case "Mcy": return '\u041C'; 4543 case "Ncy": return '\u041D'; 4544 case "Ocy": return '\u041E'; 4545 case "Pcy": return '\u041F'; 4546 case "Rcy": return '\u0420'; 4547 case "Scy": return '\u0421'; 4548 case "Tcy": return '\u0422'; 4549 case "Ucy": return '\u0423'; 
4550 case "Fcy": return '\u0424'; 4551 case "KHcy": return '\u0425'; 4552 case "TScy": return '\u0426'; 4553 case "CHcy": return '\u0427'; 4554 case "SHcy": return '\u0428'; 4555 case "SHCHcy": return '\u0429'; 4556 case "HARDcy": return '\u042A'; 4557 case "Ycy": return '\u042B'; 4558 case "SOFTcy": return '\u042C'; 4559 case "Ecy": return '\u042D'; 4560 case "YUcy": return '\u042E'; 4561 case "YAcy": return '\u042F'; 4562 case "acy": return '\u0430'; 4563 case "bcy": return '\u0431'; 4564 case "vcy": return '\u0432'; 4565 case "gcy": return '\u0433'; 4566 case "dcy": return '\u0434'; 4567 case "iecy": return '\u0435'; 4568 case "zhcy": return '\u0436'; 4569 case "zcy": return '\u0437'; 4570 case "icy": return '\u0438'; 4571 case "jcy": return '\u0439'; 4572 case "kcy": return '\u043A'; 4573 case "lcy": return '\u043B'; 4574 case "mcy": return '\u043C'; 4575 case "ncy": return '\u043D'; 4576 case "ocy": return '\u043E'; 4577 case "pcy": return '\u043F'; 4578 case "rcy": return '\u0440'; 4579 case "scy": return '\u0441'; 4580 case "tcy": return '\u0442'; 4581 case "ucy": return '\u0443'; 4582 case "fcy": return '\u0444'; 4583 case "khcy": return '\u0445'; 4584 case "tscy": return '\u0446'; 4585 case "chcy": return '\u0447'; 4586 case "shcy": return '\u0448'; 4587 case "shchcy": return '\u0449'; 4588 case "hardcy": return '\u044A'; 4589 case "ycy": return '\u044B'; 4590 case "softcy": return '\u044C'; 4591 case "ecy": return '\u044D'; 4592 case "yucy": return '\u044E'; 4593 case "yacy": return '\u044F'; 4594 case "iocy": return '\u0451'; 4595 case "djcy": return '\u0452'; 4596 case "gjcy": return '\u0453'; 4597 case "jukcy": return '\u0454'; 4598 case "dscy": return '\u0455'; 4599 case "iukcy": return '\u0456'; 4600 case "yicy": return '\u0457'; 4601 case "jsercy": return '\u0458'; 4602 case "ljcy": return '\u0459'; 4603 case "njcy": return '\u045A'; 4604 case "tshcy": return '\u045B'; 4605 case "kjcy": return '\u045C'; 4606 case "ubrcy": return '\u045E'; 4607 case 
"dzcy": return '\u045F'; 4608 case "ensp": return '\u2002'; 4609 case "emsp": return '\u2003'; 4610 case "emsp13": return '\u2004'; 4611 case "emsp14": return '\u2005'; 4612 case "numsp": return '\u2007'; 4613 case "puncsp": return '\u2008'; 4614 case "thinsp": case "ThinSpace": return '\u2009'; 4615 case "hairsp": case "VeryThinSpace": return '\u200A'; 4616 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4617 case "zwnj": return '\u200C'; 4618 case "zwj": return '\u200D'; 4619 case "lrm": return '\u200E'; 4620 case "rlm": return '\u200F'; 4621 case "hyphen": case "dash": return '\u2010'; 4622 case "ndash": return '\u2013'; 4623 case "mdash": return '\u2014'; 4624 case "horbar": return '\u2015'; 4625 case "Verbar": case "Vert": return '\u2016'; 4626 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4627 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4628 case "lsquor": case "sbquo": return '\u201A'; 4629 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4630 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4631 case "ldquor": case "bdquo": return '\u201E'; 4632 case "dagger": return '\u2020'; 4633 case "Dagger": case "ddagger": return '\u2021'; 4634 case "bull": case "bullet": return '\u2022'; 4635 case "nldr": return '\u2025'; 4636 case "hellip": case "mldr": return '\u2026'; 4637 case "permil": return '\u2030'; 4638 case "pertenk": return '\u2031'; 4639 case "prime": return '\u2032'; 4640 case "Prime": return '\u2033'; 4641 case "tprime": return '\u2034'; 4642 case "bprime": case "backprime": return '\u2035'; 4643 case "lsaquo": return '\u2039'; 4644 case "rsaquo": return '\u203A'; 4645 case "oline": case "OverBar": return '\u203E'; 4646 case "caret": return '\u2041'; 4647 case "hybull": return '\u2043'; 4648 case "frasl": return '\u2044'; 4649 case "bsemi": return '\u204F'; 4650 case "qprime": 
return '\u2057'; 4651 case "MediumSpace": return '\u205F'; 4652 case "NoBreak": return '\u2060'; 4653 case "ApplyFunction": case "af": return '\u2061'; 4654 case "InvisibleTimes": case "it": return '\u2062'; 4655 case "InvisibleComma": case "ic": return '\u2063'; 4656 case "euro": return '\u20AC'; 4657 case "tdot": case "TripleDot": return '\u20DB'; 4658 case "DotDot": return '\u20DC'; 4659 case "Copf": case "complexes": return '\u2102'; 4660 case "incare": return '\u2105'; 4661 case "gscr": return '\u210A'; 4662 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4663 case "Hfr": case "Poincareplane": return '\u210C'; 4664 case "quaternions": case "Hopf": return '\u210D'; 4665 case "planckh": return '\u210E'; 4666 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4667 case "Iscr": case "imagline": return '\u2110'; 4668 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4669 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4670 case "ell": return '\u2113'; 4671 case "Nopf": case "naturals": return '\u2115'; 4672 case "numero": return '\u2116'; 4673 case "copysr": return '\u2117'; 4674 case "weierp": case "wp": return '\u2118'; 4675 case "Popf": case "primes": return '\u2119'; 4676 case "rationals": case "Qopf": return '\u211A'; 4677 case "Rscr": case "realine": return '\u211B'; 4678 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4679 case "reals": case "Ropf": return '\u211D'; 4680 case "rx": return '\u211E'; 4681 case "trade": case "TRADE": return '\u2122'; 4682 case "integers": case "Zopf": return '\u2124'; 4683 case "mho": return '\u2127'; 4684 case "Zfr": case "zeetrf": return '\u2128'; 4685 case "iiota": return '\u2129'; 4686 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4687 case "Cfr": case "Cayleys": return '\u212D'; 4688 case "escr": return '\u212F'; 4689 case "Escr": case "expectation": return '\u2130'; 4690 case "Fscr": case "Fouriertrf": 
return '\u2131'; 4691 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4692 case "order": case "orderof": case "oscr": return '\u2134'; 4693 case "alefsym": case "aleph": return '\u2135'; 4694 case "beth": return '\u2136'; 4695 case "gimel": return '\u2137'; 4696 case "daleth": return '\u2138'; 4697 case "CapitalDifferentialD": case "DD": return '\u2145'; 4698 case "DifferentialD": case "dd": return '\u2146'; 4699 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4700 case "ImaginaryI": case "ii": return '\u2148'; 4701 case "frac13": return '\u2153'; 4702 case "frac23": return '\u2154'; 4703 case "frac15": return '\u2155'; 4704 case "frac25": return '\u2156'; 4705 case "frac35": return '\u2157'; 4706 case "frac45": return '\u2158'; 4707 case "frac16": return '\u2159'; 4708 case "frac56": return '\u215A'; 4709 case "frac18": return '\u215B'; 4710 case "frac38": return '\u215C'; 4711 case "frac58": return '\u215D'; 4712 case "frac78": return '\u215E'; 4713 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4714 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4715 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4716 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4717 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4718 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4719 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4720 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4721 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4722 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4723 case "nlarr": case "nleftarrow": return '\u219A'; 4724 case "nrarr": case "nrightarrow": return '\u219B'; 4725 case "rarrw": case 
"rightsquigarrow": return '\u219D'; 4726 case "Larr": case "twoheadleftarrow": return '\u219E'; 4727 case "Uarr": return '\u219F'; 4728 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4729 case "Darr": return '\u21A1'; 4730 case "larrtl": case "leftarrowtail": return '\u21A2'; 4731 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4732 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4733 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4734 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4735 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4736 case "larrhk": case "hookleftarrow": return '\u21A9'; 4737 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4738 case "larrlp": case "looparrowleft": return '\u21AB'; 4739 case "rarrlp": case "looparrowright": return '\u21AC'; 4740 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4741 case "nharr": case "nleftrightarrow": return '\u21AE'; 4742 case "lsh": case "Lsh": return '\u21B0'; 4743 case "rsh": case "Rsh": return '\u21B1'; 4744 case "ldsh": return '\u21B2'; 4745 case "rdsh": return '\u21B3'; 4746 case "crarr": return '\u21B5'; 4747 case "cularr": case "curvearrowleft": return '\u21B6'; 4748 case "curarr": case "curvearrowright": return '\u21B7'; 4749 case "olarr": case "circlearrowleft": return '\u21BA'; 4750 case "orarr": case "circlearrowright": return '\u21BB'; 4751 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4752 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4753 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4754 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4755 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4756 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4757 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4758 case "dharl": case 
"LeftDownVector": case "downharpoonleft": return '\u21C3'; 4759 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4760 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4761 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4762 case "llarr": case "leftleftarrows": return '\u21C7'; 4763 case "uuarr": case "upuparrows": return '\u21C8'; 4764 case "rrarr": case "rightrightarrows": return '\u21C9'; 4765 case "ddarr": case "downdownarrows": return '\u21CA'; 4766 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4767 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4768 case "nlArr": case "nLeftarrow": return '\u21CD'; 4769 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4770 case "nrArr": case "nRightarrow": return '\u21CF'; 4771 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4772 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4773 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4774 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4775 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4776 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4777 case "nwArr": return '\u21D6'; 4778 case "neArr": return '\u21D7'; 4779 case "seArr": return '\u21D8'; 4780 case "swArr": return '\u21D9'; 4781 case "lAarr": case "Lleftarrow": return '\u21DA'; 4782 case "rAarr": case "Rrightarrow": return '\u21DB'; 4783 case "zigrarr": return '\u21DD'; 4784 case "larrb": case "LeftArrowBar": return '\u21E4'; 4785 case "rarrb": case "RightArrowBar": return '\u21E5'; 4786 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4787 case "loarr": return '\u21FD'; 4788 case "roarr": return '\u21FE'; 4789 case "hoarr": return '\u21FF'; 4790 case "forall": case "ForAll": return 
'\u2200'; 4791 case "comp": case "complement": return '\u2201'; 4792 case "part": case "PartialD": return '\u2202'; 4793 case "exist": case "Exists": return '\u2203'; 4794 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4795 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4796 case "nabla": case "Del": return '\u2207'; 4797 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4798 case "notin": case "NotElement": case "notinva": return '\u2209'; 4799 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4800 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4801 case "prod": case "Product": return '\u220F'; 4802 case "coprod": case "Coproduct": return '\u2210'; 4803 case "sum": case "Sum": return '\u2211'; 4804 case "minus": return '\u2212'; 4805 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4806 case "plusdo": case "dotplus": return '\u2214'; 4807 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4808 case "lowast": return '\u2217'; 4809 case "compfn": case "SmallCircle": return '\u2218'; 4810 case "radic": case "Sqrt": return '\u221A'; 4811 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4812 case "infin": return '\u221E'; 4813 case "angrt": return '\u221F'; 4814 case "ang": case "angle": return '\u2220'; 4815 case "angmsd": case "measuredangle": return '\u2221'; 4816 case "angsph": return '\u2222'; 4817 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4818 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4819 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4820 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4821 case "and": case "wedge": 
return '\u2227'; 4822 case "or": case "vee": return '\u2228'; 4823 case "cap": return '\u2229'; 4824 case "cup": return '\u222A'; 4825 case "int": case "Integral": return '\u222B'; 4826 case "Int": return '\u222C'; 4827 case "tint": case "iiint": return '\u222D'; 4828 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4829 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4830 case "Cconint": return '\u2230'; 4831 case "cwint": return '\u2231'; 4832 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4833 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4834 case "there4": case "therefore": case "Therefore": return '\u2234'; 4835 case "becaus": case "because": case "Because": return '\u2235'; 4836 case "ratio": return '\u2236'; 4837 case "Colon": case "Proportion": return '\u2237'; 4838 case "minusd": case "dotminus": return '\u2238'; 4839 case "mDDot": return '\u223A'; 4840 case "homtht": return '\u223B'; 4841 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4842 case "bsim": case "backsim": return '\u223D'; 4843 case "ac": case "mstpos": return '\u223E'; 4844 case "acd": return '\u223F'; 4845 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4846 case "nsim": case "NotTilde": return '\u2241'; 4847 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4848 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4849 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4850 case "cong": case "TildeFullEqual": return '\u2245'; 4851 case "simne": return '\u2246'; 4852 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4853 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4854 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4855 case "ape": case "approxeq": return '\u224A'; 4856 case "apid": return '\u224B'; 4857 case "bcong": case "backcong": 
return '\u224C'; 4858 case "asympeq": case "CupCap": return '\u224D'; 4859 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4860 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4861 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4862 case "eDot": case "doteqdot": return '\u2251'; 4863 case "efDot": case "fallingdotseq": return '\u2252'; 4864 case "erDot": case "risingdotseq": return '\u2253'; 4865 case "colone": case "coloneq": case "Assign": return '\u2254'; 4866 case "ecolon": case "eqcolon": return '\u2255'; 4867 case "ecir": case "eqcirc": return '\u2256'; 4868 case "cire": case "circeq": return '\u2257'; 4869 case "wedgeq": return '\u2259'; 4870 case "veeeq": return '\u225A'; 4871 case "trie": case "triangleq": return '\u225C'; 4872 case "equest": case "questeq": return '\u225F'; 4873 case "ne": case "NotEqual": return '\u2260'; 4874 case "equiv": case "Congruent": return '\u2261'; 4875 case "nequiv": case "NotCongruent": return '\u2262'; 4876 case "le": case "leq": return '\u2264'; 4877 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4878 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4879 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4880 case "lnE": case "lneqq": return '\u2268'; 4881 case "gnE": case "gneqq": return '\u2269'; 4882 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4883 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4884 case "twixt": case "between": return '\u226C'; 4885 case "NotCupCap": return '\u226D'; 4886 case "nlt": case "NotLess": case "nless": return '\u226E'; 4887 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4888 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4889 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4890 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4891 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4892 
case "nlsim": case "NotLessTilde": return '\u2274'; 4893 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4894 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4895 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4896 case "ntlg": case "NotLessGreater": return '\u2278'; 4897 case "ntgl": case "NotGreaterLess": return '\u2279'; 4898 case "pr": case "Precedes": case "prec": return '\u227A'; 4899 case "sc": case "Succeeds": case "succ": return '\u227B'; 4900 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4901 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4902 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4903 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4904 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4905 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4906 case "sub": case "subset": return '\u2282'; 4907 case "sup": case "supset": case "Superset": return '\u2283'; 4908 case "nsub": return '\u2284'; 4909 case "nsup": return '\u2285'; 4910 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4911 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4912 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4913 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4914 case "subne": case "subsetneq": return '\u228A'; 4915 case "supne": case "supsetneq": return '\u228B'; 4916 case "cupdot": return '\u228D'; 4917 case "uplus": case "UnionPlus": return '\u228E'; 4918 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4919 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4920 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4921 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4922 case "sqcap": case "SquareIntersection": return 
'\u2293'; 4923 case "sqcup": case "SquareUnion": return '\u2294'; 4924 case "oplus": case "CirclePlus": return '\u2295'; 4925 case "ominus": case "CircleMinus": return '\u2296'; 4926 case "otimes": case "CircleTimes": return '\u2297'; 4927 case "osol": return '\u2298'; 4928 case "odot": case "CircleDot": return '\u2299'; 4929 case "ocir": case "circledcirc": return '\u229A'; 4930 case "oast": case "circledast": return '\u229B'; 4931 case "odash": case "circleddash": return '\u229D'; 4932 case "plusb": case "boxplus": return '\u229E'; 4933 case "minusb": case "boxminus": return '\u229F'; 4934 case "timesb": case "boxtimes": return '\u22A0'; 4935 case "sdotb": case "dotsquare": return '\u22A1'; 4936 case "vdash": case "RightTee": return '\u22A2'; 4937 case "dashv": case "LeftTee": return '\u22A3'; 4938 case "top": case "DownTee": return '\u22A4'; 4939 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4940 case "models": return '\u22A7'; 4941 case "vDash": case "DoubleRightTee": return '\u22A8'; 4942 case "Vdash": return '\u22A9'; 4943 case "Vvdash": return '\u22AA'; 4944 case "VDash": return '\u22AB'; 4945 case "nvdash": return '\u22AC'; 4946 case "nvDash": return '\u22AD'; 4947 case "nVdash": return '\u22AE'; 4948 case "nVDash": return '\u22AF'; 4949 case "prurel": return '\u22B0'; 4950 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4951 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4952 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4953 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4954 case "origof": return '\u22B6'; 4955 case "imof": return '\u22B7'; 4956 case "mumap": case "multimap": return '\u22B8'; 4957 case "hercon": return '\u22B9'; 4958 case "intcal": case "intercal": return '\u22BA'; 4959 case "veebar": return '\u22BB'; 4960 case "barvee": return '\u22BD'; 4961 case "angrtvb": return '\u22BE'; 4962 case 
"lrtri": return '\u22BF'; 4963 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4964 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4965 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4966 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4967 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4968 case "sdot": return '\u22C5'; 4969 case "sstarf": case "Star": return '\u22C6'; 4970 case "divonx": case "divideontimes": return '\u22C7'; 4971 case "bowtie": return '\u22C8'; 4972 case "ltimes": return '\u22C9'; 4973 case "rtimes": return '\u22CA'; 4974 case "lthree": case "leftthreetimes": return '\u22CB'; 4975 case "rthree": case "rightthreetimes": return '\u22CC'; 4976 case "bsime": case "backsimeq": return '\u22CD'; 4977 case "cuvee": case "curlyvee": return '\u22CE'; 4978 case "cuwed": case "curlywedge": return '\u22CF'; 4979 case "Sub": case "Subset": return '\u22D0'; 4980 case "Sup": case "Supset": return '\u22D1'; 4981 case "Cap": return '\u22D2'; 4982 case "Cup": return '\u22D3'; 4983 case "fork": case "pitchfork": return '\u22D4'; 4984 case "epar": return '\u22D5'; 4985 case "ltdot": case "lessdot": return '\u22D6'; 4986 case "gtdot": case "gtrdot": return '\u22D7'; 4987 case "Ll": return '\u22D8'; 4988 case "Gg": case "ggg": return '\u22D9'; 4989 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4990 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4991 case "cuepr": case "curlyeqprec": return '\u22DE'; 4992 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4993 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4994 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4995 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4996 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4997 case "lnsim": return '\u22E6'; 4998 case "gnsim": return '\u22E7'; 4999 case "prnsim": case "precnsim": return '\u22E8'; 5000 
case "scnsim": case "succnsim": return '\u22E9'; 5001 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 5002 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 5003 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 5004 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 5005 case "vellip": return '\u22EE'; 5006 case "ctdot": return '\u22EF'; 5007 case "utdot": return '\u22F0'; 5008 case "dtdot": return '\u22F1'; 5009 case "disin": return '\u22F2'; 5010 case "isinsv": return '\u22F3'; 5011 case "isins": return '\u22F4'; 5012 case "isindot": return '\u22F5'; 5013 case "notinvc": return '\u22F6'; 5014 case "notinvb": return '\u22F7'; 5015 case "isinE": return '\u22F9'; 5016 case "nisd": return '\u22FA'; 5017 case "xnis": return '\u22FB'; 5018 case "nis": return '\u22FC'; 5019 case "notnivc": return '\u22FD'; 5020 case "notnivb": return '\u22FE'; 5021 case "barwed": case "barwedge": return '\u2305'; 5022 case "Barwed": case "doublebarwedge": return '\u2306'; 5023 case "lceil": case "LeftCeiling": return '\u2308'; 5024 case "rceil": case "RightCeiling": return '\u2309'; 5025 case "lfloor": case "LeftFloor": return '\u230A'; 5026 case "rfloor": case "RightFloor": return '\u230B'; 5027 case "drcrop": return '\u230C'; 5028 case "dlcrop": return '\u230D'; 5029 case "urcrop": return '\u230E'; 5030 case "ulcrop": return '\u230F'; 5031 case "bnot": return '\u2310'; 5032 case "profline": return '\u2312'; 5033 case "profsurf": return '\u2313'; 5034 case "telrec": return '\u2315'; 5035 case "target": return '\u2316'; 5036 case "ulcorn": case "ulcorner": return '\u231C'; 5037 case "urcorn": case "urcorner": return '\u231D'; 5038 case "dlcorn": case "llcorner": return '\u231E'; 5039 case "drcorn": case "lrcorner": return '\u231F'; 5040 case "frown": case "sfrown": return '\u2322'; 5041 case "smile": case "ssmile": return '\u2323'; 5042 case "cylcty": return 
'\u232D'; 5043 case "profalar": return '\u232E'; 5044 case "topbot": return '\u2336'; 5045 case "ovbar": return '\u233D'; 5046 case "solbar": return '\u233F'; 5047 case "angzarr": return '\u237C'; 5048 case "lmoust": case "lmoustache": return '\u23B0'; 5049 case "rmoust": case "rmoustache": return '\u23B1'; 5050 case "tbrk": case "OverBracket": return '\u23B4'; 5051 case "bbrk": case "UnderBracket": return '\u23B5'; 5052 case "bbrktbrk": return '\u23B6'; 5053 case "OverParenthesis": return '\u23DC'; 5054 case "UnderParenthesis": return '\u23DD'; 5055 case "OverBrace": return '\u23DE'; 5056 case "UnderBrace": return '\u23DF'; 5057 case "trpezium": return '\u23E2'; 5058 case "elinters": return '\u23E7'; 5059 case "blank": return '\u2423'; 5060 case "oS": case "circledS": return '\u24C8'; 5061 case "boxh": case "HorizontalLine": return '\u2500'; 5062 case "boxv": return '\u2502'; 5063 case "boxdr": return '\u250C'; 5064 case "boxdl": return '\u2510'; 5065 case "boxur": return '\u2514'; 5066 case "boxul": return '\u2518'; 5067 case "boxvr": return '\u251C'; 5068 case "boxvl": return '\u2524'; 5069 case "boxhd": return '\u252C'; 5070 case "boxhu": return '\u2534'; 5071 case "boxvh": return '\u253C'; 5072 case "boxH": return '\u2550'; 5073 case "boxV": return '\u2551'; 5074 case "boxdR": return '\u2552'; 5075 case "boxDr": return '\u2553'; 5076 case "boxDR": return '\u2554'; 5077 case "boxdL": return '\u2555'; 5078 case "boxDl": return '\u2556'; 5079 case "boxDL": return '\u2557'; 5080 case "boxuR": return '\u2558'; 5081 case "boxUr": return '\u2559'; 5082 case "boxUR": return '\u255A'; 5083 case "boxuL": return '\u255B'; 5084 case "boxUl": return '\u255C'; 5085 case "boxUL": return '\u255D'; 5086 case "boxvR": return '\u255E'; 5087 case "boxVr": return '\u255F'; 5088 case "boxVR": return '\u2560'; 5089 case "boxvL": return '\u2561'; 5090 case "boxVl": return '\u2562'; 5091 case "boxVL": return '\u2563'; 5092 case "boxHd": return '\u2564'; 5093 case "boxhD": return 
'\u2565'; 5094 case "boxHD": return '\u2566'; 5095 case "boxHu": return '\u2567'; 5096 case "boxhU": return '\u2568'; 5097 case "boxHU": return '\u2569'; 5098 case "boxvH": return '\u256A'; 5099 case "boxVh": return '\u256B'; 5100 case "boxVH": return '\u256C'; 5101 case "uhblk": return '\u2580'; 5102 case "lhblk": return '\u2584'; 5103 case "block": return '\u2588'; 5104 case "blk14": return '\u2591'; 5105 case "blk12": return '\u2592'; 5106 case "blk34": return '\u2593'; 5107 case "squ": case "square": case "Square": return '\u25A1'; 5108 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 5109 case "EmptyVerySmallSquare": return '\u25AB'; 5110 case "rect": return '\u25AD'; 5111 case "marker": return '\u25AE'; 5112 case "fltns": return '\u25B1'; 5113 case "xutri": case "bigtriangleup": return '\u25B3'; 5114 case "utrif": case "blacktriangle": return '\u25B4'; 5115 case "utri": case "triangle": return '\u25B5'; 5116 case "rtrif": case "blacktriangleright": return '\u25B8'; 5117 case "rtri": case "triangleright": return '\u25B9'; 5118 case "xdtri": case "bigtriangledown": return '\u25BD'; 5119 case "dtrif": case "blacktriangledown": return '\u25BE'; 5120 case "dtri": case "triangledown": return '\u25BF'; 5121 case "ltrif": case "blacktriangleleft": return '\u25C2'; 5122 case "ltri": case "triangleleft": return '\u25C3'; 5123 case "loz": case "lozenge": return '\u25CA'; 5124 case "cir": return '\u25CB'; 5125 case "tridot": return '\u25EC'; 5126 case "xcirc": case "bigcirc": return '\u25EF'; 5127 case "ultri": return '\u25F8'; 5128 case "urtri": return '\u25F9'; 5129 case "lltri": return '\u25FA'; 5130 case "EmptySmallSquare": return '\u25FB'; 5131 case "FilledSmallSquare": return '\u25FC'; 5132 case "starf": case "bigstar": return '\u2605'; 5133 case "star": return '\u2606'; 5134 case "phone": return '\u260E'; 5135 case "female": return '\u2640'; 5136 case "male": return '\u2642'; 5137 case "spades": case "spadesuit": 
return '\u2660'; 5138 case "clubs": case "clubsuit": return '\u2663'; 5139 case "hearts": case "heartsuit": return '\u2665'; 5140 case "diams": case "diamondsuit": return '\u2666'; 5141 case "sung": return '\u266A'; 5142 case "flat": return '\u266D'; 5143 case "natur": case "natural": return '\u266E'; 5144 case "sharp": return '\u266F'; 5145 case "check": case "checkmark": return '\u2713'; 5146 case "cross": return '\u2717'; 5147 case "malt": case "maltese": return '\u2720'; 5148 case "sext": return '\u2736'; 5149 case "VerticalSeparator": return '\u2758'; 5150 case "lbbrk": return '\u2772'; 5151 case "rbbrk": return '\u2773'; 5152 case "bsolhsub": return '\u27C8'; 5153 case "suphsol": return '\u27C9'; 5154 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5155 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5156 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5157 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5158 case "Lang": return '\u27EA'; 5159 case "Rang": return '\u27EB'; 5160 case "loang": return '\u27EC'; 5161 case "roang": return '\u27ED'; 5162 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5163 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5164 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5165 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5166 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5167 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5168 case "xmap": case "longmapsto": return '\u27FC'; 5169 case "dzigrarr": return '\u27FF'; 5170 case "nvlArr": return '\u2902'; 5171 case "nvrArr": return '\u2903'; 5172 case "nvHarr": return '\u2904'; 5173 case "Map": return '\u2905'; 5174 case "lbarr": return '\u290C'; 5175 case "rbarr": case "bkarow": return '\u290D'; 5176 case "lBarr": 
return '\u290E'; 5177 case "rBarr": case "dbkarow": return '\u290F'; 5178 case "RBarr": case "drbkarow": return '\u2910'; 5179 case "DDotrahd": return '\u2911'; 5180 case "UpArrowBar": return '\u2912'; 5181 case "DownArrowBar": return '\u2913'; 5182 case "Rarrtl": return '\u2916'; 5183 case "latail": return '\u2919'; 5184 case "ratail": return '\u291A'; 5185 case "lAtail": return '\u291B'; 5186 case "rAtail": return '\u291C'; 5187 case "larrfs": return '\u291D'; 5188 case "rarrfs": return '\u291E'; 5189 case "larrbfs": return '\u291F'; 5190 case "rarrbfs": return '\u2920'; 5191 case "nwarhk": return '\u2923'; 5192 case "nearhk": return '\u2924'; 5193 case "searhk": case "hksearow": return '\u2925'; 5194 case "swarhk": case "hkswarow": return '\u2926'; 5195 case "nwnear": return '\u2927'; 5196 case "nesear": case "toea": return '\u2928'; 5197 case "seswar": case "tosa": return '\u2929'; 5198 case "swnwar": return '\u292A'; 5199 case "rarrc": return '\u2933'; 5200 case "cudarrr": return '\u2935'; 5201 case "ldca": return '\u2936'; 5202 case "rdca": return '\u2937'; 5203 case "cudarrl": return '\u2938'; 5204 case "larrpl": return '\u2939'; 5205 case "curarrm": return '\u293C'; 5206 case "cularrp": return '\u293D'; 5207 case "rarrpl": return '\u2945'; 5208 case "harrcir": return '\u2948'; 5209 case "Uarrocir": return '\u2949'; 5210 case "lurdshar": return '\u294A'; 5211 case "ldrushar": return '\u294B'; 5212 case "LeftRightVector": return '\u294E'; 5213 case "RightUpDownVector": return '\u294F'; 5214 case "DownLeftRightVector": return '\u2950'; 5215 case "LeftUpDownVector": return '\u2951'; 5216 case "LeftVectorBar": return '\u2952'; 5217 case "RightVectorBar": return '\u2953'; 5218 case "RightUpVectorBar": return '\u2954'; 5219 case "RightDownVectorBar": return '\u2955'; 5220 case "DownLeftVectorBar": return '\u2956'; 5221 case "DownRightVectorBar": return '\u2957'; 5222 case "LeftUpVectorBar": return '\u2958'; 5223 case "LeftDownVectorBar": return '\u2959'; 5224 case 
"LeftTeeVector": return '\u295A'; 5225 case "RightTeeVector": return '\u295B'; 5226 case "RightUpTeeVector": return '\u295C'; 5227 case "RightDownTeeVector": return '\u295D'; 5228 case "DownLeftTeeVector": return '\u295E'; 5229 case "DownRightTeeVector": return '\u295F'; 5230 case "LeftUpTeeVector": return '\u2960'; 5231 case "LeftDownTeeVector": return '\u2961'; 5232 case "lHar": return '\u2962'; 5233 case "uHar": return '\u2963'; 5234 case "rHar": return '\u2964'; 5235 case "dHar": return '\u2965'; 5236 case "luruhar": return '\u2966'; 5237 case "ldrdhar": return '\u2967'; 5238 case "ruluhar": return '\u2968'; 5239 case "rdldhar": return '\u2969'; 5240 case "lharul": return '\u296A'; 5241 case "llhard": return '\u296B'; 5242 case "rharul": return '\u296C'; 5243 case "lrhard": return '\u296D'; 5244 case "udhar": case "UpEquilibrium": return '\u296E'; 5245 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5246 case "RoundImplies": return '\u2970'; 5247 case "erarr": return '\u2971'; 5248 case "simrarr": return '\u2972'; 5249 case "larrsim": return '\u2973'; 5250 case "rarrsim": return '\u2974'; 5251 case "rarrap": return '\u2975'; 5252 case "ltlarr": return '\u2976'; 5253 case "gtrarr": return '\u2978'; 5254 case "subrarr": return '\u2979'; 5255 case "suplarr": return '\u297B'; 5256 case "lfisht": return '\u297C'; 5257 case "rfisht": return '\u297D'; 5258 case "ufisht": return '\u297E'; 5259 case "dfisht": return '\u297F'; 5260 case "lopar": return '\u2985'; 5261 case "ropar": return '\u2986'; 5262 case "lbrke": return '\u298B'; 5263 case "rbrke": return '\u298C'; 5264 case "lbrkslu": return '\u298D'; 5265 case "rbrksld": return '\u298E'; 5266 case "lbrksld": return '\u298F'; 5267 case "rbrkslu": return '\u2990'; 5268 case "langd": return '\u2991'; 5269 case "rangd": return '\u2992'; 5270 case "lparlt": return '\u2993'; 5271 case "rpargt": return '\u2994'; 5272 case "gtlPar": return '\u2995'; 5273 case "ltrPar": return '\u2996'; 5274 case "vzigzag": 
return '\u299A'; 5275 case "vangrt": return '\u299C'; 5276 case "angrtvbd": return '\u299D'; 5277 case "ange": return '\u29A4'; 5278 case "range": return '\u29A5'; 5279 case "dwangle": return '\u29A6'; 5280 case "uwangle": return '\u29A7'; 5281 case "angmsdaa": return '\u29A8'; 5282 case "angmsdab": return '\u29A9'; 5283 case "angmsdac": return '\u29AA'; 5284 case "angmsdad": return '\u29AB'; 5285 case "angmsdae": return '\u29AC'; 5286 case "angmsdaf": return '\u29AD'; 5287 case "angmsdag": return '\u29AE'; 5288 case "angmsdah": return '\u29AF'; 5289 case "bemptyv": return '\u29B0'; 5290 case "demptyv": return '\u29B1'; 5291 case "cemptyv": return '\u29B2'; 5292 case "raemptyv": return '\u29B3'; 5293 case "laemptyv": return '\u29B4'; 5294 case "ohbar": return '\u29B5'; 5295 case "omid": return '\u29B6'; 5296 case "opar": return '\u29B7'; 5297 case "operp": return '\u29B9'; 5298 case "olcross": return '\u29BB'; 5299 case "odsold": return '\u29BC'; 5300 case "olcir": return '\u29BE'; 5301 case "ofcir": return '\u29BF'; 5302 case "olt": return '\u29C0'; 5303 case "ogt": return '\u29C1'; 5304 case "cirscir": return '\u29C2'; 5305 case "cirE": return '\u29C3'; 5306 case "solb": return '\u29C4'; 5307 case "bsolb": return '\u29C5'; 5308 case "boxbox": return '\u29C9'; 5309 case "trisb": return '\u29CD'; 5310 case "rtriltri": return '\u29CE'; 5311 case "LeftTriangleBar": return '\u29CF'; 5312 case "RightTriangleBar": return '\u29D0'; 5313 case "iinfin": return '\u29DC'; 5314 case "infintie": return '\u29DD'; 5315 case "nvinfin": return '\u29DE'; 5316 case "eparsl": return '\u29E3'; 5317 case "smeparsl": return '\u29E4'; 5318 case "eqvparsl": return '\u29E5'; 5319 case "lozf": case "blacklozenge": return '\u29EB'; 5320 case "RuleDelayed": return '\u29F4'; 5321 case "dsol": return '\u29F6'; 5322 case "xodot": case "bigodot": return '\u2A00'; 5323 case "xoplus": case "bigoplus": return '\u2A01'; 5324 case "xotime": case "bigotimes": return '\u2A02'; 5325 case "xuplus": case 
"biguplus": return '\u2A04'; 5326 case "xsqcup": case "bigsqcup": return '\u2A06'; 5327 case "qint": case "iiiint": return '\u2A0C'; 5328 case "fpartint": return '\u2A0D'; 5329 case "cirfnint": return '\u2A10'; 5330 case "awint": return '\u2A11'; 5331 case "rppolint": return '\u2A12'; 5332 case "scpolint": return '\u2A13'; 5333 case "npolint": return '\u2A14'; 5334 case "pointint": return '\u2A15'; 5335 case "quatint": return '\u2A16'; 5336 case "intlarhk": return '\u2A17'; 5337 case "pluscir": return '\u2A22'; 5338 case "plusacir": return '\u2A23'; 5339 case "simplus": return '\u2A24'; 5340 case "plusdu": return '\u2A25'; 5341 case "plussim": return '\u2A26'; 5342 case "plustwo": return '\u2A27'; 5343 case "mcomma": return '\u2A29'; 5344 case "minusdu": return '\u2A2A'; 5345 case "loplus": return '\u2A2D'; 5346 case "roplus": return '\u2A2E'; 5347 case "Cross": return '\u2A2F'; 5348 case "timesd": return '\u2A30'; 5349 case "timesbar": return '\u2A31'; 5350 case "smashp": return '\u2A33'; 5351 case "lotimes": return '\u2A34'; 5352 case "rotimes": return '\u2A35'; 5353 case "otimesas": return '\u2A36'; 5354 case "Otimes": return '\u2A37'; 5355 case "odiv": return '\u2A38'; 5356 case "triplus": return '\u2A39'; 5357 case "triminus": return '\u2A3A'; 5358 case "tritime": return '\u2A3B'; 5359 case "iprod": case "intprod": return '\u2A3C'; 5360 case "amalg": return '\u2A3F'; 5361 case "capdot": return '\u2A40'; 5362 case "ncup": return '\u2A42'; 5363 case "ncap": return '\u2A43'; 5364 case "capand": return '\u2A44'; 5365 case "cupor": return '\u2A45'; 5366 case "cupcap": return '\u2A46'; 5367 case "capcup": return '\u2A47'; 5368 case "cupbrcap": return '\u2A48'; 5369 case "capbrcup": return '\u2A49'; 5370 case "cupcup": return '\u2A4A'; 5371 case "capcap": return '\u2A4B'; 5372 case "ccups": return '\u2A4C'; 5373 case "ccaps": return '\u2A4D'; 5374 case "ccupssm": return '\u2A50'; 5375 case "And": return '\u2A53'; 5376 case "Or": return '\u2A54'; 5377 case "andand": 
return '\u2A55'; 5378 case "oror": return '\u2A56'; 5379 case "orslope": return '\u2A57'; 5380 case "andslope": return '\u2A58'; 5381 case "andv": return '\u2A5A'; 5382 case "orv": return '\u2A5B'; 5383 case "andd": return '\u2A5C'; 5384 case "ord": return '\u2A5D'; 5385 case "wedbar": return '\u2A5F'; 5386 case "sdote": return '\u2A66'; 5387 case "simdot": return '\u2A6A'; 5388 case "congdot": return '\u2A6D'; 5389 case "easter": return '\u2A6E'; 5390 case "apacir": return '\u2A6F'; 5391 case "apE": return '\u2A70'; 5392 case "eplus": return '\u2A71'; 5393 case "pluse": return '\u2A72'; 5394 case "Esim": return '\u2A73'; 5395 case "Colone": return '\u2A74'; 5396 case "Equal": return '\u2A75'; 5397 case "eDDot": case "ddotseq": return '\u2A77'; 5398 case "equivDD": return '\u2A78'; 5399 case "ltcir": return '\u2A79'; 5400 case "gtcir": return '\u2A7A'; 5401 case "ltquest": return '\u2A7B'; 5402 case "gtquest": return '\u2A7C'; 5403 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5404 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5405 case "lesdot": return '\u2A7F'; 5406 case "gesdot": return '\u2A80'; 5407 case "lesdoto": return '\u2A81'; 5408 case "gesdoto": return '\u2A82'; 5409 case "lesdotor": return '\u2A83'; 5410 case "gesdotol": return '\u2A84'; 5411 case "lap": case "lessapprox": return '\u2A85'; 5412 case "gap": case "gtrapprox": return '\u2A86'; 5413 case "lne": case "lneq": return '\u2A87'; 5414 case "gne": case "gneq": return '\u2A88'; 5415 case "lnap": case "lnapprox": return '\u2A89'; 5416 case "gnap": case "gnapprox": return '\u2A8A'; 5417 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5418 case "gEl": case "gtreqqless": return '\u2A8C'; 5419 case "lsime": return '\u2A8D'; 5420 case "gsime": return '\u2A8E'; 5421 case "lsimg": return '\u2A8F'; 5422 case "gsiml": return '\u2A90'; 5423 case "lgE": return '\u2A91'; 5424 case "glE": return '\u2A92'; 5425 case "lesges": return '\u2A93'; 5426 case "gesles": 
return '\u2A94'; 5427 case "els": case "eqslantless": return '\u2A95'; 5428 case "egs": case "eqslantgtr": return '\u2A96'; 5429 case "elsdot": return '\u2A97'; 5430 case "egsdot": return '\u2A98'; 5431 case "el": return '\u2A99'; 5432 case "eg": return '\u2A9A'; 5433 case "siml": return '\u2A9D'; 5434 case "simg": return '\u2A9E'; 5435 case "simlE": return '\u2A9F'; 5436 case "simgE": return '\u2AA0'; 5437 case "LessLess": return '\u2AA1'; 5438 case "GreaterGreater": return '\u2AA2'; 5439 case "glj": return '\u2AA4'; 5440 case "gla": return '\u2AA5'; 5441 case "ltcc": return '\u2AA6'; 5442 case "gtcc": return '\u2AA7'; 5443 case "lescc": return '\u2AA8'; 5444 case "gescc": return '\u2AA9'; 5445 case "smt": return '\u2AAA'; 5446 case "lat": return '\u2AAB'; 5447 case "smte": return '\u2AAC'; 5448 case "late": return '\u2AAD'; 5449 case "bumpE": return '\u2AAE'; 5450 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5451 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5452 case "prE": return '\u2AB3'; 5453 case "scE": return '\u2AB4'; 5454 case "prnE": case "precneqq": return '\u2AB5'; 5455 case "scnE": case "succneqq": return '\u2AB6'; 5456 case "prap": case "precapprox": return '\u2AB7'; 5457 case "scap": case "succapprox": return '\u2AB8'; 5458 case "prnap": case "precnapprox": return '\u2AB9'; 5459 case "scnap": case "succnapprox": return '\u2ABA'; 5460 case "Pr": return '\u2ABB'; 5461 case "Sc": return '\u2ABC'; 5462 case "subdot": return '\u2ABD'; 5463 case "supdot": return '\u2ABE'; 5464 case "subplus": return '\u2ABF'; 5465 case "supplus": return '\u2AC0'; 5466 case "submult": return '\u2AC1'; 5467 case "supmult": return '\u2AC2'; 5468 case "subedot": return '\u2AC3'; 5469 case "supedot": return '\u2AC4'; 5470 case "subE": case "subseteqq": return '\u2AC5'; 5471 case "supE": case "supseteqq": return '\u2AC6'; 5472 case "subsim": return '\u2AC7'; 5473 case "supsim": return '\u2AC8'; 5474 case "subnE": case "subsetneqq": 
return '\u2ACB'; 5475 case "supnE": case "supsetneqq": return '\u2ACC'; 5476 case "csub": return '\u2ACF'; 5477 case "csup": return '\u2AD0'; 5478 case "csube": return '\u2AD1'; 5479 case "csupe": return '\u2AD2'; 5480 case "subsup": return '\u2AD3'; 5481 case "supsub": return '\u2AD4'; 5482 case "subsub": return '\u2AD5'; 5483 case "supsup": return '\u2AD6'; 5484 case "suphsub": return '\u2AD7'; 5485 case "supdsub": return '\u2AD8'; 5486 case "forkv": return '\u2AD9'; 5487 case "topfork": return '\u2ADA'; 5488 case "mlcp": return '\u2ADB'; 5489 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5490 case "Vdashl": return '\u2AE6'; 5491 case "Barv": return '\u2AE7'; 5492 case "vBar": return '\u2AE8'; 5493 case "vBarv": return '\u2AE9'; 5494 case "Vbar": return '\u2AEB'; 5495 case "Not": return '\u2AEC'; 5496 case "bNot": return '\u2AED'; 5497 case "rnmid": return '\u2AEE'; 5498 case "cirmid": return '\u2AEF'; 5499 case "midcir": return '\u2AF0'; 5500 case "topcir": return '\u2AF1'; 5501 case "nhpar": return '\u2AF2'; 5502 case "parsim": return '\u2AF3'; 5503 case "parsl": return '\u2AFD'; 5504 case "fflig": return '\uFB00'; 5505 case "filig": return '\uFB01'; 5506 case "fllig": return '\uFB02'; 5507 case "ffilig": return '\uFB03'; 5508 case "ffllig": return '\uFB04'; 5509 case "Ascr": return '\U0001D49C'; 5510 case "Cscr": return '\U0001D49E'; 5511 case "Dscr": return '\U0001D49F'; 5512 case "Gscr": return '\U0001D4A2'; 5513 case "Jscr": return '\U0001D4A5'; 5514 case "Kscr": return '\U0001D4A6'; 5515 case "Nscr": return '\U0001D4A9'; 5516 case "Oscr": return '\U0001D4AA'; 5517 case "Pscr": return '\U0001D4AB'; 5518 case "Qscr": return '\U0001D4AC'; 5519 case "Sscr": return '\U0001D4AE'; 5520 case "Tscr": return '\U0001D4AF'; 5521 case "Uscr": return '\U0001D4B0'; 5522 case "Vscr": return '\U0001D4B1'; 5523 case "Wscr": return '\U0001D4B2'; 5524 case "Xscr": return '\U0001D4B3'; 5525 case "Yscr": return '\U0001D4B4'; 5526 case "Zscr": return '\U0001D4B5'; 5527 
case "ascr": return '\U0001D4B6'; 5528 case "bscr": return '\U0001D4B7'; 5529 case "cscr": return '\U0001D4B8'; 5530 case "dscr": return '\U0001D4B9'; 5531 case "fscr": return '\U0001D4BB'; 5532 case "hscr": return '\U0001D4BD'; 5533 case "iscr": return '\U0001D4BE'; 5534 case "jscr": return '\U0001D4BF'; 5535 case "kscr": return '\U0001D4C0'; 5536 case "lscr": return '\U0001D4C1'; 5537 case "mscr": return '\U0001D4C2'; 5538 case "nscr": return '\U0001D4C3'; 5539 case "pscr": return '\U0001D4C5'; 5540 case "qscr": return '\U0001D4C6'; 5541 case "rscr": return '\U0001D4C7'; 5542 case "sscr": return '\U0001D4C8'; 5543 case "tscr": return '\U0001D4C9'; 5544 case "uscr": return '\U0001D4CA'; 5545 case "vscr": return '\U0001D4CB'; 5546 case "wscr": return '\U0001D4CC'; 5547 case "xscr": return '\U0001D4CD'; 5548 case "yscr": return '\U0001D4CE'; 5549 case "zscr": return '\U0001D4CF'; 5550 case "Afr": return '\U0001D504'; 5551 case "Bfr": return '\U0001D505'; 5552 case "Dfr": return '\U0001D507'; 5553 case "Efr": return '\U0001D508'; 5554 case "Ffr": return '\U0001D509'; 5555 case "Gfr": return '\U0001D50A'; 5556 case "Jfr": return '\U0001D50D'; 5557 case "Kfr": return '\U0001D50E'; 5558 case "Lfr": return '\U0001D50F'; 5559 case "Mfr": return '\U0001D510'; 5560 case "Nfr": return '\U0001D511'; 5561 case "Ofr": return '\U0001D512'; 5562 case "Pfr": return '\U0001D513'; 5563 case "Qfr": return '\U0001D514'; 5564 case "Sfr": return '\U0001D516'; 5565 case "Tfr": return '\U0001D517'; 5566 case "Ufr": return '\U0001D518'; 5567 case "Vfr": return '\U0001D519'; 5568 case "Wfr": return '\U0001D51A'; 5569 case "Xfr": return '\U0001D51B'; 5570 case "Yfr": return '\U0001D51C'; 5571 case "afr": return '\U0001D51E'; 5572 case "bfr": return '\U0001D51F'; 5573 case "cfr": return '\U0001D520'; 5574 case "dfr": return '\U0001D521'; 5575 case "efr": return '\U0001D522'; 5576 case "ffr": return '\U0001D523'; 5577 case "gfr": return '\U0001D524'; 5578 case "hfr": return '\U0001D525'; 5579 
case "ifr": return '\U0001D526'; 5580 case "jfr": return '\U0001D527'; 5581 case "kfr": return '\U0001D528'; 5582 case "lfr": return '\U0001D529'; 5583 case "mfr": return '\U0001D52A'; 5584 case "nfr": return '\U0001D52B'; 5585 case "ofr": return '\U0001D52C'; 5586 case "pfr": return '\U0001D52D'; 5587 case "qfr": return '\U0001D52E'; 5588 case "rfr": return '\U0001D52F'; 5589 case "sfr": return '\U0001D530'; 5590 case "tfr": return '\U0001D531'; 5591 case "ufr": return '\U0001D532'; 5592 case "vfr": return '\U0001D533'; 5593 case "wfr": return '\U0001D534'; 5594 case "xfr": return '\U0001D535'; 5595 case "yfr": return '\U0001D536'; 5596 case "zfr": return '\U0001D537'; 5597 case "Aopf": return '\U0001D538'; 5598 case "Bopf": return '\U0001D539'; 5599 case "Dopf": return '\U0001D53B'; 5600 case "Eopf": return '\U0001D53C'; 5601 case "Fopf": return '\U0001D53D'; 5602 case "Gopf": return '\U0001D53E'; 5603 case "Iopf": return '\U0001D540'; 5604 case "Jopf": return '\U0001D541'; 5605 case "Kopf": return '\U0001D542'; 5606 case "Lopf": return '\U0001D543'; 5607 case "Mopf": return '\U0001D544'; 5608 case "Oopf": return '\U0001D546'; 5609 case "Sopf": return '\U0001D54A'; 5610 case "Topf": return '\U0001D54B'; 5611 case "Uopf": return '\U0001D54C'; 5612 case "Vopf": return '\U0001D54D'; 5613 case "Wopf": return '\U0001D54E'; 5614 case "Xopf": return '\U0001D54F'; 5615 case "Yopf": return '\U0001D550'; 5616 case "aopf": return '\U0001D552'; 5617 case "bopf": return '\U0001D553'; 5618 case "copf": return '\U0001D554'; 5619 case "dopf": return '\U0001D555'; 5620 case "eopf": return '\U0001D556'; 5621 case "fopf": return '\U0001D557'; 5622 case "gopf": return '\U0001D558'; 5623 case "hopf": return '\U0001D559'; 5624 case "iopf": return '\U0001D55A'; 5625 case "jopf": return '\U0001D55B'; 5626 case "kopf": return '\U0001D55C'; 5627 case "lopf": return '\U0001D55D'; 5628 case "mopf": return '\U0001D55E'; 5629 case "nopf": return '\U0001D55F'; 5630 case "oopf": return 
'\U0001D560';
		case "popf": return '\U0001D561';
		case "qopf": return '\U0001D562';
		case "ropf": return '\U0001D563';
		case "sopf": return '\U0001D564';
		case "topf": return '\U0001D565';
		case "uopf": return '\U0001D566';
		case "vopf": return '\U0001D567';
		case "wopf": return '\U0001D568';
		case "xopf": return '\U0001D569';
		case "yopf": return '\U0001D56A';
		case "zopf": return '\U0001D56B';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///     data = text that may contain `&name;` / `&#123;` / `&#x7b;` references
///     strict = if true, an entity that is interrupted by another `&`, by a space,
///              by the end of the input, or that runs past the length cut-off makes
///              the function throw an `Exception`; if false such text is passed
///              through unchanged where possible
///
/// Returns: the decoded text. When `data` contains no `&` at all the original
/// slice is returned without copying.
///
/// Named entities may be up to 31 characters long (the longest name in the HTML
/// named character reference list). Numeric entities, and unknown names, keep the
/// historic cut-off of 8 characters after the `&`: past that point the text is
/// treated as a stray ampersand (loose mode) or as an error (strict mode).
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// How many characters after the '&' we buffer before deciding the '&' was
	// not the start of an entity after all.
	//
	// shortEntityLimit is the historic cut-off. It used to apply to everything,
	// which made every named entity longer than 8 characters (blacksquare,
	// heartsuit, LeftAngleBracket, ...) impossible to decode even though
	// parseEntity knows them. It is still used for numeric entities so their
	// behavior is unchanged.
	//
	// longEntityLimit leaves room for a 31 character name.
	enum shortEntityLimit = 9;
	enum longEntityLimit = 32;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	// holds '&' plus at most longEntityLimit further characters
	dchar[longEntityLimit + 1] entityBeingTried;
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0; // count of characters seen since the '&'

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.
				auto candidate = entityBeingTried[0 .. entityBeingTriedLength];

				if(candidate == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else {
					auto decoded = parseEntity(candidate);
					if(entityAttemptIndex > shortEntityLimit && decoded == '\ufffd') {
						// Long run that is not a known name: before long names were
						// supported this text was emitted untouched, so keep doing
						// that. The trailing '&' is dropped here because it starts
						// the next attempt below.
						a ~= to!(char[])(candidate[0 .. $ - 1]);
					} else
						a ~= buffer[0.. std.utf.encode(buffer, decoded)];
				}

				// tryingEntity is still true; slot 0 already holds '&'
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0; // restarting on this
			} else
			if(ch == ';') {
				tryingEntity = false;

				auto candidate = entityBeingTried[0 .. entityBeingTriedLength];
				auto decoded = parseEntity(candidate);

				if(entityAttemptIndex > shortEntityLimit && decoded == '\ufffd') {
					// Only named entities can get this long, and this one is not
					// in the table. Preserve the old outcome for such input:
					// throw in strict mode, otherwise pass the text through.
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(candidate));
					a ~= to!(char[])(candidate);
				} else
					a ~= buffer[0.. std.utf.encode(buffer, decoded)];
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
				}
			} else {
				// entityBeingTried[1] is always valid here: at least one
				// character has been stored after the '&'.
				immutable limit = entityBeingTried[1] == '#' ? shortEntityLimit : longEntityLimit;

				if(entityAttemptIndex >= limit) {
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				tryingEntity = true;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Base class for nodes that are not ordinary tags. It refuses children and
/// reports a node type of 100.
/// Group: implementations
abstract class SpecialElement : Element {
	/// Forwards the owning document to the `Element` constructor.
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: `assert(0)` with the message "Cannot append to a special node".
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Always returns 100.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node holding text that is written out exactly as stored; its tag name is `#raw`.
/// Group: implementations
class RawSource : SpecialElement {
	/// Stores `s` as the source text and sets the tag name to `#raw`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// Returns whatever `toString` produces for this node.
	override string nodeValue() const {
		return this.toString();
	}

	/// Puts `source` into `where` unmodified and returns `source`.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	/// Returns `source` unchanged; all three formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		return source;
	}


	/// Creates a new `RawSource` with the same document and source. `deep` is unused.
	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	/// The text this node emits verbatim.
string source;
}

/// Common base for embedded server-side code nodes. The tag name is `#`
/// followed by the `type` given to the constructor, and the node serializes
/// as `<`, the stored source, `>`.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	/// Sets the tag name to `"#" ~ type`. `source` is left for the subclass to fill in.
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		this.tagName = "#" ~ type;
	}

	/// Returns the stored source.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, the source and `>` to `where`, then returns the slice of
	/// the appender's data that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		// remember where our output begins so only that part is returned
		immutable begin = where.data.length;

		where.put("<");
		where.put(source);
		where.put(">");

		return where.data[begin .. $];
	}

	/// Returns `<` ~ source ~ `>`; all three formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		string pretty = "<";
		pretty ~= source;
		pretty ~= ">";
		return pretty;
	}

	/// The text placed between `<` and `>` on output.
	string source;
}

/// Server-side code node whose tag name is `#php`.
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		this.source = s;
	}

	/// Creates a new `PhpCode` with the same document and source. `deep` is unused.
	override PhpCode cloneNode(bool deep) {
		auto copy = new PhpCode(parentDocument, source);
		return copy;
	}
}

/// Server-side code node whose tag name is `#asp`.
/// Group: implementations
class AspCode : ServerSideCode {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		this.source = s;
	}

	/// Creates a new `AspCode` with the same document and source. `deep` is unused.
	override AspCode cloneNode(bool deep) {
		auto copy = new AspCode(parentDocument, source);
		return copy;
	}
}

/// A `<!...>` instruction node; its tag name is `#bpi`.
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Stores `s` as the source and sets the tag name to `#bpi`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		this.tagName = "#bpi";
	}

	/// Returns the stored source.
	override string nodeValue() const {
		return source;
	}

	/// Creates a new `BangInstruction` with the same document and source. `deep` is unused.
	override BangInstruction cloneNode(bool deep) {
		auto copy = new BangInstruction(parentDocument, source);
		return copy;
	}

	/// Appends `<!`, the source and `>` to `where` and returns the part just written.
override string writeToAppender(Appender!string where = appender!string()) const {
		// remember where our output begins so only that part is returned
		immutable begin = where.data.length;

		where.put("<!");
		where.put(source);
		where.put(">");

		return where.data[begin .. $];
	}

	/// Returns `<!` ~ source ~ `>`; all three formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text placed between `<!` and `>` on output.
	string source;
}

/// A node that serializes as `<`, its source, `>`; its tag name is `#qpi`.
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the source and sets the tag name to `#qpi`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		this.tagName = "#qpi";
	}

	/// Creates a new `QuestionInstruction` with the same document and source. `deep` is unused.
	override QuestionInstruction cloneNode(bool deep) {
		auto copy = new QuestionInstruction(parentDocument, source);
		return copy;
	}

	/// Returns the stored source.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, the source and `>` to `where` and returns the part just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;

		where.put("<");
		where.put(source);
		where.put(">");

		return where.data[begin .. $];
	}

	/// Returns `<` ~ source ~ `>`; all three formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text placed between `<` and `>` on output.
	string source;
}

/// A comment node, serialized as `<!--` source `-->`; its tag name is `#comment`.
/// Group: implementations
class HtmlComment : SpecialElement {
	/// Stores `s` as the comment text and sets the tag name to `#comment`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		this.tagName = "#comment";
	}

	/// Creates a new `HtmlComment` with the same document and source. `deep` is unused.
	override HtmlComment cloneNode(bool deep) {
		auto copy = new HtmlComment(parentDocument, source);
		return copy;
	}

	/// Returns the stored comment text.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<!--`, the source and `-->` to `where` and returns the part just written.
5968 override string writeToAppender(Appender!string where = appender!string()) const { 5969 auto start = where.data.length; 5970 where.put("<!--"); 5971 where.put(source); 5972 where.put("-->"); 5973 return where.data[start .. $]; 5974 } 5975 5976 override string toPrettyString(bool, int, string) const { 5977 string s; 5978 s ~= "<!--"; 5979 s ~= source; 5980 s ~= "-->"; 5981 return s; 5982 } 5983 5984 5985 ///. 5986 string source; 5987 } 5988 5989 5990 5991 5992 ///. 5993 /// Group: implementations 5994 class TextNode : Element { 5995 public: 5996 ///. 5997 this(Document _parentDocument, string e) { 5998 super(_parentDocument); 5999 contents = e; 6000 tagName = "#text"; 6001 } 6002 6003 /// 6004 this(string e) { 6005 this(null, e); 6006 } 6007 6008 string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes 6009 6010 ///. 6011 static TextNode fromUndecodedString(Document _parentDocument, string html) { 6012 auto e = new TextNode(_parentDocument, ""); 6013 e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose); 6014 return e; 6015 } 6016 6017 ///. 6018 override @property TextNode cloneNode(bool deep) { 6019 auto n = new TextNode(parentDocument, contents); 6020 return n; 6021 } 6022 6023 ///. 6024 override string nodeValue() const { 6025 return this.contents; //toString(); 6026 } 6027 6028 ///. 6029 @property override int nodeType() const { 6030 return NodeType.Text; 6031 } 6032 6033 ///. 
// TextNode members, continued.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = ""; // a literal, so the non-null assertion below holds for empty text too

		assert(s !is null);
		return s;
	}

	/// Renders the text for pretty-printed output.
	///
	/// Unless the parent is a `pre` element, runs of simple whitespace are collapsed to a
	/// single space (leading whitespace is dropped entirely when `indentationLevel > 0`).
	/// When there is a parent and it is not a `p`, the text is also stripped at both ends.
	/// The result is entity-encoded and split on "\n": the first line is prefixed with
	/// `toPrettyStringIndent(...)`, and every following line starts on a new line indented
	/// by `indentationLevel` copies of `indentWith`, wrapped in `<!--`/`-->` when
	/// `insertComments` is set. Trailing whitespace is removed from each line.
	override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// use the caller's indent unit here as well; this used to be a hard-coded
				// tab, which disagreed with the first line whenever indentWith was not "\t"
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Text nodes cannot have children; calling this always fails the assertion.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The text held by this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

///.
/// Group: implementations
class Link : Element {

	///.
// Link members.
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}


	/// Creates an `a` element with the given `href` attribute and inner text.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns null when the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if(auto found = name in vars)
			return *found;
		return null;
	}

	/// Parses the query string of the `href` attribute into a name => value table.
	///
	/// The query is whatever follows the first `?`, up to (not including) the first `#`
	/// after it. Pairs are separated by `&`; names and values are URI-decoded. A pair
	/// without `=` maps its (decoded) name to the empty string.
	/// Returns null when there is no `href` attribute.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1) {
			str = href[ques+1..$];

			auto fragment = str.indexOf("#");
			if(fragment != -1)
				str = str[0..fragment];
		}

		string[] variables = str.split("&");

		string[string] hash;

		foreach(var; variables) {
			auto index = var.indexOf("=");
			if(index == -1) {
				// decode bare names just like names that carry a value; storing them raw
				// meant they were percent-encoded a second time whenever the query string
				// was written back out
				hash[decodeComponent(var)] = "";
			} else {
				hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	///.
// Link members, continued.
	/*private*/ void updateQueryString(string[string] vars) {
		// Rebuilds the href as: base ~ "?" ~ encoded pairs ~ fragment, and stores it back.
		// A pair with an empty value is written as just its name. The "?" is omitted
		// when `vars` produces no text at all.
		string href = getAttribute("href");

		string frag = "";

		auto question = href.indexOf("?");
		if(question != -1) {
			// the fragment normally FOLLOWS the query string, so it has to be picked up
			// before the query is cut away; otherwise "page?a=1#top" loses "#top"
			auto afterQuestion = href[question + 1 .. $];
			auto hashInQuery = afterQuestion.indexOf("#");
			if(hashInQuery != -1)
				frag = afterQuestion[hashInQuery .. $];

			href = href[0..question];
		}

		// a '#' in what is left (no query, or a '#' that came before the '?')
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

///.
/// Group: implementations
class Form : Element {

	///.
// Form members.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	/// Adds the field to the first element matching "fieldset div" when the form has one,
	/// otherwise falls back to the inherited behavior on the form itself.
	override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto container = this.querySelector("fieldset div");
		return (container is null)
			? super.addField(label, name, type, fieldOptions)
			: container.addField(label, name, type, fieldOptions);
	}

	/// ditto, with the type fixed to "text"
	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
		auto container = this.querySelector("fieldset div");
		auto type = "text";
		return (container is null)
			? super.addField(label, name, type, fieldOptions)
			: container.addField(label, name, type, fieldOptions);
	}

	/// ditto, passing an `options` table through instead of a type
	override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto container = this.querySelector("fieldset div");
		return (container is null)
			? super.addField(label, name, options, fieldOptions)
			: container.addField(label, name, options, fieldOptions);
	}

	/// Same as the three-argument overload with `makeNew` set to true.
	override void setValue(string field, string value) {
		setValue(field, value, true);
	}

	// FIXME: doesn't handle arrays; multiple fields can have the same name

	/// Sets the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.

	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
void setValue(string field, string value, bool makeNew) {
		auto matches = getField(field);

		// no such field: either create a hidden input for it or complain
		if(matches.length == 0) {
			if(!makeNew)
				throw new Exception("form field does not exist");
			addInput(field, value);
			return;
		}

		// several elements share the name
		if(matches.length != 1) {
			// assume radio boxes
			foreach(candidate; matches) {
				string val = candidate.getAttribute("value");
				//if(val is null)
				//	throw new Exception("don't know what to do with radio boxes with null value");
				if(val == value)
					candidate.setAttribute("checked", "checked");
				else
					candidate.removeAttribute("checked");
			}
			return;
		}

		auto e = matches[0];
		switch(e.tagName) {
			default: assert(0);
			case "textarea":
				e.innerText = value;
			break;
			case "input":
				string type = e.getAttribute("type");
				if(type == "checkbox" || type == "radio") {
					// an empty value or the literal "false" unchecks the box
					if(value.length && value != "false")
						e.setAttribute("checked", "checked");
					else
						e.removeAttribute("checked");
				} else {
					// every other type, including a missing type attribute
					e.value = value;
				}
			break;
			case "select":
				bool matched = false;
				foreach(option; e.tree) {
					if(option.tagName != "option")
						continue;

					// an option without a value attribute is identified by its text
					string optionValue = option.getAttribute("value");
					if(optionValue is null)
						optionValue = option.innerText;

					if(optionValue == value) {
						option.setAttribute("selected", "selected");
						matched = true;
					} else {
						option.removeAttribute("selected");
					}
				}

				// nothing matched: add a new, selected option carrying the value
				if(!matched) {
					e.addChild("option", value)
						.setAttribute("selected", "selected");
				}
			break;
		}
	}

	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
	/// it makes no attempt to find and modify existing elements in the form to the new values.
void addValueArray(string key, string[] arrayOfValues) {
		foreach(item; arrayOfValues) {
			addChild("input", key, item);
		}
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
	string getValue(string field) {
		auto matches = getField(field);

		if(matches.length == 0)
			return "";

		if(matches.length != 1) {
			// assuming radio
			foreach(candidate; matches) {
				if(candidate.checked)
					return candidate.value;
			}
			return "";
		}

		auto e = matches[0];
		switch(e.tagName) {
			default: assert(0);
			case "input":
				if(e.type != "checkbox")
					return e.value;
				if(e.checked)
					return e.value.length ? e.value : "checked";
				return "";
			case "textarea":
				return e.innerText;
			case "select":
				// the first option marked selected wins
				foreach(option; e.tree) {
					if(option.tagName != "option")
						continue;
					if(option.selected)
						return option.value;
				}
			break;
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	///.
// Form members, continued.
	string getPostableData() {
		// Builds "name=value&name=value..." from every descendant carrying a name
		// attribute, each name once, with both sides URI-encoded; values come from getValue.
		bool[string] namesDone;

		string ret;
		bool outputted = false;

		foreach(e; getElementsBySelector("[name]")) {
			if(e.name in namesDone)
				continue;

			if(outputted)
				ret ~= "&";
			else
				outputted = true;

			ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name));

			namesDone[e.name] = true;
		}

		return ret;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] ret;
		foreach(e; tree) {
			if(e.name == name)
				ret ~= e;
		}
		return ret;
	}

	/// Grabs the <label> with the given for tag, if there is one.
	Element getLabel(string forId) {
		foreach(e; tree)
			if(e.tagName == "label" && e.getAttribute("for") == forId)
				return e;
		return null;
	}

	/// Adds a new INPUT field to the end of the form with the given attributes.
	Element addInput(string name, string value, string type = "hidden") {
		auto e = new Element(parentDocument, "input", null, true);
		e.name = name;
		e.value = value;
		e.type = type;

		appendChild(e);

		return e;
	}

	/// Removes the given field from the form. It finds the element and knocks it right out.
	void removeField(string name) {
		foreach(e; getField(name))
			e.parentNode.removeChild(e);
	}

	/+
	/// Returns all form members.
	@property Element[] elements() {

	}

	///.
	string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
		 && name != "style" && name != "name" && name != "id" && name != "class")
	{

	}
	+/
/+
	void submit() {
		// take its elements and submit them through http
	}
+/
}

import std.conv;

///.
/// Group: implementations
class Table : Element {

	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/// Creates an element with the given type and content.
	/// An `Html` argument is assigned as inner HTML; anything else is converted with
	/// `to!string` and assigned as inner text. The element is created through the
	/// parent document when there is one, otherwise through `Element.make`.
	Element th(T)(T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement("th");
		else
			e = Element.make("th");
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// ditto
	Element td(T)(T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement("td");
		else
			e = Element.make("td");
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// Appends a row of `th` cells to the `thead` section (created if missing).
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// Appends a row of `td` cells to the `tfoot` section (created if missing).
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// Appends a row of `td` cells to the `tbody` section (created if missing).
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	/// Adds `classes[i]` to the cell in column `i` of every row of the grid.
	/// Empty class names and columns beyond the end of a row are skipped.
	void addColumnClasses(string[] classes...) {
		auto grid = getGrid();
		foreach(row; grid)
			foreach(i, cl; classes) {
				if(cl.length == 0 || i >= row.length)
					continue;
				// getGrid pads short rows with null cells so the grid is rectangular;
				// there is no element to tag in those slots
				if(row[i] is null)
					continue;
				row[i].addClass(cl);
			}
	}

	// Builds a <tr> from the arguments and appends it to the first direct child whose
	// tag name is `findType`, adding such a child when there is none. Each argument
	// becomes a cell of type `innerType`, except: an Element that already is a td/th is
	// used as the cell itself, and a string[] expands into one cell per string.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach(e; t) {
			static if(is(typeof(e) : Element)) {
				if(e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if(is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if(is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach(ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else static if(is(typeof(e) == string[])) {
				foreach(ele; e) {
					Element a = Element.make(innerType);
					a.innerText = to!string(ele);
					row.appendChild(a);
				}
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach(e; children) {
			if(e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the table's `caption` child, appending a new one when none exists.
	Element captionElement() {
		Element cap;
		foreach(c; children) {
			if(c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if(cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// The caption text. Reading it creates the caption element if it was missing.
	@property string caption() {
		return captionElement().innerText;
	}

	///.
// Table members, continued.
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
		in {
			// a non-null portion must be a direct thead/tbody/tfoot child of this table
			if(tablePortition is null)
				assert(tablePortition is null);
			else {
				assert(tablePortition !is null);
				assert(tablePortition.parentNode is this);
				assert(
					tablePortition.tagName == "tbody"
					||
					tablePortition.tagName == "tfoot"
					||
					tablePortition.tagName == "thead"
				);
			}
		}
	do {
		// no portion given: lay out the whole table
		if(tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		int maxLength = 0;

		// Stores `cell` in ret[row]. A position of -1 means "first free slot": the index
		// of the first null entry in that row, or one past its end. Any other position
		// is used as given, padding the row with nulls up to it when the row is shorter.
		// Returns the position that was used (or the one passed in when row is out of range).
		int insertCell(int row, int position, TableCell cell) {
			if(row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if(position == -1) {
				position++;
				foreach(item; ret[row]) {
					if(item is null)
						break;
					position++;
				}
			}

			if(position < ret[row].length)
				ret[row][position] = cell;
			else
				foreach(i; ret[row].length .. position + 1) {
					if(i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach(i, rowElement; rows) {
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			// counts the columns this row's own cells have produced so far
			// NOTE(review): this counter does not take slots already filled by rowspans
			// from earlier rows into account, while the k == 0 insertion below does -
			// confirm the two cannot disagree for the rows beneath a spanning cell
			int position = 0;
			foreach(cellElement; rowElement.childNodes) {
				auto cell = cast(TableCell) cellElement;
				if(cell is null)
					continue; // not a td/th (text, comments, ...)

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				foreach(int j; 0 .. cell.colspan) {
					foreach(int k; 0 .. cell.rowspan)
						// if the first row, always append.
						insertCell(k + cast(int) i, k == 0 ? -1 : position, cell);
					position++;
				}
			}

			if(ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach(ref r; ret) {
			foreach(i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
/// Group: implementations
class TableRow : Element {
	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
/// Group: implementations
class TableCell : Element {
	///.
// TableCell members.
	this(Document _parentDocument, string _tagName) {
		super(_parentDocument, _tagName);
	}

	/// The `rowspan` attribute as an integer; 1 when the attribute is missing or empty.
	/// A non-numeric attribute makes `to!int` throw.
	@property int rowspan() const {
		int ret = 1;
		auto it = getAttribute("rowspan");
		if(it.length)
			ret = to!int(it);
		return ret;
	}

	/// The `colspan` attribute as an integer; 1 when the attribute is missing or empty.
	/// A non-numeric attribute makes `to!int` throw.
	@property int colspan() const {
		int ret = 1;
		auto it = getAttribute("colspan");
		if(it.length)
			ret = to!int(it);
		return ret;
	}

	/// Sets the `rowspan` attribute and returns the value that was set.
	@property int rowspan(int i) {
		setAttribute("rowspan", to!string(i));
		return i;
	}

	/// Sets the `colspan` attribute and returns the value that was set.
	@property int colspan(int i) {
		setAttribute("colspan", to!string(i));
		return i;
	}

}


///.
/// Group: implementations
class MarkupException : Exception {

	///.
	this(string message, string file = __FILE__, size_t line = __LINE__) {
		super(message, file, line);
	}
}

/// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree.
/// Group: implementations
class ElementNotFoundException : Exception {

	/// type == kind of element you were looking for and search == a selector describing the search.
	this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) {
		this.searchContext = searchContext;
		super("Element of type '"~type~"' matching {"~search~"} not found.", file, line);
	}

	/// The element the failed search was started from, as passed to the constructor.
	Element searchContext;
}

/// The html struct is used to differentiate between regular text nodes and html in certain functions
///
/// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");`
/// Group: core_functionality
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
	string source;
}

// for the observers
enum DomMutationOperations {
	setAttribute,
	removeAttribute,
	appendChild, // tagname, attributes[], innerHTML
	insertBefore,
	truncateChildren,
	removeChild,
	appendHtml,
	replaceHtml,
	appendText,
	replaceText,
	replaceTextOnly
}

// and for observers too
struct DomMutationEvent {
	DomMutationOperations operation;
	Element target;
	Element related; // what this means differs with the operation
	Element related2;
	string relatedString;
	string relatedString2;
}


private immutable static string[] htmlSelfClosedElements = [
	// html 4
	"img", "hr", "input", "br", "col", "link", "meta",
	// html 5
	"source" ];

private immutable static string[] htmlInlineElements = [
	"span", "strong", "em", "b", "i", "a"
];


static import std.conv;

/// Converts a string of hexadecimal digits to an integer.
///
/// Digits may be `0`-`9`, `a`-`f` or `A`-`F`. An empty string yields 0.
/// No overflow check is made, so more than eight digits wrap around.
///
/// Throws: Exception naming the offending character when a non-hex character is
/// found; the string is scanned from its last character backwards.
int intFromHex(string hex) {
	int place = 1;
	int value = 0;
	// walk from the least significant digit; the index is signed so that the loop
	// also terminates (without running) for an empty string
	for(sizediff_t a = hex.length - 1; a >= 0; a--) {
		int v;
		char q = hex[a];
		if( q >= '0' && q <= '9')
			v = q - '0';
		else if (q >= 'a' && q <= 'f')
			v = q - 'a' + 10;
		else if (q >= 'A' && q <= 'F')
			v = q - 'A' + 10; // uppercase digits used to be rejected
		else throw new Exception("Illegal hex character: " ~ q);

		value += v * place;

		place *= 16;
	}

	return value;
}


// CSS selector handling

// EXTENSIONS
// dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED
// dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED
// dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl")
// dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED
// :first means to stop at the first hit, don't do more (so p + p == p ~ p:first



//
// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

///.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=",
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Returns the index in `selectorTokens` of the token starting at `str[position]`,
/// or -1 when no token starts there. Two-character tokens are tried first.
sizediff_t idToken(string str, sizediff_t position) {
	sizediff_t tid = -1;
	char c = str[position];
	foreach(a, token; selectorTokens)

		if(c == token[0]) {
			if(token.length > 1) {
				if(position + 1 >= str.length || str[position+1] != token[1])
					continue; // not this token
			}
			tid = a;
			break;
		}
	return tid;
}

/// Splits a selector string into tokens: the entries of `selectorTokens`, quoted
/// strings (returned without their quotes) and identifiers (with backslash escapes
/// resolved). Control characters and `/* */` comments between tokens are dropped;
/// a plain space is kept as a token of its own.
///
/// Throws: Exception when a quoted string is never closed.
// look, ma, no phobos!
// new lexer by ketmar
string[] lexSelector (string selstr) {

	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			immutable end = selstr[0];
			size_t pos = 1;
			bool escaping = false;
			// Scan to the matching quote. A backslash protects the character after it,
			// so the scan has to keep going while `escaping` is set; the old condition
			// stopped right after the first backslash and cut the string short.
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			// pos is on the closing quote now, unless the input ran out first.
			// (A closing quote that is the very last character is fine.)
			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			selstr = selstr[pos + 1.. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
}

///.
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.
// SelectorPart members, continued.

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True when every member other than `separation` still has its initial value.
	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	/// Renders the part back into selector syntax: the combinator for `separation`,
	/// then the tag filter, attribute filters and pseudo-classes.
	string toString() {
		string ret;
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= " > "; break;
			case 2: ret ~= " + "; break;
			case 3: ret ~= " ~ "; break;
			case 4: ret ~= " < "; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]";

		foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")";
		foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")";

		foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")";
		foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")";

		foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")";
		foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")";
		foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(firstOfType) ret ~= ":first-of-type";
		if(lastOfType) ret ~= ":last-of-type";
		if(emptyElement) ret ~= ":empty";
		if(whitespaceOnly) ret ~= ":whitespace-only";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/// Tests whether `e` satisfies every filter stored in this part. `separation` is
	/// not looked at here. Null and nodes whose `nodeType` is not 1 never match.
	bool matchElement(Element e) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					// only the first sibling with this tag name qualifies. When that is
					// `e`, fall through to the remaining filters; returning true here
					// used to skip every check below (attributes, :not, :nth-*, ...)
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					// same as above, looking from the end
					if(c !is e)
						return false;
					break;
				}
			}
		}
		/+
		if(scopeElement) {
			if(e !is this_)
				return false;
		}
		+/
		if(emptyElement) {
			if(e.children.length)
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// i is the zero-based index among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/// The parsed argument of an `:nth-*()` pseudo-class: `multiplier`*n + `adder`,
/// optionally followed by ` of <selector>`.
struct ParsedNth {
	int multiplier; /// the coefficient of n; 0 when the argument is a plain number
	int adder; /// the constant term

	string of; /// the selector text after `of`, if any

	/// Parses `odd`, `even`, a plain integer, or the `An+B` form (a bare `n`, `-n`
	/// or `+n` counts as a coefficient of 1, -1 or 1), each optionally followed by
	/// `of <selector>`.
	///
	/// Throws: Exception when something other than `+`, `-` or `of` follows the `n`.
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even positions are 2n+0; this used to be set up identically to "odd"
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length >= 2 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// a bare sign directly in front of n ("-n+3"): parseNumber would
				// report 0 for it, giving a multiplier of 0 instead of -1 / +1
				n = (text[0] == '-') ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Formats the value back as `<multiplier>n<sign><adder>` plus the ` of ` clause when present.
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/// Tells whether `e`, at its one-based position within `elements`, satisfies
	/// multiplier*n + adder for some integer n >= 0. When `of` is set, only elements
	/// matching that selector are counted. Returns false when `e` is not found.
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;
		foreach(ele; elements) {
			if(of.length) {
				auto sel = Selector(of);
				if(!sel.matchesElement(ele))
					continue;
			}
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		immutable delta = idx - adder;
		if(multiplier == 0)
			return delta == 0;
		if(delta % multiplier != 0)
			return false;
		// n may not be negative: 2n+5 must not match position 1 (n would be -2),
		// and -n+3 must not match position 5
		return delta / multiplier >= 0;
	}

	// drops leading spaces from `text`
	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Reads an optionally signed run of decimal digits from the front of `text` and
	// removes it. Yields 0 when there are no digits (a sign, if any, is still consumed).
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
///.
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
7448 if(part.matchElement(e)) { 7449 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7450 } 7451 } 7452 break; 7453 case 1: // children 7454 foreach(e; start.childNodes) { 7455 if(part.matchElement(e)) { 7456 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7457 } 7458 } 7459 break; 7460 case 2: // next-sibling 7461 auto e = start.nextSibling("*"); 7462 if(part.matchElement(e)) 7463 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7464 break; 7465 case 3: // younger sibling 7466 auto tmp = start.parentNode; 7467 if(tmp !is null) { 7468 sizediff_t pos = -1; 7469 auto children = tmp.childElements; 7470 foreach(i, child; children) { 7471 if(child is start) { 7472 pos = i; 7473 break; 7474 } 7475 } 7476 assert(pos != -1); 7477 foreach(e; children[pos+1..$]) { 7478 if(part.matchElement(e)) 7479 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7480 } 7481 } 7482 break; 7483 case 4: // immediate parent node, an extension of mine to walk back up the tree 7484 auto e = start.parentNode; 7485 if(part.matchElement(e)) { 7486 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7487 } 7488 /* 7489 Example of usefulness: 7490 7491 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 7492 7493 table th < tr 7494 7495 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 7496 */ 7497 break; 7498 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 7499 /* 7500 Like with the < operator, this is best used to find some parent of a particular known element. 7501 7502 Say you have an anchor inside a 7503 */ 7504 } 7505 7506 return ret; 7507 } 7508 7509 /++ 7510 Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing. 
7511 7512 See_Also: 7513 $(LIST 7514 * [Element.querySelector] 7515 * [Element.querySelectorAll] 7516 * [Element.matches] 7517 * [Element.closest] 7518 * [Document.querySelector] 7519 * [Document.querySelectorAll] 7520 ) 7521 +/ 7522 /// Group: core_functionality 7523 struct Selector { 7524 SelectorComponent[] components; 7525 string original; 7526 /++ 7527 Parses the selector string and constructs the usable structure. 7528 +/ 7529 this(string cssSelector) { 7530 components = parseSelectorString(cssSelector); 7531 original = cssSelector; 7532 } 7533 7534 /++ 7535 Returns true if the given element matches this selector, 7536 considered relative to an arbitrary element. 7537 7538 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 7539 with [std.algorithm.iteration.filter]: 7540 7541 --- 7542 Selector sel = Selector("foo > bar"); 7543 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 7544 --- 7545 +/ 7546 bool matchesElement(Element e, Element relativeTo = null) { 7547 foreach(component; components) 7548 if(component.matchElement(e, relativeTo)) 7549 return true; 7550 7551 return false; 7552 } 7553 7554 /++ 7555 Reciprocal of [Element.querySelectorAll] 7556 +/ 7557 Element[] getMatchingElements(Element start) { 7558 Element[] ret; 7559 foreach(component; components) 7560 ret ~= getElementsBySelectorParts(start, component.parts); 7561 return removeDuplicates(ret); 7562 } 7563 7564 /++ 7565 Like [getMatchingElements], but returns a lazy range. Be careful 7566 about mutating the dom as you iterate through this. 
7567 +/ 7568 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 7569 import std.algorithm.iteration; 7570 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 7571 } 7572 7573 7574 /// Returns the string this was built from 7575 string toString() { 7576 return original; 7577 } 7578 7579 /++ 7580 Returns a string from the parsed result 7581 7582 7583 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 7584 +/ 7585 string parsedToString() { 7586 string ret; 7587 7588 foreach(idx, component; components) { 7589 if(idx) ret ~= ", "; 7590 ret ~= component.toString(); 7591 } 7592 7593 return ret; 7594 } 7595 } 7596 7597 ///. 7598 struct SelectorComponent { 7599 ///. 7600 SelectorPart[] parts; 7601 7602 ///. 7603 string toString() { 7604 string ret; 7605 foreach(part; parts) 7606 ret ~= part.toString(); 7607 return ret; 7608 } 7609 7610 // USEFUL 7611 ///. 7612 Element[] getElements(Element start) { 7613 return removeDuplicates(getElementsBySelectorParts(start, parts)); 7614 } 7615 7616 // USEFUL (but not implemented) 7617 /// If relativeTo == null, it assumes the root of the parent document. 7618 bool matchElement(Element e, Element relativeTo = null) { 7619 if(e is null) return false; 7620 Element where = e; 7621 int lastSeparation = -1; 7622 7623 auto lparts = parts; 7624 7625 if(parts.length && parts[0].separation > 0) { 7626 // if it starts with a non-trivial separator, inject 7627 // a "*" matcher to act as a root. for cases like document.querySelector("> body") 7628 // which implies html 7629 7630 // there is probably a MUCH better way to do this. 
7631 auto dummy = SelectorPart.init; 7632 dummy.tagNameFilter = "*"; 7633 dummy.separation = 0; 7634 lparts = dummy ~ lparts; 7635 } 7636 7637 foreach(part; retro(lparts)) { 7638 7639 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 7640 // writeln(parts); 7641 7642 if(lastSeparation == -1) { 7643 if(!part.matchElement(where)) 7644 return false; 7645 } else if(lastSeparation == 0) { // generic parent 7646 // need to go up the whole chain 7647 where = where.parentNode; 7648 7649 while(where !is null) { 7650 if(part.matchElement(where)) 7651 break; 7652 7653 if(where is relativeTo) 7654 return false; 7655 7656 where = where.parentNode; 7657 } 7658 7659 if(where is null) 7660 return false; 7661 } else if(lastSeparation == 1) { // the > operator 7662 where = where.parentNode; 7663 7664 if(!part.matchElement(where)) 7665 return false; 7666 } else if(lastSeparation == 2) { // the + operator 7667 //writeln("WHERE", where, " ", part); 7668 where = where.previousSibling("*"); 7669 7670 if(!part.matchElement(where)) 7671 return false; 7672 } else if(lastSeparation == 3) { // the ~ operator 7673 where = where.previousSibling("*"); 7674 while(where !is null) { 7675 if(part.matchElement(where)) 7676 break; 7677 7678 if(where is relativeTo) 7679 return false; 7680 7681 where = where.previousSibling("*"); 7682 } 7683 7684 if(where is null) 7685 return false; 7686 } else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 7687 // FIXME 7688 } 7689 7690 lastSeparation = part.separation; 7691 7692 if(where is relativeTo) 7693 return false; // at end of line, if we aren't done by now, the match fails 7694 } 7695 return true; // if we got here, it is a success 7696 } 7697 7698 // the string should NOT have commas. Use parseSelectorString for that instead 7699 ///. 7700 static SelectorComponent fromString(string selector) { 7701 return parseSelector(lexSelector(selector)); 7702 } 7703 } 7704 7705 ///. 
// Splits a (possibly comma-separated) selector into its components and parses
// each one. Commas nested inside parentheses, as in `:not(a, b)`, do not split.
// `caseSensitiveTags` is forwarded to parseSelector.
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		// nothing, or only a trailing comma, is left
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/++
	Turns the tokens of one comma-free selector into a [SelectorComponent].

	It is a small state machine: in `Starting` it reads tag names and
	combinators, the other states consume the token(s) following `.`, `#`,
	`:` and `[`. The argument of a functional pseudo-class (`:not(...)`,
	`:nth-child(...)`, ...) is captured as text in one go and its tokens are
	then skipped.

	Params:
		tokens = output of the selector lexer for a single component
		caseSensitiveTags = when false, tag names are lowercased before being stored

	Throws: Exception on malformed input: a functional pseudo-class without
	its opening paren, an operator token where none is allowed, or an unknown
	attribute comparison.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// collects the text between the parens that follow the current token
		string readFunctionalSelector() {
			string inner;
			// the selector may end right after the pseudo-class name, so the
			// index has to be checked before it is used
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				inner ~= t;
			}

			return inner;
		}

		// index into the known operator tokens, or -1 for a plain identifier
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							// a known token that makes no sense here (stray paren,
							// bracket, comparison...). The selector text may come from
							// the user, so report it instead of tripping an assert.
							throw new Exception("unexpected token in css selector: " ~ token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// these are recorded as a required attribute named "nothing"
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// readFunctionalSelector already captured the text; just find the closing paren
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						// NOTE(review): this appends to attributeValue, which is
						// overwritten in ReadingAttributeValue and unused for a bare
						// [attr], so the token is effectively dropped. Appending to
						// attributeName may be what was intended - confirm against
						// the lexer's whitespace handling before changing.
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default:
						// e.g. "==": contains an equals sign but is not an operator we know
						throw new Exception("unknown attribute comparison in css selector: " ~ attributeComparison);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

///.
// Returns the elements of `input` in their original order with every repeat
// of an already seen element dropped.
Element[] removeDuplicates(Element[] input) {
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Builds the style from a rule (the selector text; empty for an inline
		style) and the `name: value;` declarations in `content`.

		Declarations without a colon are skipped. Nothing is stored, not even
		the rule, when `content` is blank.
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		// the loop runs over the declarations parsed above; whatever the
		// expansion appends is not visited again
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Specificity of a selector. Not implemented yet: every rule gets the zero value.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// The four counters overlaid on one integer so that two specificities can be compared through `score`.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// `style.someName` reads and `style.someName = "x"` writes the property
	/// whose dash-style name `unCamelCase` produces from `someName`.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);

		// setValue takes a Specificity, and an integer literal does not convert
		// to the union implicitly, so build it by hand
		Specificity inlineSpecificity;
		inlineSpecificity.score = 0x02000000 /* inline specificity */;
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name. Returns null if the property is not set.
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, taking dash style name.

		An existing value is only replaced when `newSpecificity` is at least as
		high as the one it was stored with. `!important` in the value is removed
		from the text and raises the specificity instead. Shorthands are
		expanded afterwards (see `expandShortForm`).

		Params:
			explicit = false for values derived from a shorthand; such
				properties are left out of `toString`

		Returns: the value with `!important` removed, whether or not it was stored.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties) {
			if(property.name != name)
				continue;

			if(newSpecificity.score < property.specificity.score) {
				//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
				return value; // do nothing - the specificity is too low
			}

			property.givenExplicitly = explicit;
			property.value = value;
			// remember what we were set with so later, weaker rules lose against it
			property.specificity = newSpecificity;

			// expand from a copy, and only after the new value is stored: the
			// expansion can append to `properties`, which may move the array
			// and leave the ref pointing at the old memory
			Property updated = property;
			expandShortForm(updated, newSpecificity);
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit;
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	/// Expands a 1 to 4 value shorthand into `name-top`, `name-right`,
	/// `name-bottom` and `name-left`. Values with any other number of parts are ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on runs of whitespace, so "1px  2px" is still two parts
		auto parts = value.split();

		string top, right, bottom, left;
		switch(parts.length) {
			case 1:
				top = right = bottom = left = parts[0];
			break;
			case 2:
				top = bottom = parts[0];
				right = left = parts[1];
			break;
			case 3:
				// top, horizontal, bottom
				top = parts[0];
				right = left = parts[1];
				bottom = parts[2];
			break;
			case 4:
				top = parts[0];
				right = parts[1];
				bottom = parts[2];
				left = parts[3];
			break;
			default:
				// empty or malformed; style sheets come from outside, so don't die on it
				return;
		}

		setValue(name ~"-top", top, specificity, false);
		setValue(name ~"-right", right, specificity, false);
		setValue(name ~"-bottom", bottom, specificity, false);
		setValue(name ~"-left", left, specificity, false);
	}

	/// Derives the per-side properties from `margin`, `padding`, `border` and
	/// `outline`. Every other property is left alone.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				// the whole value is copied to each side unchanged
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Writes the explicitly given properties back as css text, wrapped in
	/// `rule { ... }` when this style came from a rule.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred shit

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` in `url("...")`. The text is inserted as is, without any escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	///.
	CssStyle[] rules;

	/++
		Splits `source` into `selector { declarations }` rules, one [CssStyle]
		each. Comments are dropped and `@` rules are skipped.
	+/
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// where the characters currently go: the selector or the declarations
		string* currentThing = &currentRule;
		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was an ordinary slash; put it back along with this character
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4: // inside an @ rule, before its block or semicolon
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside the block of an @ rule
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` on top, moving to a bigger heap buffer when the current one is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			// double while small, then grow by a fixed 4096 slots
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	///.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	///.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Moves to the first unvisited child of the current element or, when it
	/// has none left, of the nearest ancestor that does. The range becomes
	/// empty once the start element has run out of children too.
	void popFront() {
		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		while(!isEmpty) {
			current.childPosition++;
			if(current.childPosition < current.element.children.length) {
				// go down: remember where we are, then step into the child
				stack.push(current);
				current.element = current.element.children[current.childPosition];
				current.childPosition = -1;
				return;
			}

			if(stack.empty()) {
				isEmpty = true;
				return;
			}

			// this one is finished; carry on with its parent
			current = stack.pop();
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	///.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element together with the index of the child we last went into
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
// Finds `needle` inside `haystack` using std.algorithm.find.
// Returns the offset of the match, or -1 when find comes back with an empty range.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	auto found = std.algorithm.find(haystack, needle);
	if(found.length == 0)
		return -1;
	// find returns the tail starting at the match, so the difference is the offset
	return haystack.length - found.length;
}

// Returns a new array holding arr[0 .. position + 1], then `what`, then the rest of `arr`.
// Neither input is modified; concatenation always allocates a fresh array.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	return arr[0 .. position + 1] ~ what ~ arr[position + 1 .. $];
}

// Linear search: true if any element of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

// Copies every key/value pair of `arr` into a new associative array.
private string[string] aadup(in string[string] arr) {
	string[string] ret;
	foreach(k, v; arr)
		ret[k] = v;
	return ret;
}

// dom event support, if you want to use it

/// used for DOM events
version(dom_with_events)
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
version(dom_with_events)
class Event {
	/// Creates an event called `eventName` aimed at `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;
	bool propagationStopped;
	string eventName;

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	/// Set to false by dispatch() before the capture pass and to true before the bubble pass.
	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		if(eventName in e.bubblingEventHandlers)
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);

		if(!defaultPrevented)
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the element using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the target, followed by each parentNode in turn
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			chain ~= curr;
			curr = curr.parentNode;
		}

		isBubbling = false;

		// capture pass: walk the chain in reverse, ending at the target
		foreach(e; chain.retro()) {
			if(eventName in e.capturingEventHandlers)
				foreach(handler; e.capturingEventHandlers[eventName])
					handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		isBubbling = true;
		// bubble pass: from the target back up the chain
		if(!propagationStopped)
			foreach(e; chain) {
				if(eventName in e.bubblingEventHandlers)
					foreach(handler; e.bubblingEventHandlers[eventName])
						handler(e, this);

				if(propagationStopped)
					break;
			}

		// default handlers run for every element in the chain unless prevented;
		// note that propagationStopped is not consulted here
		if(!defaultPrevented)
			foreach(e; chain) {
				if(eventName in e.defaultEventHandlers)
					e.defaultEventHandlers[eventName](e, this);
			}
	}
}

/// A small bundle of validation/presentation options for a form field,
/// convertible to and from the `required`, `pattern` and `placeholder` attributes.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// All options left at their defaults.
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	/// Only `isRequired` set.
	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	/// A validation pattern, optionally also required.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Reads the options back from whichever of the three attributes `e` has.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Writes the options that are set onto `e` and returns `e`.
	/// Options that are unset (false / empty) leave the element untouched.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a complete string; with no helpers set, hasMore() is always false.
		this(string d) {
			this.data = d;
		}

		/// Wraps a pull-style source: `hasMoreHelper` says whether another piece
		/// is available and `getMoreHelper` returns it. The first piece, if any,
		/// is fetched right away.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();
		}

		/// Length of the data buffered so far. May first pull in one more piece.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Reads one byte and records the highest index touched so far.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Slices the buffered data and records `end` as touched.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` is the current length (and so may also pull in more data).
		final size_t opDollar() {
			return length();
		}

		/// NOTE: unlike `~` on a string, this appends to this stream's own buffer
		/// and returns this same object rather than a new one.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Appends `s` to the buffered data.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Replaces the buffered data with `rhs`. lastIdx is left as it was.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		string data;

		// highest index handed out through opIndex/opSlice so far
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

// Passes form.setValue as the key/value sink to arsd.database's fillData for `obj` and `name`.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((k, v) => form.setValue(k, v), obj, name);
}


/+
/+
	Syntax:

	Tag: tagname#id.class
	Tree: Tag(Children, comma, separated...)
	Children: Tree or Variable
	Variable: $varname with optional |funcname following.

	If a variable has a tree after it, it breaks the variable down:
		* if array, foreach it does the tree
		* if struct, it breaks down the member variables

	stolen from georgy on irc, see: https://github.com/georgy7/stringplate
+/
struct Stringplate {
	/++

	+/
	this(string s) {

	}

	/++

	+/
	Element expand(T...)(T vars) {
		return null;
	}
}
///
unittest {
	auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))");
	assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`);
}
+/

// True when every child is either a text node with non-whitespace content, or
// an element whose tag name is listed in `inlineElements` and whose own
// children pass the same check. Anything else makes the answer false.
bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) {
	foreach(child; children) {
		if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) {
			// cool
		} else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) {
			// cool
		} else {
			// prolly block
			return false;
		}
	}
	return true;
}

// Space, carriage return, newline or tab only.
private bool isSimpleWhite(dchar c) {
	return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}

unittest {
	// Test for issue #120
	string s = `<html>
	<body>
		<P>AN
		<P>bubbles</P>
		<P>giggles</P>
	</body>
</html>`;
	auto doc = new Document();
	doc.parseUtf8(s, false, false);
	auto s2 = doc.toString();
	assert(
			s2.indexOf("bubbles") < s2.indexOf("giggles"),
			"paragraph order incorrect:\n" ~ s2);
}

unittest {
	// test for suncarpet email dec 24 2019
	// arbitrary id asduiwh
	auto document = new Document("<html>
        <head>
                <meta charset=\"utf-8\"></meta>
                <title>Element.querySelector Test</title>
        </head>
        <body>
                <div id=\"foo\">
                        <div>Foo</div>
                        <div>Bar</div>
                </div>
        </body>
        </html>");

	auto doc = document;

	assert(doc.querySelectorAll("div div").length == 2);
	assert(doc.querySelector("div").querySelectorAll("div").length == 2);
	assert(doc.querySelectorAll("> html").length == 0);
	assert(doc.querySelector("head").querySelectorAll("> title").length == 1);
	assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1);


	assert(doc.root.matches("html"));
	assert(!doc.root.matches("nothtml"));
	assert(doc.querySelector("#foo > div").matches("div"));
	assert(doc.querySelector("body > #foo").matches("#foo"));

	assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root!
	assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does
	assert(doc.querySelectorAll(" > body").length == 1); //  should mean the same thing
	assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this
	assert(doc.root.querySelectorAll(" > html").length == 0); // but not this

	// also confirming the querySelector works via the mdn definition
	auto foo = doc.requireSelector("#foo");
	assert(foo.querySelector("#foo > div") !is null);
	assert(foo.querySelector("body #foo > div") !is null);

	// this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope.
	// the new css :scope thing is designed to bring this in. and meh idk if i even care.
	//assert(foo.querySelectorAll("#foo > div").length == 2);
}

unittest {
	// based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example
	auto document = new Document(`<article>
  <div id="div-01">Here is div-01
    <div id="div-02">Here is div-02
      <div id="div-03">Here is div-03</div>
    </div>
  </div>
</article>`, true, true);

	auto el = document.getElementById("div-03");
	assert(el.closest("#div-02").id == "div-02");
	assert(el.closest("div div").id == "div-03");
	assert(el.closest("article > div").id == "div-01");
	assert(el.closest(":not(div)").tagName == "article");

	assert(el.closest("p") is null);
	assert(el.closest("p, div") is el);
}

unittest {
	// https://developer.mozilla.org/en-US/docs/Web/CSS/:is
	auto document = new Document(`<test>
		<div class="foo"><p>cool</p><span>bar</span></div>
		<main><p>two</p></main>
	</test>`);

	assert(document.querySelectorAll(":is(.foo, main) p").length == 2);
	assert(document.querySelector("div:where(.foo)") !is null);
}

unittest {
immutable string html = q{
<root>
<div class="roundedbox">
 <table>
	<caption class="boxheader">Recent Reviews</caption>
	<tr>
		<th>Game</th>
		<th>User</th>
		<th>Rating</th>
		<th>Created</th>
	</tr>

	<tr>
		<td>June 13, 2020 15:10</td>
		<td><a href="/reviews/8833">[Show]</a></td>
	</tr>

	<tr>
		<td>June 13, 2020 15:02</td>
		<td><a href="/reviews/8832">[Show]</a></td>
	</tr>

	<tr>
		<td>June 13, 2020 14:41</td>
		<td><a href="/reviews/8831">[Show]</a></td>
	</tr>
 </table>
</div>
</root>
};

	auto doc = new Document(cast(string)html);
	// this should select the second table row, but...
	auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");

	rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");
}

unittest {
	try {
		auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>");
		assert(0);
	} catch(Exception e) {
		// good; it should throw an exception, not an error.
	}
}

/*
Copyright: Adam D. Ruppe, 2010 - 2021
License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others

        Copyright Adam D. Ruppe 2010-2021.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
        http://www.boost.org/LICENSE_1_0.txt)
*/