1 // FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it. 2 // FIXME: xml namespace support??? 3 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 4 // FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility... 5 6 // FIXME: the scriptable list is quite arbitrary 7 8 9 // xml entity references?! 10 11 /++ 12 This is an html DOM implementation, started with cloning 13 what the browser offers in Javascript, but going well beyond 14 it in convenience. 15 16 If you can do it in Javascript, you can probably do it with 17 this module, and much more. 18 19 --- 20 import arsd.dom; 21 22 void main() { 23 auto document = new Document("<html><p>paragraph</p></html>"); 24 writeln(document.querySelector("p")); 25 document.root.innerHTML = "<p>hey</p>"; 26 writeln(document); 27 } 28 --- 29 30 BTW: this file optionally depends on `arsd.characterencodings`, to 31 help it correctly read files from the internet. You should be able to 32 get characterencodings.d from the same place you got this file. 33 34 If you want it to stand alone, just always use the `Document.parseUtf8` 35 function or the constructor that takes a string. 36 37 Symbol_groups: 38 39 core_functionality = 40 41 These members provide core functionality. The members on these classes 42 will provide most your direct interaction. 43 44 bonus_functionality = 45 46 These provide additional functionality for special use cases. 47 48 implementations = 49 50 These provide implementations of other functionality. 51 +/ 52 module arsd.dom; 53 54 // FIXME: support the css standard namespace thing in the selectors too 55 56 version(with_arsd_jsvar) 57 import arsd.jsvar; 58 else { 59 enum scriptable = "arsd_jsvar_compatible"; 60 } 61 62 // this is only meant to be used at compile time, as a filter for opDispatch 63 // lists the attributes we want to allow without the use of .attr 64 bool isConvenientAttribute(string name) { 65 static immutable list = [ 66 "name", "id", "href", "value", 67 "checked", "selected", "type", 68 "src", "content", "pattern", 69 "placeholder", "required", "alt", 70 "rel", 71 "method", "action", "enctype" 72 ]; 73 foreach(l; list) 74 if(name == l) return true; 75 return false; 76 } 77 78 79 // FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode 80 // FIXME: failing to close a paragraph sometimes messes things up too 81 82 // FIXME: it would be kinda cool to have some support for internal DTDs 83 // and maybe XPath as well, to some extent 84 /* 85 we could do 86 meh this sux 87 88 auto xpath = XPath(element); 89 90 // get the first p 91 xpath.p[0].a["href"] 92 */ 93 94 95 /// The main document interface, including a html parser. 96 /// Group: core_functionality 97 class Document : FileResource { 98 /// Convenience method for web scraping. Requires [arsd.http2] to be 99 /// included in the build as well as [arsd.characterencodings]. 100 static Document fromUrl()(string url, bool strictMode = false) { 101 import arsd.http2; 102 auto client = new HttpClient(); 103 104 auto req = client.navigateTo(Uri(url), HttpVerb.GET); 105 auto res = req.waitForCompletion(); 106 107 auto document = new Document(); 108 if(strictMode) { 109 document.parse(cast(string) res.content, true, true, res.contentTypeCharset); 110 } else { 111 document.parseGarbage(cast(string) res.content); 112 } 113 114 return document; 115 } 116 117 ///. 118 this(string data, bool caseSensitive = false, bool strict = false) { 119 parseUtf8(data, caseSensitive, strict); 120 } 121 122 /** 123 Creates an empty document. It has *nothing* in it at all. 124 */ 125 this() { 126 127 } 128 129 /// This is just something I'm toying with. Right now, you use opIndex to put in css selectors. 130 /// It returns a struct that forwards calls to all elements it holds, and returns itself so you 131 /// can chain it. 132 /// 133 /// Example: document["p"].innerText("hello").addClass("modified"); 134 /// 135 /// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClas("modified"); } 136 /// 137 /// Note: always use function calls (not property syntax) and don't use toString in there for best results. 138 /// 139 /// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe 140 /// you could put in some kind of custom filter function tho. 141 ElementCollection opIndex(string selector) { 142 auto e = ElementCollection(this.root); 143 return e[selector]; 144 } 145 146 string _contentType = "text/html; charset=utf-8"; 147 148 /// If you're using this for some other kind of XML, you can 149 /// set the content type here. 150 /// 151 /// Note: this has no impact on the function of this class. 152 /// It is only used if the document is sent via a protocol like HTTP. 153 /// 154 /// This may be called by parse() if it recognizes the data. Otherwise, 155 /// if you don't set it, it assumes text/html; charset=utf-8. 156 @property string contentType(string mimeType) { 157 _contentType = mimeType; 158 return _contentType; 159 } 160 161 /// implementing the FileResource interface, useful for sending via 162 /// http automatically. 163 @property string filename() const { return null; } 164 165 /// implementing the FileResource interface, useful for sending via 166 /// http automatically. 167 override @property string contentType() const { 168 return _contentType; 169 } 170 171 /// implementing the FileResource interface; it calls toString. 172 override immutable(ubyte)[] getData() const { 173 return cast(immutable(ubyte)[]) this.toString(); 174 } 175 176 177 /// Concatenates any consecutive text nodes 178 /* 179 void normalize() { 180 181 } 182 */ 183 184 /// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them. 185 /// Call this before calling parse(). 186 187 /// Note this will also preserve the prolog and doctype from the original file, if there was one. 188 void enableAddingSpecialTagsToDom() { 189 parseSawComment = (string) => true; 190 parseSawAspCode = (string) => true; 191 parseSawPhpCode = (string) => true; 192 parseSawQuestionInstruction = (string) => true; 193 parseSawBangInstruction = (string) => true; 194 } 195 196 /// If the parser sees a html comment, it will call this callback 197 /// <!-- comment --> will call parseSawComment(" comment ") 198 /// Return true if you want the node appended to the document. 199 bool delegate(string) parseSawComment; 200 201 /// If the parser sees <% asp code... %>, it will call this callback. 202 /// It will be passed "% asp code... %" or "%= asp code .. %" 203 /// Return true if you want the node appended to the document. 204 bool delegate(string) parseSawAspCode; 205 206 /// If the parser sees <?php php code... ?>, it will call this callback. 207 /// It will be passed "?php php code... ?" or "?= asp code .. ?" 208 /// Note: dom.d cannot identify the other php <? code ?> short format. 209 /// Return true if you want the node appended to the document. 210 bool delegate(string) parseSawPhpCode; 211 212 /// if it sees a <?xxx> that is not php or asp 213 /// it calls this function with the contents. 214 /// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo") 215 /// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>. 216 /// Return true if you want the node appended to the document. 217 bool delegate(string) parseSawQuestionInstruction; 218 219 /// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment), 220 /// it calls this function with the contents. 221 /// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo") 222 /// Return true if you want the node appended to the document. 223 bool delegate(string) parseSawBangInstruction; 224 225 /// Given the kind of garbage you find on the Internet, try to make sense of it. 226 /// Equivalent to document.parse(data, false, false, null); 227 /// (Case-insensitive, non-strict, determine character encoding from the data.) 228 229 /// NOTE: this makes no attempt at added security. 230 /// 231 /// It is a template so it lazily imports characterencodings. 232 void parseGarbage()(string data) { 233 parse(data, false, false, null); 234 } 235 236 /// Parses well-formed UTF-8, case-sensitive, XML or XHTML 237 /// Will throw exceptions on things like unclosed tags. 238 void parseStrict(string data) { 239 parseStream(toUtf8Stream(data), true, true); 240 } 241 242 /// Parses well-formed UTF-8 in loose mode (by default). Tries to correct 243 /// tag soup, but does NOT try to correct bad character encodings. 244 /// 245 /// They will still throw an exception. 246 void parseUtf8(string data, bool caseSensitive = false, bool strict = false) { 247 parseStream(toUtf8Stream(data), caseSensitive, strict); 248 } 249 250 // this is a template so we get lazy import behavior 251 Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) { 252 import arsd.characterencodings; 253 // gotta determine the data encoding. If you know it, pass it in above to skip all this. 254 if(dataEncoding is null) { 255 dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata); 256 // it can't tell... probably a random 8 bit encoding. Let's check the document itself. 257 // Now, XML and HTML can both list encoding in the document, but we can't really parse 258 // it here without changing a lot of code until we know the encoding. So I'm going to 259 // do some hackish string checking. 260 if(dataEncoding is null) { 261 auto dataAsBytes = cast(immutable(ubyte)[]) rawdata; 262 // first, look for an XML prolog 263 auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\""); 264 if(idx != -1) { 265 idx += "encoding=\"".length; 266 // we're probably past the prolog if it's this far in; we might be looking at 267 // content. Forget about it. 268 if(idx > 100) 269 idx = -1; 270 } 271 // if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5).. 272 if(idx == -1) { 273 idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset="); 274 if(idx != -1) { 275 idx += "charset=".length; 276 if(dataAsBytes[idx] == '"') 277 idx++; 278 } 279 } 280 281 // found something in either branch... 282 if(idx != -1) { 283 // read till a quote or about 12 chars, whichever comes first... 284 auto end = idx; 285 while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12) 286 end++; 287 288 dataEncoding = cast(string) dataAsBytes[idx .. end]; 289 } 290 // otherwise, we just don't know. 291 } 292 } 293 294 if(dataEncoding is null) { 295 if(strict) 296 throw new MarkupException("I couldn't figure out the encoding of this document."); 297 else 298 // if we really don't know by here, it means we already tried UTF-8, 299 // looked for utf 16 and 32 byte order marks, and looked for xml or meta 300 // tags... let's assume it's Windows-1252, since that's probably the most 301 // common aside from utf that wouldn't be labeled. 302 303 dataEncoding = "Windows 1252"; 304 } 305 306 // and now, go ahead and convert it. 307 308 string data; 309 310 if(!strict) { 311 // if we're in non-strict mode, we need to check 312 // the document for mislabeling too; sometimes 313 // web documents will say they are utf-8, but aren't 314 // actually properly encoded. If it fails to validate, 315 // we'll assume it's actually Windows encoding - the most 316 // likely candidate for mislabeled garbage. 317 dataEncoding = dataEncoding.toLower(); 318 dataEncoding = dataEncoding.replace(" ", ""); 319 dataEncoding = dataEncoding.replace("-", ""); 320 dataEncoding = dataEncoding.replace("_", ""); 321 if(dataEncoding == "utf8") { 322 try { 323 validate(rawdata); 324 } catch(UTFException e) { 325 dataEncoding = "Windows 1252"; 326 } 327 } 328 } 329 330 if(dataEncoding != "UTF-8") { 331 if(strict) 332 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 333 else { 334 try { 335 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 336 } catch(Exception e) { 337 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252"); 338 } 339 } 340 } else 341 data = rawdata; 342 343 return toUtf8Stream(data); 344 } 345 346 private 347 Utf8Stream toUtf8Stream(in string rawdata) { 348 string data = rawdata; 349 static if(is(Utf8Stream == string)) 350 return data; 351 else 352 return new Utf8Stream(data); 353 } 354 355 /++ 356 List of elements that can be assumed to be self-closed 357 in this document. The default for a Document are a hard-coded 358 list of ones appropriate for HTML. For [XmlDocument], it defaults 359 to empty. You can modify this after construction but before parsing. 360 361 History: 362 Added February 8, 2021 (included in dub release 9.2) 363 +/ 364 string[] selfClosedElements = htmlSelfClosedElements; 365 366 /** 367 Take XMLish data and try to make the DOM tree out of it. 368 369 The goal isn't to be perfect, but to just be good enough to 370 approximate Javascript's behavior. 371 372 If strict, it throws on something that doesn't make sense. 373 (Examples: mismatched tags. It doesn't validate!) 374 If not strict, it tries to recover anyway, and only throws 375 when something is REALLY unworkable. 376 377 If strict is false, it uses a magic list of tags that needn't 378 be closed. If you are writing a document specifically for this, 379 try to avoid such - use self closed tags at least. Easier to parse. 380 381 The dataEncoding argument can be used to pass a specific 382 charset encoding for automatic conversion. If null (which is NOT 383 the default!), it tries to determine from the data itself, 384 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 385 386 If this assumption is wrong, it can throw on non-ascii 387 characters! 388 389 390 Note that it previously assumed the data was encoded as UTF-8, which 391 is why the dataEncoding argument defaults to that. 392 393 So it shouldn't break backward compatibility. 394 395 But, if you want the best behavior on wild data - figuring it out from the document 396 instead of assuming - you'll probably want to change that argument to null. 397 398 This is a template so it lazily imports arsd.characterencodings, which is required 399 to fix up data encodings. 400 401 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 402 dependency. If it is data from the Internet though, a random website, the encoding 403 is often a lie. This function, if dataEncoding == null, can correct for that, or 404 you can try parseGarbage. In those cases, arsd.characterencodings is required to 405 compile. 406 */ 407 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 408 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 409 parseStream(data, caseSensitive, strict); 410 } 411 412 // note: this work best in strict mode, unless data is just a simple string wrapper 413 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 414 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 415 // of my big app. 416 417 assert(data !is null); 418 419 // go through character by character. 420 // if you see a <, consider it a tag. 421 // name goes until the first non tagname character 422 // then see if it self closes or has an attribute 423 424 // if not in a tag, anything not a tag is a big text 425 // node child. It ends as soon as it sees a < 426 427 // Whitespace in text or attributes is preserved, but not between attributes 428 429 // & and friends are converted when I know them, left the same otherwise 430 431 432 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 433 //validate(data); // it *must* be UTF-8 for this to work correctly 434 435 sizediff_t pos = 0; 436 437 clear(); 438 439 loose = !caseSensitive; 440 441 bool sawImproperNesting = false; 442 bool paragraphHackfixRequired = false; 443 444 int getLineNumber(sizediff_t p) { 445 int line = 1; 446 foreach(c; data[0..p]) 447 if(c == '\n') 448 line++; 449 return line; 450 } 451 452 void parseError(string message) { 453 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 454 } 455 456 bool eatWhitespace() { 457 bool ateAny = false; 458 while(pos < data.length && data[pos].isSimpleWhite) { 459 pos++; 460 ateAny = true; 461 } 462 return ateAny; 463 } 464 465 string readTagName() { 466 // remember to include : for namespaces 467 // basically just keep going until >, /, or whitespace 468 auto start = pos; 469 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 470 { 471 pos++; 472 if(pos == data.length) { 473 if(strict) 474 throw new Exception("tag name incomplete when file ended"); 475 else 476 break; 477 } 478 } 479 480 if(!caseSensitive) 481 return toLower(data[start..pos]); 482 else 483 return data[start..pos]; 484 } 485 486 string readAttributeName() { 487 // remember to include : for namespaces 488 // basically just keep going until >, /, or whitespace 489 auto start = pos; 490 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 491 { 492 if(data[pos] == '<') { 493 if(strict) 494 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 495 else 496 break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 497 } 498 pos++; 499 if(pos == data.length) { 500 if(strict) 501 throw new Exception("unterminated attribute name"); 502 else 503 break; 504 } 505 } 506 507 if(!caseSensitive) 508 return toLower(data[start..pos]); 509 else 510 return data[start..pos]; 511 } 512 513 string readAttributeValue() { 514 if(pos >= data.length) { 515 if(strict) 516 throw new Exception("no attribute value before end of file"); 517 else 518 return null; 519 } 520 switch(data[pos]) { 521 case '\'': 522 case '"': 523 auto started = pos; 524 char end = data[pos]; 525 pos++; 526 auto start = pos; 527 while(pos < data.length && data[pos] != end) 528 pos++; 529 if(strict && pos == data.length) 530 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 531 string v = htmlEntitiesDecode(data[start..pos], strict); 532 pos++; // skip over the end 533 return v; 534 default: 535 if(strict) 536 parseError("Attributes must be quoted"); 537 // read until whitespace or terminator (/> or >) 538 auto start = pos; 539 while( 540 pos < data.length && 541 data[pos] != '>' && 542 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 543 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 544 !data[pos].isSimpleWhite) 545 pos++; 546 547 string v = htmlEntitiesDecode(data[start..pos], strict); 548 // don't skip the end - we'll need it later 549 return v; 550 } 551 } 552 553 TextNode readTextNode() { 554 auto start = pos; 555 while(pos < data.length && data[pos] != '<') { 556 pos++; 557 } 558 559 return TextNode.fromUndecodedString(this, data[start..pos]); 560 } 561 562 // this is obsolete! 563 RawSource readCDataNode() { 564 auto start = pos; 565 while(pos < data.length && data[pos] != '<') { 566 pos++; 567 } 568 569 return new RawSource(this, data[start..pos]); 570 } 571 572 573 struct Ele { 574 int type; // element or closing tag or nothing 575 /* 576 type == 0 means regular node, self-closed (element is valid) 577 type == 1 means closing tag (payload is the tag name, element may be valid) 578 type == 2 means you should ignore it completely 579 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 580 type == 4 means the document was totally empty 581 */ 582 Element element; // for type == 0 or type == 3 583 string payload; // for type == 1 584 } 585 // recursively read a tag 586 Ele readElement(string[] parentChain = null) { 587 // FIXME: this is the slowest function in this module, by far, even in strict mode. 588 // Loose mode should perform decently, but strict mode is the important one. 589 if(!strict && parentChain is null) 590 parentChain = []; 591 592 static string[] recentAutoClosedTags; 593 594 if(pos >= data.length) 595 { 596 if(strict) { 597 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 598 } else { 599 if(parentChain.length) 600 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 601 else 602 return Ele(4); // signal emptiness upstream 603 } 604 } 605 606 if(data[pos] != '<') { 607 return Ele(0, readTextNode(), null); 608 } 609 610 enforce(data[pos] == '<'); 611 pos++; 612 if(pos == data.length) { 613 if(strict) 614 throw new MarkupException("Found trailing < at end of file"); 615 // if not strict, we'll just skip the switch 616 } else 617 switch(data[pos]) { 618 // I don't care about these, so I just want to skip them 619 case '!': // might be a comment, a doctype, or a special instruction 620 pos++; 621 622 // FIXME: we should store these in the tree too 623 // though I like having it stripped out tbh. 624 625 if(pos == data.length) { 626 if(strict) 627 throw new MarkupException("<! opened at end of file"); 628 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 629 // comment 630 pos += 2; 631 632 // FIXME: technically, a comment is anything 633 // between -- and -- inside a <!> block. 634 // so in <!-- test -- lol> , the " lol" is NOT a comment 635 // and should probably be handled differently in here, but for now 636 // I'll just keep running until --> since that's the common way 637 638 auto commentStart = pos; 639 while(pos+3 < data.length && data[pos..pos+3] != "-->") 640 pos++; 641 642 auto end = commentStart; 643 644 if(pos + 3 >= data.length) { 645 if(strict) 646 throw new MarkupException("unclosed comment"); 647 end = data.length; 648 pos = data.length; 649 } else { 650 end = pos; 651 assert(data[pos] == '-'); 652 pos++; 653 assert(data[pos] == '-'); 654 pos++; 655 assert(data[pos] == '>'); 656 pos++; 657 } 658 659 if(parseSawComment !is null) 660 if(parseSawComment(data[commentStart .. end])) { 661 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 662 } 663 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 664 pos += 7; 665 666 auto cdataStart = pos; 667 668 ptrdiff_t end = -1; 669 typeof(end) cdataEnd; 670 671 if(pos < data.length) { 672 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 673 end = data[pos .. $].indexOf("]]>"); 674 } 675 676 if(end == -1) { 677 if(strict) 678 throw new MarkupException("Unclosed CDATA section"); 679 end = pos; 680 cdataEnd = pos; 681 } else { 682 cdataEnd = pos + end; 683 pos = cdataEnd + 3; 684 } 685 686 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 687 } else { 688 auto start = pos; 689 while(pos < data.length && data[pos] != '>') 690 pos++; 691 692 auto bangEnds = pos; 693 if(pos == data.length) { 694 if(strict) 695 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 696 } else pos++; // skipping the > 697 698 if(parseSawBangInstruction !is null) 699 if(parseSawBangInstruction(data[start .. bangEnds])) { 700 // FIXME: these should be able to modify the parser state, 701 // doing things like adding entities, somehow. 702 703 return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null); 704 } 705 } 706 707 /* 708 if(pos < data.length && data[pos] == '>') 709 pos++; // skip the > 710 else 711 assert(!strict); 712 */ 713 break; 714 case '%': 715 case '?': 716 /* 717 Here's what we want to support: 718 719 <% asp code %> 720 <%= asp code %> 721 <?php php code ?> 722 <?= php code ?> 723 724 The contents don't really matter, just if it opens with 725 one of the above for, it ends on the two char terminator. 726 727 <?something> 728 this is NOT php code 729 because I've seen this in the wild: <?EM-dummyText> 730 731 This could be php with shorttags which would be cut off 732 prematurely because if(a >) - that > counts as the close 733 of the tag, but since dom.d can't tell the difference 734 between that and the <?EM> real world example, it will 735 not try to look for the ?> ending. 736 737 The difference between this and the asp/php stuff is that it 738 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 739 on >. 740 */ 741 742 char end = data[pos]; 743 auto started = pos; 744 bool isAsp = end == '%'; 745 int currentIndex = 0; 746 bool isPhp = false; 747 bool isEqualTag = false; 748 int phpCount = 0; 749 750 more: 751 pos++; // skip the start 752 if(pos == data.length) { 753 if(strict) 754 throw new MarkupException("Unclosed <"~end~" by end of file"); 755 } else { 756 currentIndex++; 757 if(currentIndex == 1 && data[pos] == '=') { 758 if(!isAsp) 759 isPhp = true; 760 isEqualTag = true; 761 goto more; 762 } 763 if(currentIndex == 1 && data[pos] == 'p') 764 phpCount++; 765 if(currentIndex == 2 && data[pos] == 'h') 766 phpCount++; 767 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 768 isPhp = true; 769 770 if(data[pos] == '>') { 771 if((isAsp || isPhp) && data[pos - 1] != end) 772 goto more; 773 // otherwise we're done 774 } else 775 goto more; 776 } 777 778 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 779 auto code = data[started .. pos]; 780 781 782 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 783 if(pos < data.length) 784 pos++; // get past the > 785 786 if(isAsp && parseSawAspCode !is null) { 787 if(parseSawAspCode(code)) { 788 return Ele(3, new AspCode(this, code), null); 789 } 790 } else if(isPhp && parseSawPhpCode !is null) { 791 if(parseSawPhpCode(code)) { 792 return Ele(3, new PhpCode(this, code), null); 793 } 794 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 795 if(parseSawQuestionInstruction(code)) { 796 return Ele(3, new QuestionInstruction(this, code), null); 797 } 798 } 799 break; 800 case '/': // closing an element 801 pos++; // skip the start 802 auto p = pos; 803 while(pos < data.length && data[pos] != '>') 804 pos++; 805 //writefln("</%s>", data[p..pos]); 806 if(pos == data.length && data[pos-1] != '>') { 807 if(strict) 808 throw new MarkupException("File ended before closing tag had a required >"); 809 else 810 data ~= ">"; // just hack it in 811 } 812 pos++; // skip the '>' 813 814 string tname = data[p..pos-1]; 815 if(!caseSensitive) 816 tname = tname.toLower(); 817 818 return Ele(1, null, tname); // closing tag reports itself here 819 case ' ': // assume it isn't a real element... 820 if(strict) { 821 parseError("bad markup - improperly placed <"); 822 assert(0); // parseError always throws 823 } else 824 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 825 default: 826 827 if(!strict) { 828 // what about something that kinda looks like a tag, but isn't? 829 auto nextTag = data[pos .. $].indexOf("<"); 830 auto closeTag = data[pos .. $].indexOf(">"); 831 if(closeTag != -1 && nextTag != -1) 832 if(nextTag < closeTag) { 833 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 834 835 auto equal = data[pos .. $].indexOf("=\""); 836 if(equal != -1 && equal < closeTag) { 837 // this MIGHT be ok, soldier on 838 } else { 839 // definitely no good, this must be a (horribly distorted) text node 840 pos++; // skip the < we're on - don't want text node to end prematurely 841 auto node = readTextNode(); 842 node.contents = "<" ~ node.contents; // put this back 843 return Ele(0, node, null); 844 } 845 } 846 } 847 848 string tagName = readTagName(); 849 string[string] attributes; 850 851 Ele addTag(bool selfClosed) { 852 if(selfClosed) 853 pos++; 854 else { 855 if(!strict) 856 if(tagName.isInArray(selfClosedElements)) 857 // these are de-facto self closed 858 selfClosed = true; 859 } 860 861 import std.algorithm.comparison; 862 863 if(strict) { 864 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)])); 865 } else { 866 // if we got here, it's probably because a slash was in an 867 // unquoted attribute - don't trust the selfClosed value 868 if(!selfClosed) 869 selfClosed = tagName.isInArray(selfClosedElements); 870 871 while(pos < data.length && data[pos] != '>') 872 pos++; 873 874 if(pos >= data.length) { 875 // the tag never closed 876 assert(data.length != 0); 877 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 878 } 879 } 880 881 auto whereThisTagStarted = pos; // for better error messages 882 883 pos++; 884 885 auto e = createElement(tagName); 886 e.attributes = attributes; 887 version(dom_node_indexes) { 888 if(e.dataset.nodeIndex.length == 0) 889 e.dataset.nodeIndex = to!string(&(e.attributes)); 890 } 891 e.selfClosed = selfClosed; 892 e.parseAttributes(); 893 894 895 // HACK to handle script and style as a raw data section as it is in HTML browsers 896 if(tagName == "script" || tagName == "style") { 897 if(!selfClosed) { 898 string closer = "</" ~ tagName ~ ">"; 899 ptrdiff_t ending; 900 if(pos >= data.length) 901 ending = -1; 902 else 903 ending = indexOf(data[pos..$], closer); 904 905 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 906 /* 907 if(loose && ending == -1 && pos < data.length) 908 ending = indexOf(data[pos..$], closer.toUpper()); 909 */ 910 if(ending == -1) { 911 if(strict) 912 throw new Exception("tag " ~ tagName ~ " never closed"); 913 else { 914 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 915 if(pos < data.length) { 916 e = new TextNode(this, data[pos .. $]); 917 pos = data.length; 918 } 919 } 920 } else { 921 ending += pos; 922 e.innerRawSource = data[pos..ending]; 923 pos = ending + closer.length; 924 } 925 } 926 return Ele(0, e, null); 927 } 928 929 bool closed = selfClosed; 930 931 void considerHtmlParagraphHack(Element n) { 932 assert(!strict); 933 if(e.tagName == "p" && e.tagName == n.tagName) { 934 // html lets you write <p> para 1 <p> para 1 935 // but in the dom tree, they should be siblings, not children. 936 paragraphHackfixRequired = true; 937 } 938 } 939 940 //writef("<%s>", tagName); 941 while(!closed) { 942 Ele n; 943 if(strict) 944 n = readElement(); 945 else 946 n = readElement(parentChain ~ tagName); 947 948 if(n.type == 4) return n; // the document is empty 949 950 if(n.type == 3 && n.element !is null) { 951 // special node, append if possible 952 if(e !is null) 953 e.appendChild(n.element); 954 else 955 piecesBeforeRoot ~= n.element; 956 } else if(n.type == 0) { 957 if(!strict) 958 considerHtmlParagraphHack(n.element); 959 e.appendChild(n.element); 960 } else if(n.type == 1) { 961 bool found = false; 962 if(n.payload != tagName) { 963 if(strict) 964 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 965 else { 966 sawImproperNesting = true; 967 // this is so we don't drop several levels of awful markup 968 if(n.element) { 969 if(!strict) 970 considerHtmlParagraphHack(n.element); 971 e.appendChild(n.element); 972 n.element = null; 973 } 974 975 // is the element open somewhere up the chain? 976 foreach(i, parent; parentChain) 977 if(parent == n.payload) { 978 recentAutoClosedTags ~= tagName; 979 // just rotating it so we don't inadvertently break stuff with vile crap 980 if(recentAutoClosedTags.length > 4) 981 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 982 983 n.element = e; 984 return n; 985 } 986 987 // if not, this is a text node; we can't fix it up... 988 989 // If it's already in the tree somewhere, assume it is closed by algorithm 990 // and we shouldn't output it - odds are the user just flipped a couple tags 991 foreach(ele; e.tree) { 992 if(ele.tagName == n.payload) { 993 found = true; 994 break; 995 } 996 } 997 998 foreach(ele; recentAutoClosedTags) { 999 if(ele == n.payload) { 1000 found = true; 1001 break; 1002 } 1003 } 1004 1005 if(!found) // if not found in the tree though, it's probably just text 1006 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1007 } 1008 } else { 1009 if(n.element) { 1010 if(!strict) 1011 considerHtmlParagraphHack(n.element); 1012 e.appendChild(n.element); 1013 } 1014 } 1015 1016 if(n.payload == tagName) // in strict mode, this is always true 1017 closed = true; 1018 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1019 } 1020 //writef("</%s>\n", tagName); 1021 return Ele(0, e, null); 1022 } 1023 1024 // if a tag was opened but not closed by end of file, we can arrive here 1025 if(!strict && pos >= data.length) 1026 return addTag(false); 1027 //else if(strict) assert(0); // should be caught before 1028 1029 switch(data[pos]) { 1030 default: assert(0); 1031 case '/': // self closing tag 1032 return addTag(true); 1033 case '>': 1034 return addTag(false); 1035 case ' ': 1036 case '\t': 1037 case '\n': 1038 case '\r': 1039 // there might be attributes... 1040 moreAttributes: 1041 eatWhitespace(); 1042 1043 // same deal as above the switch.... 1044 if(!strict && pos >= data.length) 1045 return addTag(false); 1046 1047 if(strict && pos >= data.length) 1048 throw new MarkupException("tag open, didn't find > before end of file"); 1049 1050 switch(data[pos]) { 1051 case '/': // self closing tag 1052 return addTag(true); 1053 case '>': // closed tag; open -- we now read the contents 1054 return addTag(false); 1055 default: // it is an attribute 1056 string attrName = readAttributeName(); 1057 string attrValue = attrName; 1058 1059 bool ateAny = eatWhitespace(); 1060 if(strict && ateAny) 1061 throw new MarkupException("inappropriate whitespace after attribute name"); 1062 1063 if(pos >= data.length) { 1064 if(strict) 1065 assert(0, "this should have thrown in readAttributeName"); 1066 else { 1067 data ~= ">"; 1068 goto blankValue; 1069 } 1070 } 1071 if(data[pos] == '=') { 1072 pos++; 1073 1074 ateAny = eatWhitespace(); 1075 // the spec actually allows this! 1076 //if(strict && ateAny) 1077 //throw new MarkupException("inappropriate whitespace after attribute equals"); 1078 1079 attrValue = readAttributeValue(); 1080 1081 eatWhitespace(); 1082 } 1083 1084 blankValue: 1085 1086 if(strict && attrName in attributes) 1087 throw new MarkupException("Repeated attribute: " ~ attrName); 1088 1089 if(attrName.strip().length) 1090 attributes[attrName] = attrValue; 1091 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1092 1093 if(!strict && pos < data.length && data[pos] == '<') { 1094 // this is the broken tag that doesn't have a > at the end 1095 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1096 // let's insert one as a hack 1097 goto case '>'; 1098 } 1099 1100 goto moreAttributes; 1101 } 1102 } 1103 } 1104 1105 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1106 //assert(0); 1107 } 1108 1109 eatWhitespace(); 1110 Ele r; 1111 do { 1112 r = readElement(); // there SHOULD only be one element... 1113 1114 if(r.type == 3 && r.element !is null) 1115 piecesBeforeRoot ~= r.element; 1116 1117 if(r.type == 4) 1118 break; // the document is completely empty... 1119 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1120 1121 root = r.element; 1122 1123 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1124 while(r.type != 4) { 1125 r = readElement(); 1126 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1127 if(r.element !is null) 1128 piecesAfterRoot ~= r.element; 1129 } 1130 } 1131 1132 if(root is null) 1133 { 1134 if(strict) 1135 assert(0, "empty document should be impossible in strict mode"); 1136 else 1137 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1138 } 1139 1140 if(paragraphHackfixRequired) { 1141 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1142 1143 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1144 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1145 1146 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1147 // Kind of inefficient because we can't detect when we recurse back out of a node. 1148 Element[Element] insertLocations; 1149 auto iterator = root.tree; 1150 foreach(ele; iterator) { 1151 if(ele.parentNode is null) 1152 continue; 1153 1154 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1155 auto shouldBePreviousSibling = ele.parentNode; 1156 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 1157 if (auto p = holder in insertLocations) { 1158 shouldBePreviousSibling = *p; 1159 assert(shouldBePreviousSibling.parentNode is holder); 1160 } 1161 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1162 insertLocations[holder] = ele; 1163 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1164 } 1165 } 1166 } 1167 } 1168 1169 /* end massive parse function */ 1170 1171 /// Gets the <title> element's innerText, if one exists 1172 @property string title() { 1173 bool doesItMatch(Element e) { 1174 return (e.tagName == "title"); 1175 } 1176 1177 auto e = findFirst(&doesItMatch); 1178 if(e) 1179 return e.innerText(); 1180 return ""; 1181 } 1182 1183 /// Sets the title of the page, creating a <title> element if needed. 1184 @property void title(string t) { 1185 bool doesItMatch(Element e) { 1186 return (e.tagName == "title"); 1187 } 1188 1189 auto e = findFirst(&doesItMatch); 1190 1191 if(!e) { 1192 e = createElement("title"); 1193 auto heads = getElementsByTagName("head"); 1194 if(heads.length) 1195 heads[0].appendChild(e); 1196 } 1197 1198 if(e) 1199 e.innerText = t; 1200 } 1201 1202 // FIXME: would it work to alias root this; ???? might be a good idea 1203 /// These functions all forward to the root element. See the documentation in the Element class. 1204 Element getElementById(string id) { 1205 return root.getElementById(id); 1206 } 1207 1208 /// ditto 1209 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1210 if( is(SomeElementType : Element)) 1211 out(ret) { assert(ret !is null); } 1212 body { 1213 return root.requireElementById!(SomeElementType)(id, file, line); 1214 } 1215 1216 /// ditto 1217 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1218 if( is(SomeElementType : Element)) 1219 out(ret) { assert(ret !is null); } 1220 body { 1221 auto e = cast(SomeElementType) querySelector(selector); 1222 if(e is null) 1223 throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line); 1224 return e; 1225 } 1226 1227 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1228 if(is(SomeElementType : Element)) 1229 { 1230 auto e = cast(SomeElementType) querySelector(selector); 1231 return MaybeNullElement!SomeElementType(e); 1232 } 1233 1234 /// ditto 1235 @scriptable 1236 Element querySelector(string selector) { 1237 // see comment below on Document.querySelectorAll 1238 auto s = Selector(selector);//, !loose); 1239 foreach(ref comp; s.components) 1240 if(comp.parts.length && comp.parts[0].separation == 0) 1241 comp.parts[0].separation = -1; 1242 foreach(e; s.getMatchingElementsLazy(this.root)) 1243 return e; 1244 return null; 1245 1246 } 1247 1248 /// ditto 1249 @scriptable 1250 Element[] querySelectorAll(string selector) { 1251 // In standards-compliant code, the document is slightly magical 1252 // in that it is a pseudoelement at top level. It should actually 1253 // match the root as one of its children. 1254 // 1255 // In versions of dom.d before Dec 29 2019, this worked because 1256 // querySelectorAll was willing to return itself. With that bug fix 1257 // (search "arbitrary id asduiwh" in this file for associated unittest) 1258 // this would have failed. Hence adding back the root if it matches the 1259 // selector itself. 1260 // 1261 // I'd love to do this better later. 1262 1263 auto s = Selector(selector);//, !loose); 1264 foreach(ref comp; s.components) 1265 if(comp.parts.length && comp.parts[0].separation == 0) 1266 comp.parts[0].separation = -1; 1267 return s.getMatchingElements(this.root); 1268 } 1269 1270 /// ditto 1271 deprecated("use querySelectorAll instead") 1272 Element[] getElementsBySelector(string selector) { 1273 return root.getElementsBySelector(selector); 1274 } 1275 1276 /// ditto 1277 @scriptable 1278 Element[] getElementsByTagName(string tag) { 1279 return root.getElementsByTagName(tag); 1280 } 1281 1282 /// ditto 1283 @scriptable 1284 Element[] getElementsByClassName(string tag) { 1285 return root.getElementsByClassName(tag); 1286 } 1287 1288 /** FIXME: btw, this could just be a lazy range...... */ 1289 Element getFirstElementByTagName(string tag) { 1290 if(loose) 1291 tag = tag.toLower(); 1292 bool doesItMatch(Element e) { 1293 return e.tagName == tag; 1294 } 1295 return findFirst(&doesItMatch); 1296 } 1297 1298 /// This returns the <body> element, if there is one. (It different than Javascript, where it is called 'body', because body is a keyword in D.) 1299 Element mainBody() { 1300 return getFirstElementByTagName("body"); 1301 } 1302 1303 /// this uses a weird thing... it's [name=] if no colon and 1304 /// [property=] if colon 1305 string getMeta(string name) { 1306 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1307 auto e = querySelector("head meta["~thing~"="~name~"]"); 1308 if(e is null) 1309 return null; 1310 return e.content; 1311 } 1312 1313 /// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/ 1314 void setMeta(string name, string value) { 1315 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1316 auto e = querySelector("head meta["~thing~"="~name~"]"); 1317 if(e is null) { 1318 e = requireSelector("head").addChild("meta"); 1319 e.setAttribute(thing, name); 1320 } 1321 1322 e.content = value; 1323 } 1324 1325 ///. 1326 Form[] forms() { 1327 return cast(Form[]) getElementsByTagName("form"); 1328 } 1329 1330 ///. 1331 Form createForm() 1332 out(ret) { 1333 assert(ret !is null); 1334 } 1335 body { 1336 return cast(Form) createElement("form"); 1337 } 1338 1339 ///. 1340 Element createElement(string name) { 1341 if(loose) 1342 name = name.toLower(); 1343 1344 auto e = Element.make(name, null, null, selfClosedElements); 1345 e.parentDocument = this; 1346 1347 return e; 1348 1349 // return new Element(this, name, null, selfClosed); 1350 } 1351 1352 ///. 1353 Element createFragment() { 1354 return new DocumentFragment(this); 1355 } 1356 1357 ///. 1358 Element createTextNode(string content) { 1359 return new TextNode(this, content); 1360 } 1361 1362 1363 ///. 1364 Element findFirst(bool delegate(Element) doesItMatch) { 1365 if(root is null) 1366 return null; 1367 Element result; 1368 1369 bool goThroughElement(Element e) { 1370 if(doesItMatch(e)) { 1371 result = e; 1372 return true; 1373 } 1374 1375 foreach(child; e.children) { 1376 if(goThroughElement(child)) 1377 return true; 1378 } 1379 1380 return false; 1381 } 1382 1383 goThroughElement(root); 1384 1385 return result; 1386 } 1387 1388 ///. 1389 void clear() { 1390 root = null; 1391 loose = false; 1392 } 1393 1394 ///. 1395 void setProlog(string d) { 1396 _prolog = d; 1397 prologWasSet = true; 1398 } 1399 1400 ///. 1401 private string _prolog = "<!DOCTYPE html>\n"; 1402 private bool prologWasSet = false; // set to true if the user changed it 1403 1404 @property string prolog() const { 1405 // if the user explicitly changed it, do what they want 1406 // or if we didn't keep/find stuff from the document itself, 1407 // we'll use the builtin one as a default. 1408 if(prologWasSet || piecesBeforeRoot.length == 0) 1409 return _prolog; 1410 1411 string p; 1412 foreach(e; piecesBeforeRoot) 1413 p ~= e.toString() ~ "\n"; 1414 return p; 1415 } 1416 1417 ///. 1418 override string toString() const { 1419 return prolog ~ root.toString(); 1420 } 1421 1422 /++ 1423 Writes it out with whitespace for easier eyeball debugging 1424 1425 Do NOT use for anything other than eyeball debugging, 1426 because whitespace may be significant content in XML. 1427 +/ 1428 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 1429 import std.string; 1430 string s = prolog.strip; 1431 1432 /* 1433 if(insertComments) s ~= "<!--"; 1434 s ~= "\n"; 1435 if(insertComments) s ~= "-->"; 1436 */ 1437 1438 s ~= root.toPrettyString(insertComments, indentationLevel, indentWith); 1439 foreach(a; piecesAfterRoot) 1440 s ~= a.toPrettyString(insertComments, indentationLevel, indentWith); 1441 return s; 1442 } 1443 1444 ///. 1445 Element root; 1446 1447 /// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s 1448 Element[] piecesBeforeRoot; 1449 1450 /// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it 1451 Element[] piecesAfterRoot; 1452 1453 ///. 1454 bool loose; 1455 1456 1457 1458 // what follows are for mutation events that you can observe 1459 void delegate(DomMutationEvent)[] eventObservers; 1460 1461 void dispatchMutationEvent(DomMutationEvent e) { 1462 foreach(o; eventObservers) 1463 o(e); 1464 } 1465 } 1466 1467 /// This represents almost everything in the DOM. 1468 /// Group: core_functionality 1469 class Element { 1470 /// Returns a collection of elements by selector. 1471 /// See: [Document.opIndex] 1472 ElementCollection opIndex(string selector) { 1473 auto e = ElementCollection(this); 1474 return e[selector]; 1475 } 1476 1477 /++ 1478 Returns the child node with the particular index. 1479 1480 Be aware that child nodes include text nodes, including 1481 whitespace-only nodes. 1482 +/ 1483 Element opIndex(size_t index) { 1484 if(index >= children.length) 1485 return null; 1486 return this.children[index]; 1487 } 1488 1489 /// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 1490 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1491 if( 1492 is(SomeElementType : Element) 1493 ) 1494 out(ret) { 1495 assert(ret !is null); 1496 } 1497 body { 1498 auto e = cast(SomeElementType) getElementById(id); 1499 if(e is null) 1500 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line); 1501 return e; 1502 } 1503 1504 /// ditto but with selectors instead of ids 1505 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1506 if( 1507 is(SomeElementType : Element) 1508 ) 1509 out(ret) { 1510 assert(ret !is null); 1511 } 1512 body { 1513 auto e = cast(SomeElementType) querySelector(selector); 1514 if(e is null) 1515 throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line); 1516 return e; 1517 } 1518 1519 1520 /++ 1521 If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods. 1522 +/ 1523 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1524 if(is(SomeElementType : Element)) 1525 { 1526 auto e = cast(SomeElementType) querySelector(selector); 1527 return MaybeNullElement!SomeElementType(e); 1528 } 1529 1530 1531 1532 /// get all the classes on this element 1533 @property string[] classes() { 1534 return split(className, " "); 1535 } 1536 1537 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 1538 @scriptable 1539 Element addClass(string c) { 1540 if(hasClass(c)) 1541 return this; // don't add it twice 1542 1543 string cn = getAttribute("class"); 1544 if(cn.length == 0) { 1545 setAttribute("class", c); 1546 return this; 1547 } else { 1548 setAttribute("class", cn ~ " " ~ c); 1549 } 1550 1551 return this; 1552 } 1553 1554 /// Removes a particular class name. 1555 @scriptable 1556 Element removeClass(string c) { 1557 if(!hasClass(c)) 1558 return this; 1559 string n; 1560 foreach(name; classes) { 1561 if(c == name) 1562 continue; // cut it out 1563 if(n.length) 1564 n ~= " "; 1565 n ~= name; 1566 } 1567 1568 className = n.strip(); 1569 1570 return this; 1571 } 1572 1573 /// Returns whether the given class appears in this element. 1574 bool hasClass(string c) { 1575 string cn = className; 1576 1577 auto idx = cn.indexOf(c); 1578 if(idx == -1) 1579 return false; 1580 1581 foreach(cla; cn.split(" ")) 1582 if(cla == c) 1583 return true; 1584 return false; 1585 1586 /* 1587 int rightSide = idx + c.length; 1588 1589 bool checkRight() { 1590 if(rightSide == cn.length) 1591 return true; // it's the only class 1592 else if(iswhite(cn[rightSide])) 1593 return true; 1594 return false; // this is a substring of something else.. 1595 } 1596 1597 if(idx == 0) { 1598 return checkRight(); 1599 } else { 1600 if(!iswhite(cn[idx - 1])) 1601 return false; // substring 1602 return checkRight(); 1603 } 1604 1605 assert(0); 1606 */ 1607 } 1608 1609 1610 /* ******************************* 1611 DOM Mutation 1612 *********************************/ 1613 /// convenience function to quickly add a tag with some text or 1614 /// other relevant info (for example, it's a src for an <img> element 1615 /// instead of inner text) 1616 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 1617 in { 1618 assert(tagName !is null); 1619 } 1620 out(e) { 1621 //assert(e.parentNode is this); 1622 //assert(e.parentDocument is this.parentDocument); 1623 } 1624 body { 1625 auto e = Element.make(tagName, childInfo, childInfo2); 1626 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 1627 // return the parent. That will break existing code though. 1628 return appendChild(e); 1629 } 1630 1631 /// Another convenience function. Adds a child directly after the current one, returning 1632 /// the new child. 1633 /// 1634 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 1635 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 1636 in { 1637 assert(tagName !is null); 1638 assert(parentNode !is null); 1639 } 1640 out(e) { 1641 assert(e.parentNode is this.parentNode); 1642 assert(e.parentDocument is this.parentDocument); 1643 } 1644 body { 1645 auto e = Element.make(tagName, childInfo, childInfo2); 1646 return parentNode.insertAfter(this, e); 1647 } 1648 1649 /// 1650 Element addSibling(Element e) { 1651 return parentNode.insertAfter(this, e); 1652 } 1653 1654 /// 1655 Element addChild(Element e) { 1656 return this.appendChild(e); 1657 } 1658 1659 /// Convenience function to append text intermixed with other children. 1660 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 1661 /// or div.addChildren("Hello, ", user.name, "!"); 1662 1663 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 1664 void addChildren(T...)(T t) { 1665 foreach(item; t) { 1666 static if(is(item : Element)) 1667 appendChild(item); 1668 else static if (is(isSomeString!(item))) 1669 appendText(to!string(item)); 1670 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 1671 } 1672 } 1673 1674 ///. 1675 Element addChild(string tagName, Element firstChild, string info2 = null) 1676 in { 1677 assert(firstChild !is null); 1678 } 1679 out(ret) { 1680 assert(ret !is null); 1681 assert(ret.parentNode is this); 1682 assert(firstChild.parentNode is ret); 1683 1684 assert(ret.parentDocument is this.parentDocument); 1685 //assert(firstChild.parentDocument is this.parentDocument); 1686 } 1687 body { 1688 auto e = Element.make(tagName, "", info2); 1689 e.appendChild(firstChild); 1690 this.appendChild(e); 1691 return e; 1692 } 1693 1694 /// 1695 Element addChild(string tagName, in Html innerHtml, string info2 = null) 1696 in { 1697 } 1698 out(ret) { 1699 assert(ret !is null); 1700 assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null"); 1701 assert(ret.parentDocument is this.parentDocument); 1702 } 1703 body { 1704 auto e = Element.make(tagName, "", info2); 1705 this.appendChild(e); 1706 e.innerHTML = innerHtml.source; 1707 return e; 1708 } 1709 1710 1711 /// . 1712 void appendChildren(Element[] children) { 1713 foreach(ele; children) 1714 appendChild(ele); 1715 } 1716 1717 ///. 1718 void reparent(Element newParent) 1719 in { 1720 assert(newParent !is null); 1721 assert(parentNode !is null); 1722 } 1723 out { 1724 assert(this.parentNode is newParent); 1725 //assert(isInArray(this, newParent.children)); 1726 } 1727 body { 1728 parentNode.removeChild(this); 1729 newParent.appendChild(this); 1730 } 1731 1732 /** 1733 Strips this tag out of the document, putting its inner html 1734 as children of the parent. 1735 1736 For example, given: `<p>hello <b>there</b></p>`, if you 1737 call `stripOut` on the `b` element, you'll be left with 1738 `<p>hello there<p>`. 1739 1740 The idea here is to make it easy to get rid of garbage 1741 markup you aren't interested in. 1742 */ 1743 void stripOut() 1744 in { 1745 assert(parentNode !is null); 1746 } 1747 out { 1748 assert(parentNode is null); 1749 assert(children.length == 0); 1750 } 1751 body { 1752 foreach(c; children) 1753 c.parentNode = null; // remove the parent 1754 if(children.length) 1755 parentNode.replaceChild(this, this.children); 1756 else 1757 parentNode.removeChild(this); 1758 this.children.length = 0; // we reparented them all above 1759 } 1760 1761 /// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check 1762 /// if the element already isn't in a tree, it does nothing. 1763 Element removeFromTree() 1764 in { 1765 1766 } 1767 out(var) { 1768 assert(this.parentNode is null); 1769 assert(var is this); 1770 } 1771 body { 1772 if(this.parentNode is null) 1773 return this; 1774 1775 this.parentNode.removeChild(this); 1776 1777 return this; 1778 } 1779 1780 /++ 1781 Wraps this element inside the given element. 1782 It's like `this.replaceWith(what); what.appendchild(this);` 1783 1784 Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));` 1785 you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`. 1786 +/ 1787 Element wrapIn(Element what) 1788 in { 1789 assert(what !is null); 1790 } 1791 out(ret) { 1792 assert(this.parentNode is what); 1793 assert(ret is what); 1794 } 1795 body { 1796 this.replaceWith(what); 1797 what.appendChild(this); 1798 1799 return what; 1800 } 1801 1802 /// Replaces this element with something else in the tree. 1803 Element replaceWith(Element e) 1804 in { 1805 assert(this.parentNode !is null); 1806 } 1807 body { 1808 e.removeFromTree(); 1809 this.parentNode.replaceChild(this, e); 1810 return e; 1811 } 1812 1813 /** 1814 Splits the className into an array of each class given 1815 */ 1816 string[] classNames() const { 1817 return className().split(" "); 1818 } 1819 1820 /** 1821 Fetches the first consecutive text nodes concatenated together. 1822 1823 1824 `firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered. 1825 1826 See_also: [directText], [innerText] 1827 */ 1828 string firstInnerText() const { 1829 string s; 1830 foreach(child; children) { 1831 if(child.nodeType != NodeType.Text) 1832 break; 1833 1834 s ~= child.nodeValue(); 1835 } 1836 return s; 1837 } 1838 1839 1840 /** 1841 Returns the text directly under this element. 1842 1843 1844 Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues 1845 past child tags. So, `<example>some <b>bold</b> text</example>` 1846 will return `some text` because it only gets the text, skipping non-text children. 1847 1848 See_also: [firstInnerText], [innerText] 1849 */ 1850 @property string directText() { 1851 string ret; 1852 foreach(e; children) { 1853 if(e.nodeType == NodeType.Text) 1854 ret ~= e.nodeValue(); 1855 } 1856 1857 return ret; 1858 } 1859 1860 /** 1861 Sets the direct text, without modifying other child nodes. 1862 1863 1864 Unlike [innerText], this does *not* remove existing elements in the element. 1865 1866 It only replaces the first text node it sees. 1867 1868 If there are no text nodes, it calls [appendText]. 1869 1870 So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`. 1871 */ 1872 @property void directText(string text) { 1873 foreach(e; children) { 1874 if(e.nodeType == NodeType.Text) { 1875 auto it = cast(TextNode) e; 1876 it.contents = text; 1877 return; 1878 } 1879 } 1880 1881 appendText(text); 1882 } 1883 1884 // do nothing, this is primarily a virtual hook 1885 // for links and forms 1886 void setValue(string field, string value) { } 1887 1888 1889 // this is a thing so i can remove observer support if it gets slow 1890 // I have not implemented all these yet 1891 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 1892 if(parentDocument is null) return; 1893 DomMutationEvent me; 1894 me.operation = operation; 1895 me.target = this; 1896 me.relatedString = s1; 1897 me.relatedString2 = s2; 1898 me.related = r; 1899 me.related2 = r2; 1900 parentDocument.dispatchMutationEvent(me); 1901 } 1902 1903 // putting all the members up front 1904 1905 // this ought to be private. don't use it directly. 1906 Element[] children; 1907 1908 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 1909 string tagName; 1910 1911 /// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 1912 string[string] attributes; 1913 1914 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 1915 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 1916 private bool selfClosed; 1917 1918 /// Get the parent Document object that contains this element. 1919 /// It may be null, so remember to check for that. 1920 Document parentDocument; 1921 1922 ///. 1923 inout(Element) parentNode() inout { 1924 auto p = _parentNode; 1925 1926 if(cast(DocumentFragment) p) 1927 return p._parentNode; 1928 1929 return p; 1930 } 1931 1932 //protected 1933 Element parentNode(Element e) { 1934 return _parentNode = e; 1935 } 1936 1937 private Element _parentNode; 1938 1939 // the next few methods are for implementing interactive kind of things 1940 private CssStyle _computedStyle; 1941 1942 // these are here for event handlers. Don't forget that this library never fires events. 1943 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 1944 EventHandler[][string] bubblingEventHandlers; 1945 EventHandler[][string] capturingEventHandlers; 1946 EventHandler[string] defaultEventHandlers; 1947 1948 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 1949 if(event.length > 2 && event[0..2] == "on") 1950 event = event[2 .. $]; 1951 1952 if(useCapture) 1953 capturingEventHandlers[event] ~= handler; 1954 else 1955 bubblingEventHandlers[event] ~= handler; 1956 } 1957 1958 1959 // and now methods 1960 1961 /++ 1962 Convenience function to try to do the right thing for HTML. This is the main way I create elements. 1963 1964 History: 1965 On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private 1966 immutable global list for HTML. It still defaults to the same list, but you can change it now via 1967 the parameter. 1968 +/ 1969 static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) { 1970 bool selfClosed = tagName.isInArray(selfClosedElements); 1971 1972 Element e; 1973 // want to create the right kind of object for the given tag... 1974 switch(tagName) { 1975 case "#text": 1976 e = new TextNode(null, childInfo); 1977 return e; 1978 // break; 1979 case "table": 1980 e = new Table(null); 1981 break; 1982 case "a": 1983 e = new Link(null); 1984 break; 1985 case "form": 1986 e = new Form(null); 1987 break; 1988 case "tr": 1989 e = new TableRow(null); 1990 break; 1991 case "td", "th": 1992 e = new TableCell(null, tagName); 1993 break; 1994 default: 1995 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 1996 } 1997 1998 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 1999 e.tagName = tagName; 2000 e.selfClosed = selfClosed; 2001 2002 if(childInfo !is null) 2003 switch(tagName) { 2004 /* html5 convenience tags */ 2005 case "audio": 2006 if(childInfo.length) 2007 e.addChild("source", childInfo); 2008 if(childInfo2 !is null) 2009 e.appendText(childInfo2); 2010 break; 2011 case "source": 2012 e.src = childInfo; 2013 if(childInfo2 !is null) 2014 e.type = childInfo2; 2015 break; 2016 /* regular html 4 stuff */ 2017 case "img": 2018 e.src = childInfo; 2019 if(childInfo2 !is null) 2020 e.alt = childInfo2; 2021 break; 2022 case "link": 2023 e.href = childInfo; 2024 if(childInfo2 !is null) 2025 e.rel = childInfo2; 2026 break; 2027 case "option": 2028 e.innerText = childInfo; 2029 if(childInfo2 !is null) 2030 e.value = childInfo2; 2031 break; 2032 case "input": 2033 e.type = "hidden"; 2034 e.name = childInfo; 2035 if(childInfo2 !is null) 2036 e.value = childInfo2; 2037 break; 2038 case "button": 2039 e.innerText = childInfo; 2040 if(childInfo2 !is null) 2041 e.type = childInfo2; 2042 break; 2043 case "a": 2044 e.innerText = childInfo; 2045 if(childInfo2 !is null) 2046 e.href = childInfo2; 2047 break; 2048 case "script": 2049 case "style": 2050 e.innerRawSource = childInfo; 2051 break; 2052 case "meta": 2053 e.name = childInfo; 2054 if(childInfo2 !is null) 2055 e.content = childInfo2; 2056 break; 2057 /* generically, assume we were passed text and perhaps class */ 2058 default: 2059 e.innerText = childInfo; 2060 if(childInfo2.length) 2061 e.className = childInfo2; 2062 } 2063 2064 return e; 2065 } 2066 2067 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2068 // FIXME: childInfo2 is ignored when info1 is null 2069 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2070 m.innerHTML = innerHtml.source; 2071 return m; 2072 } 2073 2074 static Element make(string tagName, Element child, string childInfo2 = null) { 2075 auto m = Element.make(tagName, cast(string) null, childInfo2); 2076 m.appendChild(child); 2077 return m; 2078 } 2079 2080 2081 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2082 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2083 parentDocument = _parentDocument; 2084 tagName = _tagName; 2085 if(_attributes !is null) 2086 attributes = _attributes; 2087 selfClosed = _selfClosed; 2088 2089 version(dom_node_indexes) 2090 this.dataset.nodeIndex = to!string(&(this.attributes)); 2091 2092 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2093 } 2094 2095 /++ 2096 Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2097 Note also that without a parent document, elements are always in strict, case-sensitive mode. 2098 2099 History: 2100 On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as 2101 before: using the hard-coded list of HTML elements, but it can now be overridden. If you use 2102 [Document.createElement], it will use the list set for the current document. Otherwise, you can pass 2103 something here if you like. 2104 +/ 2105 this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2106 tagName = _tagName; 2107 if(_attributes !is null) 2108 attributes = _attributes; 2109 selfClosed = tagName.isInArray(selfClosedElements); 2110 2111 // this is meant to reserve some memory. It makes a small, but consistent improvement. 2112 //children.length = 8; 2113 //children.length = 0; 2114 2115 version(dom_node_indexes) 2116 this.dataset.nodeIndex = to!string(&(this.attributes)); 2117 } 2118 2119 private this(Document _parentDocument) { 2120 parentDocument = _parentDocument; 2121 2122 version(dom_node_indexes) 2123 this.dataset.nodeIndex = to!string(&(this.attributes)); 2124 } 2125 2126 2127 /* ******************************* 2128 Navigating the DOM 2129 *********************************/ 2130 2131 /// Returns the first child of this element. If it has no children, returns null. 2132 /// Remember, text nodes are children too. 2133 @property Element firstChild() { 2134 return children.length ? children[0] : null; 2135 } 2136 2137 /// 2138 @property Element lastChild() { 2139 return children.length ? children[$ - 1] : null; 2140 } 2141 2142 /// UNTESTED 2143 /// the next element you would encounter if you were reading it in the source 2144 Element nextInSource() { 2145 auto n = firstChild; 2146 if(n is null) 2147 n = nextSibling(); 2148 if(n is null) { 2149 auto p = this.parentNode; 2150 while(p !is null && n is null) { 2151 n = p.nextSibling; 2152 } 2153 } 2154 2155 return n; 2156 } 2157 2158 /// UNTESTED 2159 /// ditto 2160 Element previousInSource() { 2161 auto p = previousSibling; 2162 if(p is null) { 2163 auto par = parentNode; 2164 if(par) 2165 p = par.lastChild; 2166 if(p is null) 2167 p = par; 2168 } 2169 return p; 2170 } 2171 2172 ///. 2173 @property Element previousElementSibling() { 2174 return previousSibling("*"); 2175 } 2176 2177 ///. 2178 @property Element previousSibling(string tagName = null) { 2179 if(this.parentNode is null) 2180 return null; 2181 Element ps = null; 2182 foreach(e; this.parentNode.childNodes) { 2183 if(e is this) 2184 break; 2185 if(tagName == "*" && e.nodeType != NodeType.Text) { 2186 ps = e; 2187 } else if(tagName is null || e.tagName == tagName) 2188 ps = e; 2189 } 2190 2191 return ps; 2192 } 2193 2194 ///. 2195 @property Element nextElementSibling() { 2196 return nextSibling("*"); 2197 } 2198 2199 ///. 2200 @property Element nextSibling(string tagName = null) { 2201 if(this.parentNode is null) 2202 return null; 2203 Element ns = null; 2204 bool mightBe = false; 2205 foreach(e; this.parentNode.childNodes) { 2206 if(e is this) { 2207 mightBe = true; 2208 continue; 2209 } 2210 if(mightBe) { 2211 if(tagName == "*" && e.nodeType != NodeType.Text) { 2212 ns = e; 2213 break; 2214 } 2215 if(tagName is null || e.tagName == tagName) { 2216 ns = e; 2217 break; 2218 } 2219 } 2220 } 2221 2222 return ns; 2223 } 2224 2225 2226 /// Gets the nearest node, going up the chain, with the given tagName 2227 /// May return null or throw. 2228 T getParent(T = Element)(string tagName = null) if(is(T : Element)) { 2229 if(tagName is null) { 2230 static if(is(T == Form)) 2231 tagName = "form"; 2232 else static if(is(T == Table)) 2233 tagName = "table"; 2234 else static if(is(T == Link)) 2235 tagName == "a"; 2236 } 2237 2238 auto par = this.parentNode; 2239 while(par !is null) { 2240 if(tagName is null || par.tagName == tagName) 2241 break; 2242 par = par.parentNode; 2243 } 2244 2245 static if(!is(T == Element)) { 2246 auto t = cast(T) par; 2247 if(t is null) 2248 throw new ElementNotFoundException("", tagName ~ " parent not found", this); 2249 } else 2250 auto t = par; 2251 2252 return t; 2253 } 2254 2255 ///. 2256 Element getElementById(string id) { 2257 // FIXME: I use this function a lot, and it's kinda slow 2258 // not terribly slow, but not great. 2259 foreach(e; tree) 2260 if(e.id == id) 2261 return e; 2262 return null; 2263 } 2264 2265 /++ 2266 Returns a child element that matches the given `selector`. 2267 2268 Note: you can give multiple selectors, separated by commas. 2269 It will return the first match it finds. 2270 +/ 2271 @scriptable 2272 Element querySelector(string selector) { 2273 Selector s = Selector(selector); 2274 foreach(ele; tree) 2275 if(s.matchesElement(ele)) 2276 return ele; 2277 return null; 2278 } 2279 2280 /// a more standards-compliant alias for getElementsBySelector 2281 @scriptable 2282 Element[] querySelectorAll(string selector) { 2283 return getElementsBySelector(selector); 2284 } 2285 2286 /// If the element matches the given selector. Previously known as `matchesSelector`. 2287 @scriptable 2288 bool matches(string selector) { 2289 /+ 2290 bool caseSensitiveTags = true; 2291 if(parentDocument && parentDocument.loose) 2292 caseSensitiveTags = false; 2293 +/ 2294 2295 Selector s = Selector(selector); 2296 return s.matchesElement(this); 2297 } 2298 2299 /// Returns itself or the closest parent that matches the given selector, or null if none found 2300 /// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest 2301 @scriptable 2302 Element closest(string selector) { 2303 Element e = this; 2304 while(e !is null) { 2305 if(e.matches(selector)) 2306 return e; 2307 e = e.parentNode; 2308 } 2309 return null; 2310 } 2311 2312 /** 2313 Returns elements that match the given CSS selector 2314 2315 * -- all, default if nothing else is there 2316 2317 tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector 2318 2319 It is all additive 2320 2321 OP 2322 2323 space = descendant 2324 > = direct descendant 2325 + = sibling (E+F Matches any F element immediately preceded by a sibling element E) 2326 2327 [foo] Foo is present as an attribute 2328 [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". 2329 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 2330 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 2331 2332 [item$=sdas] ends with 2333 [item^-sdsad] begins with 2334 2335 Quotes are optional here. 2336 2337 Pseudos: 2338 :first-child 2339 :last-child 2340 :link (same as a[href] for our purposes here) 2341 2342 2343 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 2344 2345 2346 2347 This ONLY cares about elements. text, etc, are ignored 2348 2349 2350 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 2351 */ 2352 Element[] getElementsBySelector(string selector) { 2353 // FIXME: this function could probably use some performance attention 2354 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2355 2356 2357 bool caseSensitiveTags = true; 2358 if(parentDocument && parentDocument.loose) 2359 caseSensitiveTags = false; 2360 2361 Element[] ret; 2362 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2363 ret ~= sel.getElements(this); 2364 return ret; 2365 } 2366 2367 /// . 2368 Element[] getElementsByClassName(string cn) { 2369 // is this correct? 2370 return getElementsBySelector("." ~ cn); 2371 } 2372 2373 ///. 2374 Element[] getElementsByTagName(string tag) { 2375 if(parentDocument && parentDocument.loose) 2376 tag = tag.toLower(); 2377 Element[] ret; 2378 foreach(e; tree) 2379 if(e.tagName == tag) 2380 ret ~= e; 2381 return ret; 2382 } 2383 2384 2385 /* ******************************* 2386 Attributes 2387 *********************************/ 2388 2389 /** 2390 Gets the given attribute value, or null if the 2391 attribute is not set. 2392 2393 Note that the returned string is decoded, so it no longer contains any xml entities. 2394 */ 2395 @scriptable 2396 string getAttribute(string name) const { 2397 if(parentDocument && parentDocument.loose) 2398 name = name.toLower(); 2399 auto e = name in attributes; 2400 if(e) 2401 return *e; 2402 else 2403 return null; 2404 } 2405 2406 /** 2407 Sets an attribute. Returns this for easy chaining 2408 */ 2409 @scriptable 2410 Element setAttribute(string name, string value) { 2411 if(parentDocument && parentDocument.loose) 2412 name = name.toLower(); 2413 2414 // I never use this shit legitimately and neither should you 2415 auto it = name.toLower(); 2416 if(it == "href" || it == "src") { 2417 auto v = value.strip().toLower(); 2418 if(v.startsWith("vbscript:")) 2419 value = value[9..$]; 2420 if(v.startsWith("javascript:")) 2421 value = value[11..$]; 2422 } 2423 2424 attributes[name] = value; 2425 2426 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 2427 2428 return this; 2429 } 2430 2431 /** 2432 Returns if the attribute exists. 2433 */ 2434 @scriptable 2435 bool hasAttribute(string name) { 2436 if(parentDocument && parentDocument.loose) 2437 name = name.toLower(); 2438 2439 if(name in attributes) 2440 return true; 2441 else 2442 return false; 2443 } 2444 2445 /** 2446 Removes the given attribute from the element. 2447 */ 2448 @scriptable 2449 Element removeAttribute(string name) 2450 out(ret) { 2451 assert(ret is this); 2452 } 2453 body { 2454 if(parentDocument && parentDocument.loose) 2455 name = name.toLower(); 2456 if(name in attributes) 2457 attributes.remove(name); 2458 2459 sendObserverEvent(DomMutationOperations.removeAttribute, name); 2460 return this; 2461 } 2462 2463 /** 2464 Gets the class attribute's contents. Returns 2465 an empty string if it has no class. 2466 */ 2467 @property string className() const { 2468 auto c = getAttribute("class"); 2469 if(c is null) 2470 return ""; 2471 return c; 2472 } 2473 2474 ///. 2475 @property Element className(string c) { 2476 setAttribute("class", c); 2477 return this; 2478 } 2479 2480 /** 2481 Provides easy access to common HTML attributes, object style. 2482 2483 --- 2484 auto element = Element.make("a"); 2485 a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html"); 2486 string where = a.href; // same as a.getAttribute("href"); 2487 --- 2488 2489 */ 2490 @property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) { 2491 if(v !is null) 2492 setAttribute(name, v); 2493 return getAttribute(name); 2494 } 2495 2496 /** 2497 Old access to attributes. Use [attrs] instead. 2498 2499 DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions. 2500 so I want to remove it. A small whitelist of attributes is still allowed, but others are not. 2501 2502 Instead, use element.attrs.attribute, element.attrs["attribute"], 2503 or element.getAttribute("attribute")/element.setAttribute("attribute"). 2504 */ 2505 @property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) { 2506 static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName"); 2507 } 2508 2509 /* 2510 // this would be nice for convenience, but it broke the getter above. 2511 @property void opDispatch(string name)(bool boolean) if(name != "popFront") { 2512 if(boolean) 2513 setAttribute(name, name); 2514 else 2515 removeAttribute(name); 2516 } 2517 */ 2518 2519 /** 2520 Returns the element's children. 2521 */ 2522 @property const(Element[]) childNodes() const { 2523 return children; 2524 } 2525 2526 /// Mutable version of the same 2527 @property Element[] childNodes() { // FIXME: the above should be inout 2528 return children; 2529 } 2530 2531 /++ 2532 HTML5's dataset property. It is an alternate view into attributes with the data- prefix. 2533 Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");` 2534 +/ 2535 @property DataSet dataset() { 2536 return DataSet(this); 2537 } 2538 2539 /++ 2540 Gives dot/opIndex access to attributes 2541 --- 2542 ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo") 2543 --- 2544 +/ 2545 @property AttributeSet attrs() { 2546 return AttributeSet(this); 2547 } 2548 2549 /++ 2550 Provides both string and object style (like in Javascript) access to the style attribute. 2551 2552 --- 2553 element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute 2554 --- 2555 +/ 2556 @property ElementStyle style() { 2557 return ElementStyle(this); 2558 } 2559 2560 /++ 2561 This sets the style attribute with a string. 2562 +/ 2563 @property ElementStyle style(string s) { 2564 this.setAttribute("style", s); 2565 return this.style; 2566 } 2567 2568 private void parseAttributes(string[] whichOnes = null) { 2569 /+ 2570 if(whichOnes is null) 2571 whichOnes = attributes.keys; 2572 foreach(attr; whichOnes) { 2573 switch(attr) { 2574 case "id": 2575 2576 break; 2577 case "class": 2578 2579 break; 2580 case "style": 2581 2582 break; 2583 default: 2584 // we don't care about it 2585 } 2586 } 2587 +/ 2588 } 2589 2590 2591 // if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there. 2592 /// Don't use this. 2593 @property CssStyle computedStyle() { 2594 if(_computedStyle is null) { 2595 auto style = this.getAttribute("style"); 2596 /* we'll treat shitty old html attributes as css here */ 2597 if(this.hasAttribute("width")) 2598 style ~= "; width: " ~ this.attrs.width; 2599 if(this.hasAttribute("height")) 2600 style ~= "; height: " ~ this.attrs.height; 2601 if(this.hasAttribute("bgcolor")) 2602 style ~= "; background-color: " ~ this.attrs.bgcolor; 2603 if(this.tagName == "body" && this.hasAttribute("text")) 2604 style ~= "; color: " ~ this.attrs.text; 2605 if(this.hasAttribute("color")) 2606 style ~= "; color: " ~ this.attrs.color; 2607 /* done */ 2608 2609 2610 _computedStyle = new CssStyle(null, style); // gives at least something to work with 2611 } 2612 return _computedStyle; 2613 } 2614 2615 /// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good 2616 version(browser) { 2617 void* expansionHook; ///ditto 2618 int offsetWidth; ///ditto 2619 int offsetHeight; ///ditto 2620 int offsetLeft; ///ditto 2621 int offsetTop; ///ditto 2622 Element offsetParent; ///ditto 2623 bool hasLayout; ///ditto 2624 int zIndex; ///ditto 2625 2626 ///ditto 2627 int absoluteLeft() { 2628 int a = offsetLeft; 2629 auto p = offsetParent; 2630 while(p) { 2631 a += p.offsetLeft; 2632 p = p.offsetParent; 2633 } 2634 2635 return a; 2636 } 2637 2638 ///ditto 2639 int absoluteTop() { 2640 int a = offsetTop; 2641 auto p = offsetParent; 2642 while(p) { 2643 a += p.offsetTop; 2644 p = p.offsetParent; 2645 } 2646 2647 return a; 2648 } 2649 } 2650 2651 // Back to the regular dom functions 2652 2653 public: 2654 2655 2656 /* ******************************* 2657 DOM Mutation 2658 *********************************/ 2659 2660 /// Removes all inner content from the tag; all child text and elements are gone. 2661 void removeAllChildren() 2662 out { 2663 assert(this.children.length == 0); 2664 } 2665 body { 2666 children = null; 2667 } 2668 2669 /// History: added June 13, 2020 2670 Element appendSibling(Element e) { 2671 parentNode.insertAfter(this, e); 2672 return e; 2673 } 2674 2675 /// History: added June 13, 2020 2676 Element prependSibling(Element e) { 2677 parentNode.insertBefore(this, e); 2678 return e; 2679 } 2680 2681 2682 /++ 2683 Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one. 2684 2685 See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild 2686 2687 History: 2688 Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here. 2689 +/ 2690 Element appendChild(Element e) 2691 in { 2692 assert(e !is null); 2693 } 2694 out (ret) { 2695 assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null"); 2696 assert(e.parentDocument is this.parentDocument); 2697 assert(e is ret); 2698 } 2699 body { 2700 if(e.parentNode !is null) 2701 e.parentNode.removeChild(e); 2702 2703 selfClosed = false; 2704 e.parentNode = this; 2705 e.parentDocument = this.parentDocument; 2706 if(auto frag = cast(DocumentFragment) e) 2707 children ~= frag.children; 2708 else 2709 children ~= e; 2710 2711 sendObserverEvent(DomMutationOperations.appendChild, null, null, e); 2712 2713 return e; 2714 } 2715 2716 /// Inserts the second element to this node, right before the first param 2717 Element insertBefore(in Element where, Element what) 2718 in { 2719 assert(where !is null); 2720 assert(where.parentNode is this); 2721 assert(what !is null); 2722 assert(what.parentNode is null); 2723 } 2724 out (ret) { 2725 assert(where.parentNode is this); 2726 assert(what.parentNode is this); 2727 2728 assert(what.parentDocument is this.parentDocument); 2729 assert(ret is what); 2730 } 2731 body { 2732 foreach(i, e; children) { 2733 if(e is where) { 2734 if(auto frag = cast(DocumentFragment) what) 2735 children = children[0..i] ~ frag.children ~ children[i..$]; 2736 else 2737 children = children[0..i] ~ what ~ children[i..$]; 2738 what.parentDocument = this.parentDocument; 2739 what.parentNode = this; 2740 return what; 2741 } 2742 } 2743 2744 return what; 2745 2746 assert(0); 2747 } 2748 2749 /++ 2750 Inserts the given element `what` as a sibling of the `this` element, after the element `where` in the parent node. 2751 +/ 2752 Element insertAfter(in Element where, Element what) 2753 in { 2754 assert(where !is null); 2755 assert(where.parentNode is this); 2756 assert(what !is null); 2757 assert(what.parentNode is null); 2758 } 2759 out (ret) { 2760 assert(where.parentNode is this); 2761 assert(what.parentNode is this); 2762 assert(what.parentDocument is this.parentDocument); 2763 assert(ret is what); 2764 } 2765 body { 2766 foreach(i, e; children) { 2767 if(e is where) { 2768 if(auto frag = cast(DocumentFragment) what) 2769 children = children[0 .. i + 1] ~ what.children ~ children[i + 1 .. $]; 2770 else 2771 children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $]; 2772 what.parentNode = this; 2773 what.parentDocument = this.parentDocument; 2774 return what; 2775 } 2776 } 2777 2778 return what; 2779 2780 assert(0); 2781 } 2782 2783 /// swaps one child for a new thing. Returns the old child which is now parentless. 2784 Element swapNode(Element child, Element replacement) 2785 in { 2786 assert(child !is null); 2787 assert(replacement !is null); 2788 assert(child.parentNode is this); 2789 } 2790 out(ret) { 2791 assert(ret is child); 2792 assert(ret.parentNode is null); 2793 assert(replacement.parentNode is this); 2794 assert(replacement.parentDocument is this.parentDocument); 2795 } 2796 body { 2797 foreach(ref c; this.children) 2798 if(c is child) { 2799 c.parentNode = null; 2800 c = replacement; 2801 c.parentNode = this; 2802 c.parentDocument = this.parentDocument; 2803 return child; 2804 } 2805 assert(0); 2806 } 2807 2808 2809 /++ 2810 Appends the given to the node. 2811 2812 2813 Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>` 2814 yields `<example>text <b>bold</b> hi</example>`. 2815 2816 See_Also: 2817 [firstInnerText], [directText], [innerText], [appendChild] 2818 +/ 2819 @scriptable 2820 Element appendText(string text) { 2821 Element e = new TextNode(parentDocument, text); 2822 appendChild(e); 2823 return this; 2824 } 2825 2826 /++ 2827 Returns child elements which are of a tag type (excludes text, comments, etc.). 2828 2829 2830 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 2831 2832 Params: 2833 tagName = filter results to only the child elements with the given tag name. 2834 +/ 2835 @property Element[] childElements(string tagName = null) { 2836 Element[] ret; 2837 foreach(c; children) 2838 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 2839 ret ~= c; 2840 return ret; 2841 } 2842 2843 /++ 2844 Appends the given html to the element, returning the elements appended 2845 2846 2847 This is similar to `element.innerHTML += "html string";` in Javascript. 2848 +/ 2849 @scriptable 2850 Element[] appendHtml(string html) { 2851 Document d = new Document("<root>" ~ html ~ "</root>"); 2852 return stealChildren(d.root); 2853 } 2854 2855 2856 ///. 2857 void insertChildAfter(Element child, Element where) 2858 in { 2859 assert(child !is null); 2860 assert(where !is null); 2861 assert(where.parentNode is this); 2862 assert(!selfClosed); 2863 //assert(isInArray(where, children)); 2864 } 2865 out { 2866 assert(child.parentNode is this); 2867 assert(where.parentNode is this); 2868 //assert(isInArray(where, children)); 2869 //assert(isInArray(child, children)); 2870 } 2871 body { 2872 foreach(ref i, c; children) { 2873 if(c is where) { 2874 i++; 2875 if(auto frag = cast(DocumentFragment) child) 2876 children = children[0..i] ~ child.children ~ children[i..$]; 2877 else 2878 children = children[0..i] ~ child ~ children[i..$]; 2879 child.parentNode = this; 2880 child.parentDocument = this.parentDocument; 2881 break; 2882 } 2883 } 2884 } 2885 2886 /++ 2887 Reparents all the child elements of `e` to `this`, leaving `e` childless. 2888 2889 Params: 2890 e = the element whose children you want to steal 2891 position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children. 2892 +/ 2893 Element[] stealChildren(Element e, Element position = null) 2894 in { 2895 assert(!selfClosed); 2896 assert(e !is null); 2897 //if(position !is null) 2898 //assert(isInArray(position, children)); 2899 } 2900 out (ret) { 2901 assert(e.children.length == 0); 2902 // all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible... 2903 version(none) 2904 debug foreach(child; ret) { 2905 assert(child.parentNode is this); 2906 assert(child.parentDocument is this.parentDocument); 2907 } 2908 } 2909 body { 2910 foreach(c; e.children) { 2911 c.parentNode = this; 2912 c.parentDocument = this.parentDocument; 2913 } 2914 if(position is null) 2915 children ~= e.children; 2916 else { 2917 foreach(i, child; children) { 2918 if(child is position) { 2919 children = children[0..i] ~ 2920 e.children ~ 2921 children[i..$]; 2922 break; 2923 } 2924 } 2925 } 2926 2927 auto ret = e.children[]; 2928 e.children.length = 0; 2929 2930 return ret; 2931 } 2932 2933 /// Puts the current element first in our children list. The given element must not have a parent already. 2934 Element prependChild(Element e) 2935 in { 2936 assert(e.parentNode is null); 2937 assert(!selfClosed); 2938 } 2939 out { 2940 assert(e.parentNode is this); 2941 assert(e.parentDocument is this.parentDocument); 2942 assert(children[0] is e); 2943 } 2944 body { 2945 e.parentNode = this; 2946 e.parentDocument = this.parentDocument; 2947 if(auto frag = cast(DocumentFragment) e) 2948 children = e.children ~ children; 2949 else 2950 children = e ~ children; 2951 return e; 2952 } 2953 2954 2955 /** 2956 Returns a string containing all child elements, formatted such that it could be pasted into 2957 an XML file. 2958 */ 2959 @property string innerHTML(Appender!string where = appender!string()) const { 2960 if(children is null) 2961 return ""; 2962 2963 auto start = where.data.length; 2964 2965 foreach(child; children) { 2966 assert(child !is null); 2967 2968 child.writeToAppender(where); 2969 } 2970 2971 return where.data[start .. $]; 2972 } 2973 2974 /** 2975 Takes some html and replaces the element's children with the tree made from the string. 2976 */ 2977 @property Element innerHTML(string html, bool strict = false) { 2978 if(html.length) 2979 selfClosed = false; 2980 2981 if(html.length == 0) { 2982 // I often say innerHTML = ""; as a shortcut to clear it out, 2983 // so let's optimize that slightly. 2984 removeAllChildren(); 2985 return this; 2986 } 2987 2988 auto doc = new Document(); 2989 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document 2990 2991 children = doc.root.children; 2992 foreach(c; children) { 2993 c.parentNode = this; 2994 c.parentDocument = this.parentDocument; 2995 } 2996 2997 reparentTreeDocuments(); 2998 2999 doc.root.children = null; 3000 3001 return this; 3002 } 3003 3004 /// ditto 3005 @property Element innerHTML(Html html) { 3006 return this.innerHTML = html.source; 3007 } 3008 3009 private void reparentTreeDocuments() { 3010 foreach(c; this.tree) 3011 c.parentDocument = this.parentDocument; 3012 } 3013 3014 /** 3015 Replaces this node with the given html string, which is parsed 3016 3017 Note: this invalidates the this reference, since it is removed 3018 from the tree. 3019 3020 Returns the new children that replace this. 3021 */ 3022 @property Element[] outerHTML(string html) { 3023 auto doc = new Document(); 3024 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness 3025 3026 children = doc.root.children; 3027 foreach(c; children) { 3028 c.parentNode = this; 3029 c.parentDocument = this.parentDocument; 3030 } 3031 3032 3033 reparentTreeDocuments(); 3034 3035 3036 stripOut(); 3037 3038 return doc.root.children; 3039 } 3040 3041 /++ 3042 Returns all the html for this element, including the tag itself. 3043 3044 This is equivalent to calling toString(). 3045 +/ 3046 @property string outerHTML() { 3047 return this.toString(); 3048 } 3049 3050 /// This sets the inner content of the element *without* trying to parse it. 3051 /// You can inject any code in there; this serves as an escape hatch from the dom. 3052 /// 3053 /// The only times you might actually need it are for < style > and < script > tags in html. 3054 /// Other than that, innerHTML and/or innerText should do the job. 3055 @property void innerRawSource(string rawSource) { 3056 children.length = 0; 3057 auto rs = new RawSource(parentDocument, rawSource); 3058 rs.parentNode = this; 3059 3060 children ~= rs; 3061 } 3062 3063 ///. 3064 Element replaceChild(Element find, Element replace) 3065 in { 3066 assert(find !is null); 3067 assert(replace !is null); 3068 assert(replace.parentNode is null); 3069 } 3070 out(ret) { 3071 assert(ret is replace); 3072 assert(replace.parentNode is this); 3073 assert(replace.parentDocument is this.parentDocument); 3074 assert(find.parentNode is null); 3075 } 3076 body { 3077 // FIXME 3078 //if(auto frag = cast(DocumentFragment) replace) 3079 //return this.replaceChild(frag, replace.children); 3080 for(int i = 0; i < children.length; i++) { 3081 if(children[i] is find) { 3082 replace.parentNode = this; 3083 children[i].parentNode = null; 3084 children[i] = replace; 3085 replace.parentDocument = this.parentDocument; 3086 return replace; 3087 } 3088 } 3089 3090 throw new Exception("no such child"); 3091 } 3092 3093 /** 3094 Replaces the given element with a whole group. 3095 */ 3096 void replaceChild(Element find, Element[] replace) 3097 in { 3098 assert(find !is null); 3099 assert(replace !is null); 3100 assert(find.parentNode is this); 3101 debug foreach(r; replace) 3102 assert(r.parentNode is null); 3103 } 3104 out { 3105 assert(find.parentNode is null); 3106 assert(children.length >= replace.length); 3107 debug foreach(child; children) 3108 assert(child !is find); 3109 debug foreach(r; replace) 3110 assert(r.parentNode is this); 3111 } 3112 body { 3113 if(replace.length == 0) { 3114 removeChild(find); 3115 return; 3116 } 3117 assert(replace.length); 3118 for(int i = 0; i < children.length; i++) { 3119 if(children[i] is find) { 3120 children[i].parentNode = null; // this element should now be dead 3121 children[i] = replace[0]; 3122 foreach(e; replace) { 3123 e.parentNode = this; 3124 e.parentDocument = this.parentDocument; 3125 } 3126 3127 children = .insertAfter(children, i, replace[1..$]); 3128 3129 return; 3130 } 3131 } 3132 3133 throw new Exception("no such child"); 3134 } 3135 3136 3137 /** 3138 Removes the given child from this list. 3139 3140 Returns the removed element. 3141 */ 3142 Element removeChild(Element c) 3143 in { 3144 assert(c !is null); 3145 assert(c.parentNode is this); 3146 } 3147 out { 3148 debug foreach(child; children) 3149 assert(child !is c); 3150 assert(c.parentNode is null); 3151 } 3152 body { 3153 foreach(i, e; children) { 3154 if(e is c) { 3155 children = children[0..i] ~ children [i+1..$]; 3156 c.parentNode = null; 3157 return c; 3158 } 3159 } 3160 3161 throw new Exception("no such child"); 3162 } 3163 3164 /// This removes all the children from this element, returning the old list. 3165 Element[] removeChildren() 3166 out (ret) { 3167 assert(children.length == 0); 3168 debug foreach(r; ret) 3169 assert(r.parentNode is null); 3170 } 3171 body { 3172 Element[] oldChildren = children.dup; 3173 foreach(c; oldChildren) 3174 c.parentNode = null; 3175 3176 children.length = 0; 3177 3178 return oldChildren; 3179 } 3180 3181 /** 3182 Fetch the inside text, with all tags stripped out. 3183 3184 <p>cool <b>api</b> & code dude<p> 3185 innerText of that is "cool api & code dude". 3186 3187 This does not match what real innerText does! 3188 http://perfectionkills.com/the-poor-misunderstood-innerText/ 3189 3190 It is more like textContent. 3191 */ 3192 @scriptable 3193 @property string innerText() const { 3194 string s; 3195 foreach(child; children) { 3196 if(child.nodeType != NodeType.Text) 3197 s ~= child.innerText; 3198 else 3199 s ~= child.nodeValue(); 3200 } 3201 return s; 3202 } 3203 3204 /// 3205 alias textContent = innerText; 3206 3207 /** 3208 Sets the inside text, replacing all children. You don't 3209 have to worry about entity encoding. 3210 */ 3211 @scriptable 3212 @property void innerText(string text) { 3213 selfClosed = false; 3214 Element e = new TextNode(parentDocument, text); 3215 e.parentNode = this; 3216 children = [e]; 3217 } 3218 3219 /** 3220 Strips this node out of the document, replacing it with the given text 3221 */ 3222 @property void outerText(string text) { 3223 parentNode.replaceChild(this, new TextNode(parentDocument, text)); 3224 } 3225 3226 /** 3227 Same result as innerText; the tag with all inner tags stripped out 3228 */ 3229 @property string outerText() const { 3230 return innerText; 3231 } 3232 3233 3234 /* ******************************* 3235 Miscellaneous 3236 *********************************/ 3237 3238 /// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it. 3239 @property Element cloned() 3240 /+ 3241 out(ret) { 3242 // FIXME: not sure why these fail... 3243 assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length)); 3244 assert(ret.tagName == this.tagName); 3245 } 3246 body { 3247 +/ 3248 { 3249 return this.cloneNode(true); 3250 } 3251 3252 /// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents. 3253 Element cloneNode(bool deepClone) { 3254 auto e = Element.make(this.tagName); 3255 e.parentDocument = this.parentDocument; 3256 e.attributes = this.attributes.aadup; 3257 e.selfClosed = this.selfClosed; 3258 3259 if(deepClone) { 3260 foreach(child; children) { 3261 e.appendChild(child.cloneNode(true)); 3262 } 3263 } 3264 3265 3266 return e; 3267 } 3268 3269 /// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class. 3270 string nodeValue() const { 3271 return ""; 3272 } 3273 3274 // should return int 3275 ///. 3276 @property int nodeType() const { 3277 return 1; 3278 } 3279 3280 3281 invariant () { 3282 assert(tagName.indexOf(" ") == -1); 3283 3284 if(children !is null) 3285 debug foreach(child; children) { 3286 // assert(parentNode !is null); 3287 assert(child !is null); 3288 // assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName)); 3289 assert(child !is this); 3290 //assert(child !is parentNode); 3291 } 3292 3293 /+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out 3294 if(parentNode !is null) { 3295 // if you have a parent, you should share the same parentDocument; this is appendChild()'s job 3296 auto lol = cast(TextNode) this; 3297 assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents); 3298 } 3299 +/ 3300 //assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required 3301 // reason is so you can create these without needing a reference to the document 3302 } 3303 3304 /** 3305 Turns the whole element, including tag, attributes, and children, into a string which could be pasted into 3306 an XML file. 3307 */ 3308 override string toString() const { 3309 return writeToAppender(); 3310 } 3311 3312 protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const { 3313 if(indentWith is null) 3314 return null; 3315 string s; 3316 3317 if(insertComments) s ~= "<!--"; 3318 s ~= "\n"; 3319 foreach(indent; 0 .. indentationLevel) 3320 s ~= indentWith; 3321 if(insertComments) s ~= "-->"; 3322 3323 return s; 3324 } 3325 3326 /++ 3327 Writes out with formatting. Be warned: formatting changes the contents. Use ONLY 3328 for eyeball debugging. 3329 +/ 3330 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 3331 3332 // first step is to concatenate any consecutive text nodes to simplify 3333 // the white space analysis. this changes the tree! but i'm allowed since 3334 // the comment always says it changes the comments 3335 // 3336 // actually i'm not allowed cuz it is const so i will cheat and lie 3337 /+ 3338 TextNode lastTextChild = null; 3339 for(int a = 0; a < this.children.length; a++) { 3340 auto child = this.children[a]; 3341 if(auto tn = cast(TextNode) child) { 3342 if(lastTextChild) { 3343 lastTextChild.contents ~= tn.contents; 3344 for(int b = a; b < this.children.length - 1; b++) 3345 this.children[b] = this.children[b + 1]; 3346 this.children = this.children[0 .. $-1]; 3347 } else { 3348 lastTextChild = tn; 3349 } 3350 } else { 3351 lastTextChild = null; 3352 } 3353 } 3354 +/ 3355 3356 const(Element)[] children; 3357 3358 TextNode lastTextChild = null; 3359 for(int a = 0; a < this.children.length; a++) { 3360 auto child = this.children[a]; 3361 if(auto tn = cast(const(TextNode)) child) { 3362 if(lastTextChild !is null) { 3363 lastTextChild.contents ~= tn.contents; 3364 } else { 3365 lastTextChild = new TextNode(""); 3366 lastTextChild.parentNode = cast(Element) this; 3367 lastTextChild.contents ~= tn.contents; 3368 children ~= lastTextChild; 3369 } 3370 } else { 3371 lastTextChild = null; 3372 children ~= child; 3373 } 3374 } 3375 3376 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3377 3378 s ~= "<"; 3379 s ~= tagName; 3380 3381 // i sort these for consistent output. might be more legible 3382 // but especially it keeps it the same for diff purposes. 3383 import std.algorithm : sort; 3384 auto keys = sort(attributes.keys); 3385 foreach(n; keys) { 3386 auto v = attributes[n]; 3387 s ~= " "; 3388 s ~= n; 3389 s ~= "=\""; 3390 s ~= htmlEntitiesEncode(v); 3391 s ~= "\""; 3392 } 3393 3394 if(selfClosed){ 3395 s ~= " />"; 3396 return s; 3397 } 3398 3399 s ~= ">"; 3400 3401 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 3402 // just keep them on the same line 3403 if(tagName.isInArray(inlineElements) || allAreInlineHtml(children)) { 3404 foreach(child; children) { 3405 s ~= child.toString();//toPrettyString(false, 0, null); 3406 } 3407 } else { 3408 foreach(child; children) { 3409 assert(child !is null); 3410 3411 s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith); 3412 } 3413 3414 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3415 } 3416 3417 s ~= "</"; 3418 s ~= tagName; 3419 s ~= ">"; 3420 3421 return s; 3422 } 3423 3424 /+ 3425 /// Writes out the opening tag only, if applicable. 3426 string writeTagOnly(Appender!string where = appender!string()) const { 3427 +/ 3428 3429 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3430 /// Note: the ordering of attributes in the string is undefined. 3431 /// Returns the string it creates. 3432 string writeToAppender(Appender!string where = appender!string()) const { 3433 assert(tagName !is null); 3434 3435 where.reserve((this.children.length + 1) * 512); 3436 3437 auto start = where.data.length; 3438 3439 where.put("<"); 3440 where.put(tagName); 3441 3442 import std.algorithm : sort; 3443 auto keys = sort(attributes.keys); 3444 foreach(n; keys) { 3445 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 3446 //assert(v !is null); 3447 where.put(" "); 3448 where.put(n); 3449 where.put("=\""); 3450 htmlEntitiesEncode(v, where); 3451 where.put("\""); 3452 } 3453 3454 if(selfClosed){ 3455 where.put(" />"); 3456 return where.data[start .. $]; 3457 } 3458 3459 where.put('>'); 3460 3461 innerHTML(where); 3462 3463 where.put("</"); 3464 where.put(tagName); 3465 where.put('>'); 3466 3467 return where.data[start .. $]; 3468 } 3469 3470 /** 3471 Returns a lazy range of all its children, recursively. 3472 */ 3473 @property ElementStream tree() { 3474 return new ElementStream(this); 3475 } 3476 3477 // I moved these from Form because they are generally useful. 3478 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3479 /// Tags: HTML, HTML5 3480 // FIXME: add overloads for other label types... 3481 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3482 auto fs = this; 3483 auto i = fs.addChild("label"); 3484 3485 if(!(type == "checkbox" || type == "radio")) 3486 i.addChild("span", label); 3487 3488 Element input; 3489 if(type == "textarea") 3490 input = i.addChild("textarea"). 3491 setAttribute("name", name). 3492 setAttribute("rows", "6"); 3493 else 3494 input = i.addChild("input"). 3495 setAttribute("name", name). 3496 setAttribute("type", type); 3497 3498 if(type == "checkbox" || type == "radio") 3499 i.addChild("span", label); 3500 3501 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3502 fieldOptions.applyToElement(input); 3503 return i; 3504 } 3505 3506 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3507 auto fs = this; 3508 auto i = fs.addChild("label"); 3509 i.addChild(label); 3510 Element input; 3511 if(type == "textarea") 3512 input = i.addChild("textarea"). 3513 setAttribute("name", name). 3514 setAttribute("rows", "6"); 3515 else 3516 input = i.addChild("input"). 3517 setAttribute("name", name). 3518 setAttribute("type", type); 3519 3520 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3521 fieldOptions.applyToElement(input); 3522 return i; 3523 } 3524 3525 Element addField(string label, string name, FormFieldOptions fieldOptions) { 3526 return addField(label, name, "text", fieldOptions); 3527 } 3528 3529 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 3530 auto fs = this; 3531 auto i = fs.addChild("label"); 3532 i.addChild("span", label); 3533 auto sel = i.addChild("select").setAttribute("name", name); 3534 3535 foreach(k, opt; options) 3536 sel.addChild("option", opt, k); 3537 3538 // FIXME: implement requirements somehow 3539 3540 return i; 3541 } 3542 3543 Element addSubmitButton(string label = null) { 3544 auto t = this; 3545 auto holder = t.addChild("div"); 3546 holder.addClass("submit-holder"); 3547 auto i = holder.addChild("input"); 3548 i.type = "submit"; 3549 if(label.length) 3550 i.value = label; 3551 return holder; 3552 } 3553 3554 } 3555 3556 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 3557 /// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header) 3558 /// Group: core_functionality 3559 class XmlDocument : Document { 3560 this(string data) { 3561 selfClosedElements = null; 3562 contentType = "text/xml; charset=utf-8"; 3563 _prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n"; 3564 3565 parseStrict(data); 3566 } 3567 } 3568 3569 3570 3571 3572 import std.string; 3573 3574 /* domconvenience follows { */ 3575 3576 /// finds comments that match the given txt. Case insensitive, strips whitespace. 3577 /// Group: core_functionality 3578 Element[] findComments(Document document, string txt) { 3579 return findComments(document.root, txt); 3580 } 3581 3582 /// ditto 3583 Element[] findComments(Element element, string txt) { 3584 txt = txt.strip().toLower(); 3585 Element[] ret; 3586 3587 foreach(comment; element.getElementsByTagName("#comment")) { 3588 string t = comment.nodeValue().strip().toLower(); 3589 if(t == txt) 3590 ret ~= comment; 3591 } 3592 3593 return ret; 3594 } 3595 3596 /// An option type that propagates null. See: [Element.optionSelector] 3597 /// Group: implementations 3598 struct MaybeNullElement(SomeElementType) { 3599 this(SomeElementType ele) { 3600 this.element = ele; 3601 } 3602 SomeElementType element; 3603 3604 /// Forwards to the element, wit a null check inserted that propagates null. 3605 auto opDispatch(string method, T...)(T args) { 3606 alias type = typeof(__traits(getMember, element, method)(args)); 3607 static if(is(type : Element)) { 3608 if(element is null) 3609 return MaybeNullElement!type(null); 3610 return __traits(getMember, element, method)(args); 3611 } else static if(is(type == string)) { 3612 if(element is null) 3613 return cast(string) null; 3614 return __traits(getMember, element, method)(args); 3615 } else static if(is(type == void)) { 3616 if(element is null) 3617 return; 3618 __traits(getMember, element, method)(args); 3619 } else { 3620 static assert(0); 3621 } 3622 } 3623 3624 /// Allows implicit casting to the wrapped element. 3625 alias element this; 3626 } 3627 3628 /++ 3629 A collection of elements which forwards methods to the children. 3630 +/ 3631 /// Group: implementations 3632 struct ElementCollection { 3633 /// 3634 this(Element e) { 3635 elements = [e]; 3636 } 3637 3638 /// 3639 this(Element e, string selector) { 3640 elements = e.querySelectorAll(selector); 3641 } 3642 3643 /// 3644 this(Element[] e) { 3645 elements = e; 3646 } 3647 3648 Element[] elements; 3649 //alias elements this; // let it implicitly convert to the underlying array 3650 3651 /// 3652 ElementCollection opIndex(string selector) { 3653 ElementCollection ec; 3654 foreach(e; elements) 3655 ec.elements ~= e.getElementsBySelector(selector); 3656 return ec; 3657 } 3658 3659 /// 3660 Element opIndex(int i) { 3661 return elements[i]; 3662 } 3663 3664 /// if you slice it, give the underlying array for easy forwarding of the 3665 /// collection to range expecting algorithms or looping over. 3666 Element[] opSlice() { 3667 return elements; 3668 } 3669 3670 /// And input range primitives so we can foreach over this 3671 void popFront() { 3672 elements = elements[1..$]; 3673 } 3674 3675 /// ditto 3676 Element front() { 3677 return elements[0]; 3678 } 3679 3680 /// ditto 3681 bool empty() { 3682 return !elements.length; 3683 } 3684 3685 /++ 3686 Collects strings from the collection, concatenating them together 3687 Kinda like running reduce and ~= on it. 3688 3689 --- 3690 document["p"].collect!"innerText"; 3691 --- 3692 +/ 3693 string collect(string method)(string separator = "") { 3694 string text; 3695 foreach(e; elements) { 3696 text ~= mixin("e." ~ method); 3697 text ~= separator; 3698 } 3699 return text; 3700 } 3701 3702 /// Forward method calls to each individual [Element|element] of the collection 3703 /// returns this so it can be chained. 3704 ElementCollection opDispatch(string name, T...)(T t) { 3705 foreach(e; elements) { 3706 mixin("e." ~ name)(t); 3707 } 3708 return this; 3709 } 3710 3711 /++ 3712 Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one. 3713 +/ 3714 ElementCollection wrapIn(Element what) { 3715 foreach(e; elements) { 3716 e.wrapIn(what.cloneNode(false)); 3717 } 3718 3719 return this; 3720 } 3721 3722 /// Concatenates two ElementCollection together. 3723 ElementCollection opBinary(string op : "~")(ElementCollection rhs) { 3724 return ElementCollection(this.elements ~ rhs.elements); 3725 } 3726 } 3727 3728 3729 /// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions. 3730 /// Group: implementations 3731 mixin template JavascriptStyleDispatch() { 3732 /// 3733 string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want. 3734 if(v !is null) 3735 return set(name, v); 3736 return get(name); 3737 } 3738 3739 /// 3740 string opIndex(string key) const { 3741 return get(key); 3742 } 3743 3744 /// 3745 string opIndexAssign(string value, string field) { 3746 return set(field, value); 3747 } 3748 3749 // FIXME: doesn't seem to work 3750 string* opBinary(string op)(string key) if(op == "in") { 3751 return key in fields; 3752 } 3753 } 3754 3755 /// A proxy object to do the Element class' dataset property. See Element.dataset for more info. 3756 /// 3757 /// Do not create this object directly. 3758 /// Group: implementations 3759 struct DataSet { 3760 /// 3761 this(Element e) { 3762 this._element = e; 3763 } 3764 3765 private Element _element; 3766 /// 3767 string set(string name, string value) { 3768 _element.setAttribute("data-" ~ unCamelCase(name), value); 3769 return value; 3770 } 3771 3772 /// 3773 string get(string name) const { 3774 return _element.getAttribute("data-" ~ unCamelCase(name)); 3775 } 3776 3777 /// 3778 mixin JavascriptStyleDispatch!(); 3779 } 3780 3781 /// Proxy object for attributes which will replace the main opDispatch eventually 3782 /// Group: implementations 3783 struct AttributeSet { 3784 /// 3785 this(Element e) { 3786 this._element = e; 3787 } 3788 3789 private Element _element; 3790 /// 3791 string set(string name, string value) { 3792 _element.setAttribute(name, value); 3793 return value; 3794 } 3795 3796 /// 3797 string get(string name) const { 3798 return _element.getAttribute(name); 3799 } 3800 3801 /// 3802 mixin JavascriptStyleDispatch!(); 3803 } 3804 3805 3806 3807 /// for style, i want to be able to set it with a string like a plain attribute, 3808 /// but also be able to do properties Javascript style. 3809 3810 /// Group: implementations 3811 struct ElementStyle { 3812 this(Element parent) { 3813 _element = parent; 3814 } 3815 3816 Element _element; 3817 3818 @property ref inout(string) _attribute() inout { 3819 auto s = "style" in _element.attributes; 3820 if(s is null) { 3821 auto e = cast() _element; // const_cast 3822 e.attributes["style"] = ""; // we need something to reference 3823 s = cast(inout) ("style" in e.attributes); 3824 } 3825 3826 assert(s !is null); 3827 return *s; 3828 } 3829 3830 alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work. 3831 3832 string set(string name, string value) { 3833 if(name.length == 0) 3834 return value; 3835 if(name == "cssFloat") 3836 name = "float"; 3837 else 3838 name = unCamelCase(name); 3839 auto r = rules(); 3840 r[name] = value; 3841 3842 _attribute = ""; 3843 foreach(k, v; r) { 3844 if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */ 3845 continue; 3846 if(_attribute.length) 3847 _attribute ~= " "; 3848 _attribute ~= k ~ ": " ~ v ~ ";"; 3849 } 3850 3851 _element.setAttribute("style", _attribute); // this is to trigger the observer call 3852 3853 return value; 3854 } 3855 string get(string name) const { 3856 if(name == "cssFloat") 3857 name = "float"; 3858 else 3859 name = unCamelCase(name); 3860 auto r = rules(); 3861 if(name in r) 3862 return r[name]; 3863 return null; 3864 } 3865 3866 string[string] rules() const { 3867 string[string] ret; 3868 foreach(rule; _attribute.split(";")) { 3869 rule = rule.strip(); 3870 if(rule.length == 0) 3871 continue; 3872 auto idx = rule.indexOf(":"); 3873 if(idx == -1) 3874 ret[rule] = ""; 3875 else { 3876 auto name = rule[0 .. idx].strip(); 3877 auto value = rule[idx + 1 .. $].strip(); 3878 3879 ret[name] = value; 3880 } 3881 } 3882 3883 return ret; 3884 } 3885 3886 mixin JavascriptStyleDispatch!(); 3887 } 3888 3889 /// Converts a camel cased propertyName to a css style dashed property-name 3890 string unCamelCase(string a) { 3891 string ret; 3892 foreach(c; a) 3893 if((c >= 'A' && c <= 'Z')) 3894 ret ~= "-" ~ toLower("" ~ c)[0]; 3895 else 3896 ret ~= c; 3897 return ret; 3898 } 3899 3900 /// Translates a css style property-name to a camel cased propertyName 3901 string camelCase(string a) { 3902 string ret; 3903 bool justSawDash = false; 3904 foreach(c; a) 3905 if(c == '-') { 3906 justSawDash = true; 3907 } else { 3908 if(justSawDash) { 3909 justSawDash = false; 3910 ret ~= toUpper("" ~ c); 3911 } else 3912 ret ~= c; 3913 } 3914 return ret; 3915 } 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 // domconvenience ends } 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 // @safe: 3938 3939 // NOTE: do *NOT* override toString on Element subclasses. It won't work. 3940 // Instead, override writeToAppender(); 3941 3942 // FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too. 3943 3944 // Stripping them is useful for reading php as html.... but adding them 3945 // is good for building php. 3946 3947 // I need to maintain compatibility with the way it is now too. 3948 3949 import std.string; 3950 import std.exception; 3951 import std.uri; 3952 import std.array; 3953 import std.range; 3954 3955 //import std.stdio; 3956 3957 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh 3958 // that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's 3959 // most likely a typo so I say kill kill kill. 3960 3961 3962 /++ 3963 This might belong in another module, but it represents a file with a mime type and some data. 3964 Document implements this interface with type = text/html (see Document.contentType for more info) 3965 and data = document.toString, so you can return Documents anywhere web.d expects FileResources. 3966 +/ 3967 /// Group: bonus_functionality 3968 interface FileResource { 3969 /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png" 3970 @property string contentType() const; 3971 /// the data 3972 immutable(ubyte)[] getData() const; 3973 /++ 3974 filename, return null if none 3975 3976 History: 3977 Added December 25, 2020 3978 +/ 3979 @property string filename() const; 3980 } 3981 3982 3983 3984 3985 ///. 3986 /// Group: bonus_functionality 3987 enum NodeType { Text = 3 } 3988 3989 3990 /// You can use this to do an easy null check or a dynamic cast+null check on any element. 3991 /// Group: core_functionality 3992 T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element)) 3993 in {} 3994 out(ret) { assert(ret !is null); } 3995 body { 3996 auto ret = cast(T) e; 3997 if(ret is null) 3998 throw new ElementNotFoundException(T.stringof, "passed value", e, file, line); 3999 return ret; 4000 } 4001 4002 4003 ///. 4004 /// Group: core_functionality 4005 class DocumentFragment : Element { 4006 ///. 4007 this(Document _parentDocument) { 4008 tagName = "#fragment"; 4009 super(_parentDocument); 4010 } 4011 4012 /++ 4013 Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it. 4014 4015 Since: March 29, 2018 (or git tagged v2.1.0) 4016 +/ 4017 this(Html html) { 4018 this(null); 4019 4020 this.innerHTML = html.source; 4021 } 4022 4023 ///. 4024 override string writeToAppender(Appender!string where = appender!string()) const { 4025 return this.innerHTML(where); 4026 } 4027 4028 override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const { 4029 string s; 4030 foreach(child; children) 4031 s ~= child.toPrettyString(insertComments, indentationLevel, indentWith); 4032 return s; 4033 } 4034 4035 /// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes 4036 /* 4037 override inout(Element) parentNode() inout { 4038 return children.length ? children[0].parentNode : null; 4039 } 4040 */ 4041 override Element parentNode(Element p) { 4042 this._parentNode = p; 4043 foreach(child; children) 4044 child.parentNode = p; 4045 return p; 4046 } 4047 } 4048 4049 /// Given text, encode all html entities on it - &, <, >, and ". This function also 4050 /// encodes all 8 bit characters as entities, thus ensuring the resultant text will work 4051 /// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false 4052 /// 4053 /// The output parameter can be given to append to an existing buffer. You don't have to 4054 /// pass one; regardless, the return value will be usable for you, with just the data encoded. 4055 /// Group: core_functionality 4056 string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) { 4057 // if there's no entities, we can save a lot of time by not bothering with the 4058 // decoding loop. This check cuts the net toString time by better than half in my test. 4059 // let me know if it made your tests worse though, since if you use an entity in just about 4060 // every location, the check will add time... but I suspect the average experience is like mine 4061 // since the check gives up as soon as it can anyway. 4062 4063 bool shortcut = true; 4064 foreach(char c; data) { 4065 // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it. 4066 if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) { 4067 shortcut = false; // there's actual work to be done 4068 break; 4069 } 4070 } 4071 4072 if(shortcut) { 4073 output.put(data); 4074 return data; 4075 } 4076 4077 auto start = output.data.length; 4078 4079 output.reserve(data.length + 64); // grab some extra space for the encoded entities 4080 4081 foreach(dchar d; data) { 4082 if(d == '&') 4083 output.put("&"); 4084 else if (d == '<') 4085 output.put("<"); 4086 else if (d == '>') 4087 output.put(">"); 4088 else if (d == '\"') 4089 output.put("""); 4090 // else if (d == '\'') 4091 // output.put("'"); // if you are in an attribute, it might be important to encode for the same reason as double quotes 4092 // FIXME: should I encode apostrophes too? as '... I could also do space but if your html is so bad that it doesn't 4093 // quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh 4094 // idk about apostrophes though. Might be worth it, might not. 4095 else if (!encodeNonAscii || (d < 128 && d > 0)) 4096 output.put(d); 4097 else 4098 output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";"); 4099 } 4100 4101 //assert(output !is null); // this fails on empty attributes..... 4102 return output.data[start .. $]; 4103 4104 // data = data.replace("\u00a0", " "); 4105 } 4106 4107 /// An alias for htmlEntitiesEncode; it works for xml too 4108 /// Group: core_functionality 4109 string xmlEntitiesEncode(string data) { 4110 return htmlEntitiesEncode(data); 4111 } 4112 4113 /// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters. 4114 /// Group: core_functionality 4115 dchar parseEntity(in dchar[] entity) { 4116 switch(entity[1..$-1]) { 4117 case "quot": 4118 return '"'; 4119 case "apos": 4120 return '\''; 4121 case "lt": 4122 return '<'; 4123 case "gt": 4124 return '>'; 4125 case "amp": 4126 return '&'; 4127 // the next are html rather than xml 4128 4129 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4130 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4131 case "Tab": return '\u0009'; 4132 case "NewLine": return '\u000A'; 4133 case "excl": return '\u0021'; 4134 case "QUOT": return '\u0022'; 4135 case "num": return '\u0023'; 4136 case "dollar": return '\u0024'; 4137 case "percnt": return '\u0025'; 4138 case "AMP": return '\u0026'; 4139 case "lpar": return '\u0028'; 4140 case "rpar": return '\u0029'; 4141 case "ast": case "midast": return '\u002A'; 4142 case "plus": return '\u002B'; 4143 case "comma": return '\u002C'; 4144 case "period": return '\u002E'; 4145 case "sol": return '\u002F'; 4146 case "colon": return '\u003A'; 4147 case "semi": return '\u003B'; 4148 case "LT": return '\u003C'; 4149 case "equals": return '\u003D'; 4150 case "GT": return '\u003E'; 4151 case "quest": return '\u003F'; 4152 case "commat": return '\u0040'; 4153 case "lsqb": case "lbrack": return '\u005B'; 4154 case "bsol": return '\u005C'; 4155 case "rsqb": case "rbrack": return '\u005D'; 4156 case "Hat": return '\u005E'; 4157 case "lowbar": case "UnderBar": return '\u005F'; 4158 case "grave": case "DiacriticalGrave": return '\u0060'; 4159 case "lcub": case "lbrace": return '\u007B'; 4160 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4161 case "rcub": case "rbrace": return '\u007D'; 4162 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4163 case "iexcl": return '\u00A1'; 4164 case "cent": return '\u00A2'; 4165 case "pound": return '\u00A3'; 4166 case "curren": return '\u00A4'; 4167 case "yen": return '\u00A5'; 4168 case "brvbar": return '\u00A6'; 4169 case "sect": return '\u00A7'; 4170 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4171 case "copy": case "COPY": return '\u00A9'; 4172 case "ordf": return '\u00AA'; 4173 case "laquo": return '\u00AB'; 4174 case "not": return '\u00AC'; 4175 case "shy": return '\u00AD'; 4176 case "reg": case "circledR": case "REG": return '\u00AE'; 4177 case "macr": case "strns": return '\u00AF'; 4178 case "deg": return '\u00B0'; 4179 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4180 case "sup2": return '\u00B2'; 4181 case "sup3": return '\u00B3'; 4182 case "acute": case "DiacriticalAcute": return '\u00B4'; 4183 case "micro": return '\u00B5'; 4184 case "para": return '\u00B6'; 4185 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4186 case "cedil": case "Cedilla": return '\u00B8'; 4187 case "sup1": return '\u00B9'; 4188 case "ordm": return '\u00BA'; 4189 case "raquo": return '\u00BB'; 4190 case "frac14": return '\u00BC'; 4191 case "frac12": case "half": return '\u00BD'; 4192 case "frac34": return '\u00BE'; 4193 case "iquest": return '\u00BF'; 4194 case "Agrave": return '\u00C0'; 4195 case "Aacute": return '\u00C1'; 4196 case "Acirc": return '\u00C2'; 4197 case "Atilde": return '\u00C3'; 4198 case "Auml": return '\u00C4'; 4199 case "Aring": case "angst": return '\u00C5'; 4200 case "AElig": return '\u00C6'; 4201 case "Ccedil": return '\u00C7'; 4202 case "Egrave": return '\u00C8'; 4203 case "Eacute": return '\u00C9'; 4204 case "Ecirc": return '\u00CA'; 4205 case "Euml": return '\u00CB'; 4206 case "Igrave": return '\u00CC'; 4207 case "Iacute": return '\u00CD'; 4208 case "Icirc": return '\u00CE'; 4209 case "Iuml": return '\u00CF'; 4210 case "ETH": return '\u00D0'; 4211 case "Ntilde": return '\u00D1'; 4212 case "Ograve": return '\u00D2'; 4213 case "Oacute": return '\u00D3'; 4214 case "Ocirc": return '\u00D4'; 4215 case "Otilde": return '\u00D5'; 4216 case "Ouml": return '\u00D6'; 4217 case "times": return '\u00D7'; 4218 case "Oslash": return '\u00D8'; 4219 case "Ugrave": return '\u00D9'; 4220 case "Uacute": return '\u00DA'; 4221 case "Ucirc": return '\u00DB'; 4222 case "Uuml": return '\u00DC'; 4223 case "Yacute": return '\u00DD'; 4224 case "THORN": return '\u00DE'; 4225 case "szlig": return '\u00DF'; 4226 case "agrave": return '\u00E0'; 4227 case "aacute": return '\u00E1'; 4228 case "acirc": return '\u00E2'; 4229 case "atilde": return '\u00E3'; 4230 case "auml": return '\u00E4'; 4231 case "aring": return '\u00E5'; 4232 case "aelig": return '\u00E6'; 4233 case "ccedil": return '\u00E7'; 4234 case "egrave": return '\u00E8'; 4235 case "eacute": return '\u00E9'; 4236 case "ecirc": return '\u00EA'; 4237 case "euml": return '\u00EB'; 4238 case "igrave": return '\u00EC'; 4239 case "iacute": return '\u00ED'; 4240 case "icirc": return '\u00EE'; 4241 case "iuml": return '\u00EF'; 4242 case "eth": return '\u00F0'; 4243 case "ntilde": return '\u00F1'; 4244 case "ograve": return '\u00F2'; 4245 case "oacute": return '\u00F3'; 4246 case "ocirc": return '\u00F4'; 4247 case "otilde": return '\u00F5'; 4248 case "ouml": return '\u00F6'; 4249 case "divide": case "div": return '\u00F7'; 4250 case "oslash": return '\u00F8'; 4251 case "ugrave": return '\u00F9'; 4252 case "uacute": return '\u00FA'; 4253 case "ucirc": return '\u00FB'; 4254 case "uuml": return '\u00FC'; 4255 case "yacute": return '\u00FD'; 4256 case "thorn": return '\u00FE'; 4257 case "yuml": return '\u00FF'; 4258 case "Amacr": return '\u0100'; 4259 case "amacr": return '\u0101'; 4260 case "Abreve": return '\u0102'; 4261 case "abreve": return '\u0103'; 4262 case "Aogon": return '\u0104'; 4263 case "aogon": return '\u0105'; 4264 case "Cacute": return '\u0106'; 4265 case "cacute": return '\u0107'; 4266 case "Ccirc": return '\u0108'; 4267 case "ccirc": return '\u0109'; 4268 case "Cdot": return '\u010A'; 4269 case "cdot": return '\u010B'; 4270 case "Ccaron": return '\u010C'; 4271 case "ccaron": return '\u010D'; 4272 case "Dcaron": return '\u010E'; 4273 case "dcaron": return '\u010F'; 4274 case "Dstrok": return '\u0110'; 4275 case "dstrok": return '\u0111'; 4276 case "Emacr": return '\u0112'; 4277 case "emacr": return '\u0113'; 4278 case "Edot": return '\u0116'; 4279 case "edot": return '\u0117'; 4280 case "Eogon": return '\u0118'; 4281 case "eogon": return '\u0119'; 4282 case "Ecaron": return '\u011A'; 4283 case "ecaron": return '\u011B'; 4284 case "Gcirc": return '\u011C'; 4285 case "gcirc": return '\u011D'; 4286 case "Gbreve": return '\u011E'; 4287 case "gbreve": return '\u011F'; 4288 case "Gdot": return '\u0120'; 4289 case "gdot": return '\u0121'; 4290 case "Gcedil": return '\u0122'; 4291 case "Hcirc": return '\u0124'; 4292 case "hcirc": return '\u0125'; 4293 case "Hstrok": return '\u0126'; 4294 case "hstrok": return '\u0127'; 4295 case "Itilde": return '\u0128'; 4296 case "itilde": return '\u0129'; 4297 case "Imacr": return '\u012A'; 4298 case "imacr": return '\u012B'; 4299 case "Iogon": return '\u012E'; 4300 case "iogon": return '\u012F'; 4301 case "Idot": return '\u0130'; 4302 case "imath": case "inodot": return '\u0131'; 4303 case "IJlig": return '\u0132'; 4304 case "ijlig": return '\u0133'; 4305 case "Jcirc": return '\u0134'; 4306 case "jcirc": return '\u0135'; 4307 case "Kcedil": return '\u0136'; 4308 case "kcedil": return '\u0137'; 4309 case "kgreen": return '\u0138'; 4310 case "Lacute": return '\u0139'; 4311 case "lacute": return '\u013A'; 4312 case "Lcedil": return '\u013B'; 4313 case "lcedil": return '\u013C'; 4314 case "Lcaron": return '\u013D'; 4315 case "lcaron": return '\u013E'; 4316 case "Lmidot": return '\u013F'; 4317 case "lmidot": return '\u0140'; 4318 case "Lstrok": return '\u0141'; 4319 case "lstrok": return '\u0142'; 4320 case "Nacute": return '\u0143'; 4321 case "nacute": return '\u0144'; 4322 case "Ncedil": return '\u0145'; 4323 case "ncedil": return '\u0146'; 4324 case "Ncaron": return '\u0147'; 4325 case "ncaron": return '\u0148'; 4326 case "napos": return '\u0149'; 4327 case "ENG": return '\u014A'; 4328 case "eng": return '\u014B'; 4329 case "Omacr": return '\u014C'; 4330 case "omacr": return '\u014D'; 4331 case "Odblac": return '\u0150'; 4332 case "odblac": return '\u0151'; 4333 case "OElig": return '\u0152'; 4334 case "oelig": return '\u0153'; 4335 case "Racute": return '\u0154'; 4336 case "racute": return '\u0155'; 4337 case "Rcedil": return '\u0156'; 4338 case "rcedil": return '\u0157'; 4339 case "Rcaron": return '\u0158'; 4340 case "rcaron": return '\u0159'; 4341 case "Sacute": return '\u015A'; 4342 case "sacute": return '\u015B'; 4343 case "Scirc": return '\u015C'; 4344 case "scirc": return '\u015D'; 4345 case "Scedil": return '\u015E'; 4346 case "scedil": return '\u015F'; 4347 case "Scaron": return '\u0160'; 4348 case "scaron": return '\u0161'; 4349 case "Tcedil": return '\u0162'; 4350 case "tcedil": return '\u0163'; 4351 case "Tcaron": return '\u0164'; 4352 case "tcaron": return '\u0165'; 4353 case "Tstrok": return '\u0166'; 4354 case "tstrok": return '\u0167'; 4355 case "Utilde": return '\u0168'; 4356 case "utilde": return '\u0169'; 4357 case "Umacr": return '\u016A'; 4358 case "umacr": return '\u016B'; 4359 case "Ubreve": return '\u016C'; 4360 case "ubreve": return '\u016D'; 4361 case "Uring": return '\u016E'; 4362 case "uring": return '\u016F'; 4363 case "Udblac": return '\u0170'; 4364 case "udblac": return '\u0171'; 4365 case "Uogon": return '\u0172'; 4366 case "uogon": return '\u0173'; 4367 case "Wcirc": return '\u0174'; 4368 case "wcirc": return '\u0175'; 4369 case "Ycirc": return '\u0176'; 4370 case "ycirc": return '\u0177'; 4371 case "Yuml": return '\u0178'; 4372 case "Zacute": return '\u0179'; 4373 case "zacute": return '\u017A'; 4374 case "Zdot": return '\u017B'; 4375 case "zdot": return '\u017C'; 4376 case "Zcaron": return '\u017D'; 4377 case "zcaron": return '\u017E'; 4378 case "fnof": return '\u0192'; 4379 case "imped": return '\u01B5'; 4380 case "gacute": return '\u01F5'; 4381 case "jmath": return '\u0237'; 4382 case "circ": return '\u02C6'; 4383 case "caron": case "Hacek": return '\u02C7'; 4384 case "breve": case "Breve": return '\u02D8'; 4385 case "dot": case "DiacriticalDot": return '\u02D9'; 4386 case "ring": return '\u02DA'; 4387 case "ogon": return '\u02DB'; 4388 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4389 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4390 case "DownBreve": return '\u0311'; 4391 case "Alpha": return '\u0391'; 4392 case "Beta": return '\u0392'; 4393 case "Gamma": return '\u0393'; 4394 case "Delta": return '\u0394'; 4395 case "Epsilon": return '\u0395'; 4396 case "Zeta": return '\u0396'; 4397 case "Eta": return '\u0397'; 4398 case "Theta": return '\u0398'; 4399 case "Iota": return '\u0399'; 4400 case "Kappa": return '\u039A'; 4401 case "Lambda": return '\u039B'; 4402 case "Mu": return '\u039C'; 4403 case "Nu": return '\u039D'; 4404 case "Xi": return '\u039E'; 4405 case "Omicron": return '\u039F'; 4406 case "Pi": return '\u03A0'; 4407 case "Rho": return '\u03A1'; 4408 case "Sigma": return '\u03A3'; 4409 case "Tau": return '\u03A4'; 4410 case "Upsilon": return '\u03A5'; 4411 case "Phi": return '\u03A6'; 4412 case "Chi": return '\u03A7'; 4413 case "Psi": return '\u03A8'; 4414 case "Omega": case "ohm": return '\u03A9'; 4415 case "alpha": return '\u03B1'; 4416 case "beta": return '\u03B2'; 4417 case "gamma": return '\u03B3'; 4418 case "delta": return '\u03B4'; 4419 case "epsi": case "epsilon": return '\u03B5'; 4420 case "zeta": return '\u03B6'; 4421 case "eta": return '\u03B7'; 4422 case "theta": return '\u03B8'; 4423 case "iota": return '\u03B9'; 4424 case "kappa": return '\u03BA'; 4425 case "lambda": return '\u03BB'; 4426 case "mu": return '\u03BC'; 4427 case "nu": return '\u03BD'; 4428 case "xi": return '\u03BE'; 4429 case "omicron": return '\u03BF'; 4430 case "pi": return '\u03C0'; 4431 case "rho": return '\u03C1'; 4432 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4433 case "sigma": return '\u03C3'; 4434 case "tau": return '\u03C4'; 4435 case "upsi": case "upsilon": return '\u03C5'; 4436 case "phi": return '\u03C6'; 4437 case "chi": return '\u03C7'; 4438 case "psi": return '\u03C8'; 4439 case "omega": return '\u03C9'; 4440 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4441 case "Upsi": case "upsih": return '\u03D2'; 4442 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4443 case "piv": case "varpi": return '\u03D6'; 4444 case "Gammad": return '\u03DC'; 4445 case "gammad": case "digamma": return '\u03DD'; 4446 case "kappav": case "varkappa": return '\u03F0'; 4447 case "rhov": case "varrho": return '\u03F1'; 4448 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4449 case "bepsi": case "backepsilon": return '\u03F6'; 4450 case "IOcy": return '\u0401'; 4451 case "DJcy": return '\u0402'; 4452 case "GJcy": return '\u0403'; 4453 case "Jukcy": return '\u0404'; 4454 case "DScy": return '\u0405'; 4455 case "Iukcy": return '\u0406'; 4456 case "YIcy": return '\u0407'; 4457 case "Jsercy": return '\u0408'; 4458 case "LJcy": return '\u0409'; 4459 case "NJcy": return '\u040A'; 4460 case "TSHcy": return '\u040B'; 4461 case "KJcy": return '\u040C'; 4462 case "Ubrcy": return '\u040E'; 4463 case "DZcy": return '\u040F'; 4464 case "Acy": return '\u0410'; 4465 case "Bcy": return '\u0411'; 4466 case "Vcy": return '\u0412'; 4467 case "Gcy": return '\u0413'; 4468 case "Dcy": return '\u0414'; 4469 case "IEcy": return '\u0415'; 4470 case "ZHcy": return '\u0416'; 4471 case "Zcy": return '\u0417'; 4472 case "Icy": return '\u0418'; 4473 case "Jcy": return '\u0419'; 4474 case "Kcy": return '\u041A'; 4475 case "Lcy": return '\u041B'; 4476 case "Mcy": return '\u041C'; 4477 case "Ncy": return '\u041D'; 4478 case "Ocy": return '\u041E'; 4479 case "Pcy": return '\u041F'; 4480 case "Rcy": return '\u0420'; 4481 case "Scy": return '\u0421'; 4482 case "Tcy": return '\u0422'; 4483 case "Ucy": return '\u0423'; 4484 case "Fcy": return '\u0424'; 4485 case "KHcy": return '\u0425'; 4486 case "TScy": return '\u0426'; 4487 case "CHcy": return '\u0427'; 4488 case "SHcy": return '\u0428'; 4489 case "SHCHcy": return '\u0429'; 4490 case "HARDcy": return '\u042A'; 4491 case "Ycy": return '\u042B'; 4492 case "SOFTcy": return '\u042C'; 4493 case "Ecy": return '\u042D'; 4494 case "YUcy": return '\u042E'; 4495 case "YAcy": return '\u042F'; 4496 case "acy": return '\u0430'; 4497 case "bcy": return '\u0431'; 4498 case "vcy": return '\u0432'; 4499 case "gcy": return '\u0433'; 4500 case "dcy": return '\u0434'; 4501 case "iecy": return '\u0435'; 4502 case "zhcy": return '\u0436'; 4503 case "zcy": return '\u0437'; 4504 case "icy": return '\u0438'; 4505 case "jcy": return '\u0439'; 4506 case "kcy": return '\u043A'; 4507 case "lcy": return '\u043B'; 4508 case "mcy": return '\u043C'; 4509 case "ncy": return '\u043D'; 4510 case "ocy": return '\u043E'; 4511 case "pcy": return '\u043F'; 4512 case "rcy": return '\u0440'; 4513 case "scy": return '\u0441'; 4514 case "tcy": return '\u0442'; 4515 case "ucy": return '\u0443'; 4516 case "fcy": return '\u0444'; 4517 case "khcy": return '\u0445'; 4518 case "tscy": return '\u0446'; 4519 case "chcy": return '\u0447'; 4520 case "shcy": return '\u0448'; 4521 case "shchcy": return '\u0449'; 4522 case "hardcy": return '\u044A'; 4523 case "ycy": return '\u044B'; 4524 case "softcy": return '\u044C'; 4525 case "ecy": return '\u044D'; 4526 case "yucy": return '\u044E'; 4527 case "yacy": return '\u044F'; 4528 case "iocy": return '\u0451'; 4529 case "djcy": return '\u0452'; 4530 case "gjcy": return '\u0453'; 4531 case "jukcy": return '\u0454'; 4532 case "dscy": return '\u0455'; 4533 case "iukcy": return '\u0456'; 4534 case "yicy": return '\u0457'; 4535 case "jsercy": return '\u0458'; 4536 case "ljcy": return '\u0459'; 4537 case "njcy": return '\u045A'; 4538 case "tshcy": return '\u045B'; 4539 case "kjcy": return '\u045C'; 4540 case "ubrcy": return '\u045E'; 4541 case "dzcy": return '\u045F'; 4542 case "ensp": return '\u2002'; 4543 case "emsp": return '\u2003'; 4544 case "emsp13": return '\u2004'; 4545 case "emsp14": return '\u2005'; 4546 case "numsp": return '\u2007'; 4547 case "puncsp": return '\u2008'; 4548 case "thinsp": case "ThinSpace": return '\u2009'; 4549 case "hairsp": case "VeryThinSpace": return '\u200A'; 4550 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4551 case "zwnj": return '\u200C'; 4552 case "zwj": return '\u200D'; 4553 case "lrm": return '\u200E'; 4554 case "rlm": return '\u200F'; 4555 case "hyphen": case "dash": return '\u2010'; 4556 case "ndash": return '\u2013'; 4557 case "mdash": return '\u2014'; 4558 case "horbar": return '\u2015'; 4559 case "Verbar": case "Vert": return '\u2016'; 4560 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4561 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4562 case "lsquor": case "sbquo": return '\u201A'; 4563 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4564 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4565 case "ldquor": case "bdquo": return '\u201E'; 4566 case "dagger": return '\u2020'; 4567 case "Dagger": case "ddagger": return '\u2021'; 4568 case "bull": case "bullet": return '\u2022'; 4569 case "nldr": return '\u2025'; 4570 case "hellip": case "mldr": return '\u2026'; 4571 case "permil": return '\u2030'; 4572 case "pertenk": return '\u2031'; 4573 case "prime": return '\u2032'; 4574 case "Prime": return '\u2033'; 4575 case "tprime": return '\u2034'; 4576 case "bprime": case "backprime": return '\u2035'; 4577 case "lsaquo": return '\u2039'; 4578 case "rsaquo": return '\u203A'; 4579 case "oline": case "OverBar": return '\u203E'; 4580 case "caret": return '\u2041'; 4581 case "hybull": return '\u2043'; 4582 case "frasl": return '\u2044'; 4583 case "bsemi": return '\u204F'; 4584 case "qprime": return '\u2057'; 4585 case "MediumSpace": return '\u205F'; 4586 case "NoBreak": return '\u2060'; 4587 case "ApplyFunction": case "af": return '\u2061'; 4588 case "InvisibleTimes": case "it": return '\u2062'; 4589 case "InvisibleComma": case "ic": return '\u2063'; 4590 case "euro": return '\u20AC'; 4591 case "tdot": case "TripleDot": return '\u20DB'; 4592 case "DotDot": return '\u20DC'; 4593 case "Copf": case "complexes": return '\u2102'; 4594 case "incare": return '\u2105'; 4595 case "gscr": return '\u210A'; 4596 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4597 case "Hfr": case "Poincareplane": return '\u210C'; 4598 case "quaternions": case "Hopf": return '\u210D'; 4599 case "planckh": return '\u210E'; 4600 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4601 case "Iscr": case "imagline": return '\u2110'; 4602 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4603 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4604 case "ell": return '\u2113'; 4605 case "Nopf": case "naturals": return '\u2115'; 4606 case "numero": return '\u2116'; 4607 case "copysr": return '\u2117'; 4608 case "weierp": case "wp": return '\u2118'; 4609 case "Popf": case "primes": return '\u2119'; 4610 case "rationals": case "Qopf": return '\u211A'; 4611 case "Rscr": case "realine": return '\u211B'; 4612 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4613 case "reals": case "Ropf": return '\u211D'; 4614 case "rx": return '\u211E'; 4615 case "trade": case "TRADE": return '\u2122'; 4616 case "integers": case "Zopf": return '\u2124'; 4617 case "mho": return '\u2127'; 4618 case "Zfr": case "zeetrf": return '\u2128'; 4619 case "iiota": return '\u2129'; 4620 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4621 case "Cfr": case "Cayleys": return '\u212D'; 4622 case "escr": return '\u212F'; 4623 case "Escr": case "expectation": return '\u2130'; 4624 case "Fscr": case "Fouriertrf": return '\u2131'; 4625 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4626 case "order": case "orderof": case "oscr": return '\u2134'; 4627 case "alefsym": case "aleph": return '\u2135'; 4628 case "beth": return '\u2136'; 4629 case "gimel": return '\u2137'; 4630 case "daleth": return '\u2138'; 4631 case "CapitalDifferentialD": case "DD": return '\u2145'; 4632 case "DifferentialD": case "dd": return '\u2146'; 4633 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4634 case "ImaginaryI": case "ii": return '\u2148'; 4635 case "frac13": return '\u2153'; 4636 case "frac23": return '\u2154'; 4637 case "frac15": return '\u2155'; 4638 case "frac25": return '\u2156'; 4639 case "frac35": return '\u2157'; 4640 case "frac45": return '\u2158'; 4641 case "frac16": return '\u2159'; 4642 case "frac56": return '\u215A'; 4643 case "frac18": return '\u215B'; 4644 case "frac38": return '\u215C'; 4645 case "frac58": return '\u215D'; 4646 case "frac78": return '\u215E'; 4647 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4648 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4649 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4650 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4651 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4652 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4653 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4654 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4655 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4656 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4657 case "nlarr": case "nleftarrow": return '\u219A'; 4658 case "nrarr": case "nrightarrow": return '\u219B'; 4659 case "rarrw": case "rightsquigarrow": return '\u219D'; 4660 case "Larr": case "twoheadleftarrow": return '\u219E'; 4661 case "Uarr": return '\u219F'; 4662 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4663 case "Darr": return '\u21A1'; 4664 case "larrtl": case "leftarrowtail": return '\u21A2'; 4665 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4666 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4667 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4668 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4669 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4670 case "larrhk": case "hookleftarrow": return '\u21A9'; 4671 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4672 case "larrlp": case "looparrowleft": return '\u21AB'; 4673 case "rarrlp": case "looparrowright": return '\u21AC'; 4674 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4675 case "nharr": case "nleftrightarrow": return '\u21AE'; 4676 case "lsh": case "Lsh": return '\u21B0'; 4677 case "rsh": case "Rsh": return '\u21B1'; 4678 case "ldsh": return '\u21B2'; 4679 case "rdsh": return '\u21B3'; 4680 case "crarr": return '\u21B5'; 4681 case "cularr": case "curvearrowleft": return '\u21B6'; 4682 case "curarr": case "curvearrowright": return '\u21B7'; 4683 case "olarr": case "circlearrowleft": return '\u21BA'; 4684 case "orarr": case "circlearrowright": return '\u21BB'; 4685 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4686 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4687 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4688 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4689 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4690 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4691 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4692 case "dharl": case "LeftDownVector": case "downharpoonleft": return '\u21C3'; 4693 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4694 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4695 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4696 case "llarr": case "leftleftarrows": return '\u21C7'; 4697 case "uuarr": case "upuparrows": return '\u21C8'; 4698 case "rrarr": case "rightrightarrows": return '\u21C9'; 4699 case "ddarr": case "downdownarrows": return '\u21CA'; 4700 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4701 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4702 case "nlArr": case "nLeftarrow": return '\u21CD'; 4703 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4704 case "nrArr": case "nRightarrow": return '\u21CF'; 4705 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4706 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4707 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4708 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4709 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4710 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4711 case "nwArr": return '\u21D6'; 4712 case "neArr": return '\u21D7'; 4713 case "seArr": return '\u21D8'; 4714 case "swArr": return '\u21D9'; 4715 case "lAarr": case "Lleftarrow": return '\u21DA'; 4716 case "rAarr": case "Rrightarrow": return '\u21DB'; 4717 case "zigrarr": return '\u21DD'; 4718 case "larrb": case "LeftArrowBar": return '\u21E4'; 4719 case "rarrb": case "RightArrowBar": return '\u21E5'; 4720 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4721 case "loarr": return '\u21FD'; 4722 case "roarr": return '\u21FE'; 4723 case "hoarr": return '\u21FF'; 4724 case "forall": case "ForAll": return '\u2200'; 4725 case "comp": case "complement": return '\u2201'; 4726 case "part": case "PartialD": return '\u2202'; 4727 case "exist": case "Exists": return '\u2203'; 4728 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4729 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4730 case "nabla": case "Del": return '\u2207'; 4731 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4732 case "notin": case "NotElement": case "notinva": return '\u2209'; 4733 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4734 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4735 case "prod": case "Product": return '\u220F'; 4736 case "coprod": case "Coproduct": return '\u2210'; 4737 case "sum": case "Sum": return '\u2211'; 4738 case "minus": return '\u2212'; 4739 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4740 case "plusdo": case "dotplus": return '\u2214'; 4741 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4742 case "lowast": return '\u2217'; 4743 case "compfn": case "SmallCircle": return '\u2218'; 4744 case "radic": case "Sqrt": return '\u221A'; 4745 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4746 case "infin": return '\u221E'; 4747 case "angrt": return '\u221F'; 4748 case "ang": case "angle": return '\u2220'; 4749 case "angmsd": case "measuredangle": return '\u2221'; 4750 case "angsph": return '\u2222'; 4751 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4752 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4753 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4754 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4755 case "and": case "wedge": return '\u2227'; 4756 case "or": case "vee": return '\u2228'; 4757 case "cap": return '\u2229'; 4758 case "cup": return '\u222A'; 4759 case "int": case "Integral": return '\u222B'; 4760 case "Int": return '\u222C'; 4761 case "tint": case "iiint": return '\u222D'; 4762 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4763 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4764 case "Cconint": return '\u2230'; 4765 case "cwint": return '\u2231'; 4766 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4767 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4768 case "there4": case "therefore": case "Therefore": return '\u2234'; 4769 case "becaus": case "because": case "Because": return '\u2235'; 4770 case "ratio": return '\u2236'; 4771 case "Colon": case "Proportion": return '\u2237'; 4772 case "minusd": case "dotminus": return '\u2238'; 4773 case "mDDot": return '\u223A'; 4774 case "homtht": return '\u223B'; 4775 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4776 case "bsim": case "backsim": return '\u223D'; 4777 case "ac": case "mstpos": return '\u223E'; 4778 case "acd": return '\u223F'; 4779 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4780 case "nsim": case "NotTilde": return '\u2241'; 4781 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4782 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4783 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4784 case "cong": case "TildeFullEqual": return '\u2245'; 4785 case "simne": return '\u2246'; 4786 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4787 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4788 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4789 case "ape": case "approxeq": return '\u224A'; 4790 case "apid": return '\u224B'; 4791 case "bcong": case "backcong": return '\u224C'; 4792 case "asympeq": case "CupCap": return '\u224D'; 4793 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4794 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4795 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4796 case "eDot": case "doteqdot": return '\u2251'; 4797 case "efDot": case "fallingdotseq": return '\u2252'; 4798 case "erDot": case "risingdotseq": return '\u2253'; 4799 case "colone": case "coloneq": case "Assign": return '\u2254'; 4800 case "ecolon": case "eqcolon": return '\u2255'; 4801 case "ecir": case "eqcirc": return '\u2256'; 4802 case "cire": case "circeq": return '\u2257'; 4803 case "wedgeq": return '\u2259'; 4804 case "veeeq": return '\u225A'; 4805 case "trie": case "triangleq": return '\u225C'; 4806 case "equest": case "questeq": return '\u225F'; 4807 case "ne": case "NotEqual": return '\u2260'; 4808 case "equiv": case "Congruent": return '\u2261'; 4809 case "nequiv": case "NotCongruent": return '\u2262'; 4810 case "le": case "leq": return '\u2264'; 4811 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4812 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4813 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4814 case "lnE": case "lneqq": return '\u2268'; 4815 case "gnE": case "gneqq": return '\u2269'; 4816 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4817 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4818 case "twixt": case "between": return '\u226C'; 4819 case "NotCupCap": return '\u226D'; 4820 case "nlt": case "NotLess": case "nless": return '\u226E'; 4821 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4822 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4823 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4824 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4825 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4826 case "nlsim": case "NotLessTilde": return '\u2274'; 4827 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4828 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4829 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4830 case "ntlg": case "NotLessGreater": return '\u2278'; 4831 case "ntgl": case "NotGreaterLess": return '\u2279'; 4832 case "pr": case "Precedes": case "prec": return '\u227A'; 4833 case "sc": case "Succeeds": case "succ": return '\u227B'; 4834 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4835 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4836 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4837 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4838 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4839 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4840 case "sub": case "subset": return '\u2282'; 4841 case "sup": case "supset": case "Superset": return '\u2283'; 4842 case "nsub": return '\u2284'; 4843 case "nsup": return '\u2285'; 4844 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4845 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4846 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4847 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4848 case "subne": case "subsetneq": return '\u228A'; 4849 case "supne": case "supsetneq": return '\u228B'; 4850 case "cupdot": return '\u228D'; 4851 case "uplus": case "UnionPlus": return '\u228E'; 4852 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4853 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4854 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4855 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4856 case "sqcap": case "SquareIntersection": return '\u2293'; 4857 case "sqcup": case "SquareUnion": return '\u2294'; 4858 case "oplus": case "CirclePlus": return '\u2295'; 4859 case "ominus": case "CircleMinus": return '\u2296'; 4860 case "otimes": case "CircleTimes": return '\u2297'; 4861 case "osol": return '\u2298'; 4862 case "odot": case "CircleDot": return '\u2299'; 4863 case "ocir": case "circledcirc": return '\u229A'; 4864 case "oast": case "circledast": return '\u229B'; 4865 case "odash": case "circleddash": return '\u229D'; 4866 case "plusb": case "boxplus": return '\u229E'; 4867 case "minusb": case "boxminus": return '\u229F'; 4868 case "timesb": case "boxtimes": return '\u22A0'; 4869 case "sdotb": case "dotsquare": return '\u22A1'; 4870 case "vdash": case "RightTee": return '\u22A2'; 4871 case "dashv": case "LeftTee": return '\u22A3'; 4872 case "top": case "DownTee": return '\u22A4'; 4873 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4874 case "models": return '\u22A7'; 4875 case "vDash": case "DoubleRightTee": return '\u22A8'; 4876 case "Vdash": return '\u22A9'; 4877 case "Vvdash": return '\u22AA'; 4878 case "VDash": return '\u22AB'; 4879 case "nvdash": return '\u22AC'; 4880 case "nvDash": return '\u22AD'; 4881 case "nVdash": return '\u22AE'; 4882 case "nVDash": return '\u22AF'; 4883 case "prurel": return '\u22B0'; 4884 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4885 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4886 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4887 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4888 case "origof": return '\u22B6'; 4889 case "imof": return '\u22B7'; 4890 case "mumap": case "multimap": return '\u22B8'; 4891 case "hercon": return '\u22B9'; 4892 case "intcal": case "intercal": return '\u22BA'; 4893 case "veebar": return '\u22BB'; 4894 case "barvee": return '\u22BD'; 4895 case "angrtvb": return '\u22BE'; 4896 case "lrtri": return '\u22BF'; 4897 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4898 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4899 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4900 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4901 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4902 case "sdot": return '\u22C5'; 4903 case "sstarf": case "Star": return '\u22C6'; 4904 case "divonx": case "divideontimes": return '\u22C7'; 4905 case "bowtie": return '\u22C8'; 4906 case "ltimes": return '\u22C9'; 4907 case "rtimes": return '\u22CA'; 4908 case "lthree": case "leftthreetimes": return '\u22CB'; 4909 case "rthree": case "rightthreetimes": return '\u22CC'; 4910 case "bsime": case "backsimeq": return '\u22CD'; 4911 case "cuvee": case "curlyvee": return '\u22CE'; 4912 case "cuwed": case "curlywedge": return '\u22CF'; 4913 case "Sub": case "Subset": return '\u22D0'; 4914 case "Sup": case "Supset": return '\u22D1'; 4915 case "Cap": return '\u22D2'; 4916 case "Cup": return '\u22D3'; 4917 case "fork": case "pitchfork": return '\u22D4'; 4918 case "epar": return '\u22D5'; 4919 case "ltdot": case "lessdot": return '\u22D6'; 4920 case "gtdot": case "gtrdot": return '\u22D7'; 4921 case "Ll": return '\u22D8'; 4922 case "Gg": case "ggg": return '\u22D9'; 4923 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4924 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4925 case "cuepr": case "curlyeqprec": return '\u22DE'; 4926 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4927 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4928 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4929 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4930 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4931 case "lnsim": return '\u22E6'; 4932 case "gnsim": return '\u22E7'; 4933 case "prnsim": case "precnsim": return '\u22E8'; 4934 case "scnsim": case "succnsim": return '\u22E9'; 4935 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 4936 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 4937 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 4938 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 4939 case "vellip": return '\u22EE'; 4940 case "ctdot": return '\u22EF'; 4941 case "utdot": return '\u22F0'; 4942 case "dtdot": return '\u22F1'; 4943 case "disin": return '\u22F2'; 4944 case "isinsv": return '\u22F3'; 4945 case "isins": return '\u22F4'; 4946 case "isindot": return '\u22F5'; 4947 case "notinvc": return '\u22F6'; 4948 case "notinvb": return '\u22F7'; 4949 case "isinE": return '\u22F9'; 4950 case "nisd": return '\u22FA'; 4951 case "xnis": return '\u22FB'; 4952 case "nis": return '\u22FC'; 4953 case "notnivc": return '\u22FD'; 4954 case "notnivb": return '\u22FE'; 4955 case "barwed": case "barwedge": return '\u2305'; 4956 case "Barwed": case "doublebarwedge": return '\u2306'; 4957 case "lceil": case "LeftCeiling": return '\u2308'; 4958 case "rceil": case "RightCeiling": return '\u2309'; 4959 case "lfloor": case "LeftFloor": return '\u230A'; 4960 case "rfloor": case "RightFloor": return '\u230B'; 4961 case "drcrop": return '\u230C'; 4962 case "dlcrop": return '\u230D'; 4963 case "urcrop": return '\u230E'; 4964 case "ulcrop": return '\u230F'; 4965 case "bnot": return '\u2310'; 4966 case "profline": return '\u2312'; 4967 case "profsurf": return '\u2313'; 4968 case "telrec": return '\u2315'; 4969 case "target": return '\u2316'; 4970 case "ulcorn": case "ulcorner": return '\u231C'; 4971 case "urcorn": case "urcorner": return '\u231D'; 4972 case "dlcorn": case "llcorner": return '\u231E'; 4973 case "drcorn": case "lrcorner": return '\u231F'; 4974 case "frown": case "sfrown": return '\u2322'; 4975 case "smile": case "ssmile": return '\u2323'; 4976 case "cylcty": return '\u232D'; 4977 case "profalar": return '\u232E'; 4978 case "topbot": return '\u2336'; 4979 case "ovbar": return '\u233D'; 4980 case "solbar": return '\u233F'; 4981 case "angzarr": return '\u237C'; 4982 case "lmoust": case "lmoustache": return '\u23B0'; 4983 case "rmoust": case "rmoustache": return '\u23B1'; 4984 case "tbrk": case "OverBracket": return '\u23B4'; 4985 case "bbrk": case "UnderBracket": return '\u23B5'; 4986 case "bbrktbrk": return '\u23B6'; 4987 case "OverParenthesis": return '\u23DC'; 4988 case "UnderParenthesis": return '\u23DD'; 4989 case "OverBrace": return '\u23DE'; 4990 case "UnderBrace": return '\u23DF'; 4991 case "trpezium": return '\u23E2'; 4992 case "elinters": return '\u23E7'; 4993 case "blank": return '\u2423'; 4994 case "oS": case "circledS": return '\u24C8'; 4995 case "boxh": case "HorizontalLine": return '\u2500'; 4996 case "boxv": return '\u2502'; 4997 case "boxdr": return '\u250C'; 4998 case "boxdl": return '\u2510'; 4999 case "boxur": return '\u2514'; 5000 case "boxul": return '\u2518'; 5001 case "boxvr": return '\u251C'; 5002 case "boxvl": return '\u2524'; 5003 case "boxhd": return '\u252C'; 5004 case "boxhu": return '\u2534'; 5005 case "boxvh": return '\u253C'; 5006 case "boxH": return '\u2550'; 5007 case "boxV": return '\u2551'; 5008 case "boxdR": return '\u2552'; 5009 case "boxDr": return '\u2553'; 5010 case "boxDR": return '\u2554'; 5011 case "boxdL": return '\u2555'; 5012 case "boxDl": return '\u2556'; 5013 case "boxDL": return '\u2557'; 5014 case "boxuR": return '\u2558'; 5015 case "boxUr": return '\u2559'; 5016 case "boxUR": return '\u255A'; 5017 case "boxuL": return '\u255B'; 5018 case "boxUl": return '\u255C'; 5019 case "boxUL": return '\u255D'; 5020 case "boxvR": return '\u255E'; 5021 case "boxVr": return '\u255F'; 5022 case "boxVR": return '\u2560'; 5023 case "boxvL": return '\u2561'; 5024 case "boxVl": return '\u2562'; 5025 case "boxVL": return '\u2563'; 5026 case "boxHd": return '\u2564'; 5027 case "boxhD": return '\u2565'; 5028 case "boxHD": return '\u2566'; 5029 case "boxHu": return '\u2567'; 5030 case "boxhU": return '\u2568'; 5031 case "boxHU": return '\u2569'; 5032 case "boxvH": return '\u256A'; 5033 case "boxVh": return '\u256B'; 5034 case "boxVH": return '\u256C'; 5035 case "uhblk": return '\u2580'; 5036 case "lhblk": return '\u2584'; 5037 case "block": return '\u2588'; 5038 case "blk14": return '\u2591'; 5039 case "blk12": return '\u2592'; 5040 case "blk34": return '\u2593'; 5041 case "squ": case "square": case "Square": return '\u25A1'; 5042 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 5043 case "EmptyVerySmallSquare": return '\u25AB'; 5044 case "rect": return '\u25AD'; 5045 case "marker": return '\u25AE'; 5046 case "fltns": return '\u25B1'; 5047 case "xutri": case "bigtriangleup": return '\u25B3'; 5048 case "utrif": case "blacktriangle": return '\u25B4'; 5049 case "utri": case "triangle": return '\u25B5'; 5050 case "rtrif": case "blacktriangleright": return '\u25B8'; 5051 case "rtri": case "triangleright": return '\u25B9'; 5052 case "xdtri": case "bigtriangledown": return '\u25BD'; 5053 case "dtrif": case "blacktriangledown": return '\u25BE'; 5054 case "dtri": case "triangledown": return '\u25BF'; 5055 case "ltrif": case "blacktriangleleft": return '\u25C2'; 5056 case "ltri": case "triangleleft": return '\u25C3'; 5057 case "loz": case "lozenge": return '\u25CA'; 5058 case "cir": return '\u25CB'; 5059 case "tridot": return '\u25EC'; 5060 case "xcirc": case "bigcirc": return '\u25EF'; 5061 case "ultri": return '\u25F8'; 5062 case "urtri": return '\u25F9'; 5063 case "lltri": return '\u25FA'; 5064 case "EmptySmallSquare": return '\u25FB'; 5065 case "FilledSmallSquare": return '\u25FC'; 5066 case "starf": case "bigstar": return '\u2605'; 5067 case "star": return '\u2606'; 5068 case "phone": return '\u260E'; 5069 case "female": return '\u2640'; 5070 case "male": return '\u2642'; 5071 case "spades": case "spadesuit": return '\u2660'; 5072 case "clubs": case "clubsuit": return '\u2663'; 5073 case "hearts": case "heartsuit": return '\u2665'; 5074 case "diams": case "diamondsuit": return '\u2666'; 5075 case "sung": return '\u266A'; 5076 case "flat": return '\u266D'; 5077 case "natur": case "natural": return '\u266E'; 5078 case "sharp": return '\u266F'; 5079 case "check": case "checkmark": return '\u2713'; 5080 case "cross": return '\u2717'; 5081 case "malt": case "maltese": return '\u2720'; 5082 case "sext": return '\u2736'; 5083 case "VerticalSeparator": return '\u2758'; 5084 case "lbbrk": return '\u2772'; 5085 case "rbbrk": return '\u2773'; 5086 case "bsolhsub": return '\u27C8'; 5087 case "suphsol": return '\u27C9'; 5088 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5089 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5090 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5091 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5092 case "Lang": return '\u27EA'; 5093 case "Rang": return '\u27EB'; 5094 case "loang": return '\u27EC'; 5095 case "roang": return '\u27ED'; 5096 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5097 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5098 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5099 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5100 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5101 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5102 case "xmap": case "longmapsto": return '\u27FC'; 5103 case "dzigrarr": return '\u27FF'; 5104 case "nvlArr": return '\u2902'; 5105 case "nvrArr": return '\u2903'; 5106 case "nvHarr": return '\u2904'; 5107 case "Map": return '\u2905'; 5108 case "lbarr": return '\u290C'; 5109 case "rbarr": case "bkarow": return '\u290D'; 5110 case "lBarr": return '\u290E'; 5111 case "rBarr": case "dbkarow": return '\u290F'; 5112 case "RBarr": case "drbkarow": return '\u2910'; 5113 case "DDotrahd": return '\u2911'; 5114 case "UpArrowBar": return '\u2912'; 5115 case "DownArrowBar": return '\u2913'; 5116 case "Rarrtl": return '\u2916'; 5117 case "latail": return '\u2919'; 5118 case "ratail": return '\u291A'; 5119 case "lAtail": return '\u291B'; 5120 case "rAtail": return '\u291C'; 5121 case "larrfs": return '\u291D'; 5122 case "rarrfs": return '\u291E'; 5123 case "larrbfs": return '\u291F'; 5124 case "rarrbfs": return '\u2920'; 5125 case "nwarhk": return '\u2923'; 5126 case "nearhk": return '\u2924'; 5127 case "searhk": case "hksearow": return '\u2925'; 5128 case "swarhk": case "hkswarow": return '\u2926'; 5129 case "nwnear": return '\u2927'; 5130 case "nesear": case "toea": return '\u2928'; 5131 case "seswar": case "tosa": return '\u2929'; 5132 case "swnwar": return '\u292A'; 5133 case "rarrc": return '\u2933'; 5134 case "cudarrr": return '\u2935'; 5135 case "ldca": return '\u2936'; 5136 case "rdca": return '\u2937'; 5137 case "cudarrl": return '\u2938'; 5138 case "larrpl": return '\u2939'; 5139 case "curarrm": return '\u293C'; 5140 case "cularrp": return '\u293D'; 5141 case "rarrpl": return '\u2945'; 5142 case "harrcir": return '\u2948'; 5143 case "Uarrocir": return '\u2949'; 5144 case "lurdshar": return '\u294A'; 5145 case "ldrushar": return '\u294B'; 5146 case "LeftRightVector": return '\u294E'; 5147 case "RightUpDownVector": return '\u294F'; 5148 case "DownLeftRightVector": return '\u2950'; 5149 case "LeftUpDownVector": return '\u2951'; 5150 case "LeftVectorBar": return '\u2952'; 5151 case "RightVectorBar": return '\u2953'; 5152 case "RightUpVectorBar": return '\u2954'; 5153 case "RightDownVectorBar": return '\u2955'; 5154 case "DownLeftVectorBar": return '\u2956'; 5155 case "DownRightVectorBar": return '\u2957'; 5156 case "LeftUpVectorBar": return '\u2958'; 5157 case "LeftDownVectorBar": return '\u2959'; 5158 case "LeftTeeVector": return '\u295A'; 5159 case "RightTeeVector": return '\u295B'; 5160 case "RightUpTeeVector": return '\u295C'; 5161 case "RightDownTeeVector": return '\u295D'; 5162 case "DownLeftTeeVector": return '\u295E'; 5163 case "DownRightTeeVector": return '\u295F'; 5164 case "LeftUpTeeVector": return '\u2960'; 5165 case "LeftDownTeeVector": return '\u2961'; 5166 case "lHar": return '\u2962'; 5167 case "uHar": return '\u2963'; 5168 case "rHar": return '\u2964'; 5169 case "dHar": return '\u2965'; 5170 case "luruhar": return '\u2966'; 5171 case "ldrdhar": return '\u2967'; 5172 case "ruluhar": return '\u2968'; 5173 case "rdldhar": return '\u2969'; 5174 case "lharul": return '\u296A'; 5175 case "llhard": return '\u296B'; 5176 case "rharul": return '\u296C'; 5177 case "lrhard": return '\u296D'; 5178 case "udhar": case "UpEquilibrium": return '\u296E'; 5179 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5180 case "RoundImplies": return '\u2970'; 5181 case "erarr": return '\u2971'; 5182 case "simrarr": return '\u2972'; 5183 case "larrsim": return '\u2973'; 5184 case "rarrsim": return '\u2974'; 5185 case "rarrap": return '\u2975'; 5186 case "ltlarr": return '\u2976'; 5187 case "gtrarr": return '\u2978'; 5188 case "subrarr": return '\u2979'; 5189 case "suplarr": return '\u297B'; 5190 case "lfisht": return '\u297C'; 5191 case "rfisht": return '\u297D'; 5192 case "ufisht": return '\u297E'; 5193 case "dfisht": return '\u297F'; 5194 case "lopar": return '\u2985'; 5195 case "ropar": return '\u2986'; 5196 case "lbrke": return '\u298B'; 5197 case "rbrke": return '\u298C'; 5198 case "lbrkslu": return '\u298D'; 5199 case "rbrksld": return '\u298E'; 5200 case "lbrksld": return '\u298F'; 5201 case "rbrkslu": return '\u2990'; 5202 case "langd": return '\u2991'; 5203 case "rangd": return '\u2992'; 5204 case "lparlt": return '\u2993'; 5205 case "rpargt": return '\u2994'; 5206 case "gtlPar": return '\u2995'; 5207 case "ltrPar": return '\u2996'; 5208 case "vzigzag": return '\u299A'; 5209 case "vangrt": return '\u299C'; 5210 case "angrtvbd": return '\u299D'; 5211 case "ange": return '\u29A4'; 5212 case "range": return '\u29A5'; 5213 case "dwangle": return '\u29A6'; 5214 case "uwangle": return '\u29A7'; 5215 case "angmsdaa": return '\u29A8'; 5216 case "angmsdab": return '\u29A9'; 5217 case "angmsdac": return '\u29AA'; 5218 case "angmsdad": return '\u29AB'; 5219 case "angmsdae": return '\u29AC'; 5220 case "angmsdaf": return '\u29AD'; 5221 case "angmsdag": return '\u29AE'; 5222 case "angmsdah": return '\u29AF'; 5223 case "bemptyv": return '\u29B0'; 5224 case "demptyv": return '\u29B1'; 5225 case "cemptyv": return '\u29B2'; 5226 case "raemptyv": return '\u29B3'; 5227 case "laemptyv": return '\u29B4'; 5228 case "ohbar": return '\u29B5'; 5229 case "omid": return '\u29B6'; 5230 case "opar": return '\u29B7'; 5231 case "operp": return '\u29B9'; 5232 case "olcross": return '\u29BB'; 5233 case "odsold": return '\u29BC'; 5234 case "olcir": return '\u29BE'; 5235 case "ofcir": return '\u29BF'; 5236 case "olt": return '\u29C0'; 5237 case "ogt": return '\u29C1'; 5238 case "cirscir": return '\u29C2'; 5239 case "cirE": return '\u29C3'; 5240 case "solb": return '\u29C4'; 5241 case "bsolb": return '\u29C5'; 5242 case "boxbox": return '\u29C9'; 5243 case "trisb": return '\u29CD'; 5244 case "rtriltri": return '\u29CE'; 5245 case "LeftTriangleBar": return '\u29CF'; 5246 case "RightTriangleBar": return '\u29D0'; 5247 case "iinfin": return '\u29DC'; 5248 case "infintie": return '\u29DD'; 5249 case "nvinfin": return '\u29DE'; 5250 case "eparsl": return '\u29E3'; 5251 case "smeparsl": return '\u29E4'; 5252 case "eqvparsl": return '\u29E5'; 5253 case "lozf": case "blacklozenge": return '\u29EB'; 5254 case "RuleDelayed": return '\u29F4'; 5255 case "dsol": return '\u29F6'; 5256 case "xodot": case "bigodot": return '\u2A00'; 5257 case "xoplus": case "bigoplus": return '\u2A01'; 5258 case "xotime": case "bigotimes": return '\u2A02'; 5259 case "xuplus": case "biguplus": return '\u2A04'; 5260 case "xsqcup": case "bigsqcup": return '\u2A06'; 5261 case "qint": case "iiiint": return '\u2A0C'; 5262 case "fpartint": return '\u2A0D'; 5263 case "cirfnint": return '\u2A10'; 5264 case "awint": return '\u2A11'; 5265 case "rppolint": return '\u2A12'; 5266 case "scpolint": return '\u2A13'; 5267 case "npolint": return '\u2A14'; 5268 case "pointint": return '\u2A15'; 5269 case "quatint": return '\u2A16'; 5270 case "intlarhk": return '\u2A17'; 5271 case "pluscir": return '\u2A22'; 5272 case "plusacir": return '\u2A23'; 5273 case "simplus": return '\u2A24'; 5274 case "plusdu": return '\u2A25'; 5275 case "plussim": return '\u2A26'; 5276 case "plustwo": return '\u2A27'; 5277 case "mcomma": return '\u2A29'; 5278 case "minusdu": return '\u2A2A'; 5279 case "loplus": return '\u2A2D'; 5280 case "roplus": return '\u2A2E'; 5281 case "Cross": return '\u2A2F'; 5282 case "timesd": return '\u2A30'; 5283 case "timesbar": return '\u2A31'; 5284 case "smashp": return '\u2A33'; 5285 case "lotimes": return '\u2A34'; 5286 case "rotimes": return '\u2A35'; 5287 case "otimesas": return '\u2A36'; 5288 case "Otimes": return '\u2A37'; 5289 case "odiv": return '\u2A38'; 5290 case "triplus": return '\u2A39'; 5291 case "triminus": return '\u2A3A'; 5292 case "tritime": return '\u2A3B'; 5293 case "iprod": case "intprod": return '\u2A3C'; 5294 case "amalg": return '\u2A3F'; 5295 case "capdot": return '\u2A40'; 5296 case "ncup": return '\u2A42'; 5297 case "ncap": return '\u2A43'; 5298 case "capand": return '\u2A44'; 5299 case "cupor": return '\u2A45'; 5300 case "cupcap": return '\u2A46'; 5301 case "capcup": return '\u2A47'; 5302 case "cupbrcap": return '\u2A48'; 5303 case "capbrcup": return '\u2A49'; 5304 case "cupcup": return '\u2A4A'; 5305 case "capcap": return '\u2A4B'; 5306 case "ccups": return '\u2A4C'; 5307 case "ccaps": return '\u2A4D'; 5308 case "ccupssm": return '\u2A50'; 5309 case "And": return '\u2A53'; 5310 case "Or": return '\u2A54'; 5311 case "andand": return '\u2A55'; 5312 case "oror": return '\u2A56'; 5313 case "orslope": return '\u2A57'; 5314 case "andslope": return '\u2A58'; 5315 case "andv": return '\u2A5A'; 5316 case "orv": return '\u2A5B'; 5317 case "andd": return '\u2A5C'; 5318 case "ord": return '\u2A5D'; 5319 case "wedbar": return '\u2A5F'; 5320 case "sdote": return '\u2A66'; 5321 case "simdot": return '\u2A6A'; 5322 case "congdot": return '\u2A6D'; 5323 case "easter": return '\u2A6E'; 5324 case "apacir": return '\u2A6F'; 5325 case "apE": return '\u2A70'; 5326 case "eplus": return '\u2A71'; 5327 case "pluse": return '\u2A72'; 5328 case "Esim": return '\u2A73'; 5329 case "Colone": return '\u2A74'; 5330 case "Equal": return '\u2A75'; 5331 case "eDDot": case "ddotseq": return '\u2A77'; 5332 case "equivDD": return '\u2A78'; 5333 case "ltcir": return '\u2A79'; 5334 case "gtcir": return '\u2A7A'; 5335 case "ltquest": return '\u2A7B'; 5336 case "gtquest": return '\u2A7C'; 5337 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5338 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5339 case "lesdot": return '\u2A7F'; 5340 case "gesdot": return '\u2A80'; 5341 case "lesdoto": return '\u2A81'; 5342 case "gesdoto": return '\u2A82'; 5343 case "lesdotor": return '\u2A83'; 5344 case "gesdotol": return '\u2A84'; 5345 case "lap": case "lessapprox": return '\u2A85'; 5346 case "gap": case "gtrapprox": return '\u2A86'; 5347 case "lne": case "lneq": return '\u2A87'; 5348 case "gne": case "gneq": return '\u2A88'; 5349 case "lnap": case "lnapprox": return '\u2A89'; 5350 case "gnap": case "gnapprox": return '\u2A8A'; 5351 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5352 case "gEl": case "gtreqqless": return '\u2A8C'; 5353 case "lsime": return '\u2A8D'; 5354 case "gsime": return '\u2A8E'; 5355 case "lsimg": return '\u2A8F'; 5356 case "gsiml": return '\u2A90'; 5357 case "lgE": return '\u2A91'; 5358 case "glE": return '\u2A92'; 5359 case "lesges": return '\u2A93'; 5360 case "gesles": return '\u2A94'; 5361 case "els": case "eqslantless": return '\u2A95'; 5362 case "egs": case "eqslantgtr": return '\u2A96'; 5363 case "elsdot": return '\u2A97'; 5364 case "egsdot": return '\u2A98'; 5365 case "el": return '\u2A99'; 5366 case "eg": return '\u2A9A'; 5367 case "siml": return '\u2A9D'; 5368 case "simg": return '\u2A9E'; 5369 case "simlE": return '\u2A9F'; 5370 case "simgE": return '\u2AA0'; 5371 case "LessLess": return '\u2AA1'; 5372 case "GreaterGreater": return '\u2AA2'; 5373 case "glj": return '\u2AA4'; 5374 case "gla": return '\u2AA5'; 5375 case "ltcc": return '\u2AA6'; 5376 case "gtcc": return '\u2AA7'; 5377 case "lescc": return '\u2AA8'; 5378 case "gescc": return '\u2AA9'; 5379 case "smt": return '\u2AAA'; 5380 case "lat": return '\u2AAB'; 5381 case "smte": return '\u2AAC'; 5382 case "late": return '\u2AAD'; 5383 case "bumpE": return '\u2AAE'; 5384 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5385 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5386 case "prE": return '\u2AB3'; 5387 case "scE": return '\u2AB4'; 5388 case "prnE": case "precneqq": return '\u2AB5'; 5389 case "scnE": case "succneqq": return '\u2AB6'; 5390 case "prap": case "precapprox": return '\u2AB7'; 5391 case "scap": case "succapprox": return '\u2AB8'; 5392 case "prnap": case "precnapprox": return '\u2AB9'; 5393 case "scnap": case "succnapprox": return '\u2ABA'; 5394 case "Pr": return '\u2ABB'; 5395 case "Sc": return '\u2ABC'; 5396 case "subdot": return '\u2ABD'; 5397 case "supdot": return '\u2ABE'; 5398 case "subplus": return '\u2ABF'; 5399 case "supplus": return '\u2AC0'; 5400 case "submult": return '\u2AC1'; 5401 case "supmult": return '\u2AC2'; 5402 case "subedot": return '\u2AC3'; 5403 case "supedot": return '\u2AC4'; 5404 case "subE": case "subseteqq": return '\u2AC5'; 5405 case "supE": case "supseteqq": return '\u2AC6'; 5406 case "subsim": return '\u2AC7'; 5407 case "supsim": return '\u2AC8'; 5408 case "subnE": case "subsetneqq": return '\u2ACB'; 5409 case "supnE": case "supsetneqq": return '\u2ACC'; 5410 case "csub": return '\u2ACF'; 5411 case "csup": return '\u2AD0'; 5412 case "csube": return '\u2AD1'; 5413 case "csupe": return '\u2AD2'; 5414 case "subsup": return '\u2AD3'; 5415 case "supsub": return '\u2AD4'; 5416 case "subsub": return '\u2AD5'; 5417 case "supsup": return '\u2AD6'; 5418 case "suphsub": return '\u2AD7'; 5419 case "supdsub": return '\u2AD8'; 5420 case "forkv": return '\u2AD9'; 5421 case "topfork": return '\u2ADA'; 5422 case "mlcp": return '\u2ADB'; 5423 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5424 case "Vdashl": return '\u2AE6'; 5425 case "Barv": return '\u2AE7'; 5426 case "vBar": return '\u2AE8'; 5427 case "vBarv": return '\u2AE9'; 5428 case "Vbar": return '\u2AEB'; 5429 case "Not": return '\u2AEC'; 5430 case "bNot": return '\u2AED'; 5431 case "rnmid": return '\u2AEE'; 5432 case "cirmid": return '\u2AEF'; 5433 case "midcir": return '\u2AF0'; 5434 case "topcir": return '\u2AF1'; 5435 case "nhpar": return '\u2AF2'; 5436 case "parsim": return '\u2AF3'; 5437 case "parsl": return '\u2AFD'; 5438 case "fflig": return '\uFB00'; 5439 case "filig": return '\uFB01'; 5440 case "fllig": return '\uFB02'; 5441 case "ffilig": return '\uFB03'; 5442 case "ffllig": return '\uFB04'; 5443 case "Ascr": return '\U0001D49C'; 5444 case "Cscr": return '\U0001D49E'; 5445 case "Dscr": return '\U0001D49F'; 5446 case "Gscr": return '\U0001D4A2'; 5447 case "Jscr": return '\U0001D4A5'; 5448 case "Kscr": return '\U0001D4A6'; 5449 case "Nscr": return '\U0001D4A9'; 5450 case "Oscr": return '\U0001D4AA'; 5451 case "Pscr": return '\U0001D4AB'; 5452 case "Qscr": return '\U0001D4AC'; 5453 case "Sscr": return '\U0001D4AE'; 5454 case "Tscr": return '\U0001D4AF'; 5455 case "Uscr": return '\U0001D4B0'; 5456 case "Vscr": return '\U0001D4B1'; 5457 case "Wscr": return '\U0001D4B2'; 5458 case "Xscr": return '\U0001D4B3'; 5459 case "Yscr": return '\U0001D4B4'; 5460 case "Zscr": return '\U0001D4B5'; 5461 case "ascr": return '\U0001D4B6'; 5462 case "bscr": return '\U0001D4B7'; 5463 case "cscr": return '\U0001D4B8'; 5464 case "dscr": return '\U0001D4B9'; 5465 case "fscr": return '\U0001D4BB'; 5466 case "hscr": return '\U0001D4BD'; 5467 case "iscr": return '\U0001D4BE'; 5468 case "jscr": return '\U0001D4BF'; 5469 case "kscr": return '\U0001D4C0'; 5470 case "lscr": return '\U0001D4C1'; 5471 case "mscr": return '\U0001D4C2'; 5472 case "nscr": return '\U0001D4C3'; 5473 case "pscr": return '\U0001D4C5'; 5474 case "qscr": return '\U0001D4C6'; 5475 case "rscr": return '\U0001D4C7'; 5476 case "sscr": return '\U0001D4C8'; 5477 case "tscr": return '\U0001D4C9'; 5478 case "uscr": return '\U0001D4CA'; 5479 case "vscr": return '\U0001D4CB'; 5480 case "wscr": return '\U0001D4CC'; 5481 case "xscr": return '\U0001D4CD'; 5482 case "yscr": return '\U0001D4CE'; 5483 case "zscr": return '\U0001D4CF'; 5484 case "Afr": return '\U0001D504'; 5485 case "Bfr": return '\U0001D505'; 5486 case "Dfr": return '\U0001D507'; 5487 case "Efr": return '\U0001D508'; 5488 case "Ffr": return '\U0001D509'; 5489 case "Gfr": return '\U0001D50A'; 5490 case "Jfr": return '\U0001D50D'; 5491 case "Kfr": return '\U0001D50E'; 5492 case "Lfr": return '\U0001D50F'; 5493 case "Mfr": return '\U0001D510'; 5494 case "Nfr": return '\U0001D511'; 5495 case "Ofr": return '\U0001D512'; 5496 case "Pfr": return '\U0001D513'; 5497 case "Qfr": return '\U0001D514'; 5498 case "Sfr": return '\U0001D516'; 5499 case "Tfr": return '\U0001D517'; 5500 case "Ufr": return '\U0001D518'; 5501 case "Vfr": return '\U0001D519'; 5502 case "Wfr": return '\U0001D51A'; 5503 case "Xfr": return '\U0001D51B'; 5504 case "Yfr": return '\U0001D51C'; 5505 case "afr": return '\U0001D51E'; 5506 case "bfr": return '\U0001D51F'; 5507 case "cfr": return '\U0001D520'; 5508 case "dfr": return '\U0001D521'; 5509 case "efr": return '\U0001D522'; 5510 case "ffr": return '\U0001D523'; 5511 case "gfr": return '\U0001D524'; 5512 case "hfr": return '\U0001D525'; 5513 case "ifr": return '\U0001D526'; 5514 case "jfr": return '\U0001D527'; 5515 case "kfr": return '\U0001D528'; 5516 case "lfr": return '\U0001D529'; 5517 case "mfr": return '\U0001D52A'; 5518 case "nfr": return '\U0001D52B'; 5519 case "ofr": return '\U0001D52C'; 5520 case "pfr": return '\U0001D52D'; 5521 case "qfr": return '\U0001D52E'; 5522 case "rfr": return '\U0001D52F'; 5523 case "sfr": return '\U0001D530'; 5524 case "tfr": return '\U0001D531'; 5525 case "ufr": return '\U0001D532'; 5526 case "vfr": return '\U0001D533'; 5527 case "wfr": return '\U0001D534'; 5528 case "xfr": return '\U0001D535'; 5529 case "yfr": return '\U0001D536'; 5530 case "zfr": return '\U0001D537'; 5531 case "Aopf": return '\U0001D538'; 5532 case "Bopf": return '\U0001D539'; 5533 case "Dopf": return '\U0001D53B'; 5534 case "Eopf": return '\U0001D53C'; 5535 case "Fopf": return '\U0001D53D'; 5536 case "Gopf": return '\U0001D53E'; 5537 case "Iopf": return '\U0001D540'; 5538 case "Jopf": return '\U0001D541'; 5539 case "Kopf": return '\U0001D542'; 5540 case "Lopf": return '\U0001D543'; 5541 case "Mopf": return '\U0001D544'; 5542 case "Oopf": return '\U0001D546'; 5543 case "Sopf": return '\U0001D54A'; 5544 case "Topf": return '\U0001D54B'; 5545 case "Uopf": return '\U0001D54C'; 5546 case "Vopf": return '\U0001D54D'; 5547 case "Wopf": return '\U0001D54E'; 5548 case "Xopf": return '\U0001D54F'; 5549 case "Yopf": return '\U0001D550'; 5550 case "aopf": return '\U0001D552'; 5551 case "bopf": return '\U0001D553'; 5552 case "copf": return '\U0001D554'; 5553 case "dopf": return '\U0001D555'; 5554 case "eopf": return '\U0001D556'; 5555 case "fopf": return '\U0001D557'; 5556 case "gopf": return '\U0001D558'; 5557 case "hopf": return '\U0001D559'; 5558 case "iopf": return '\U0001D55A'; 5559 case "jopf": return '\U0001D55B'; 5560 case "kopf": return '\U0001D55C'; 5561 case "lopf": return '\U0001D55D'; 5562 case "mopf": return '\U0001D55E'; 5563 case "nopf": return '\U0001D55F'; 5564 case "oopf": return '\U0001D560'; 5565 case "popf": return '\U0001D561'; 5566 case "qopf": return '\U0001D562'; 5567 case "ropf": return '\U0001D563'; 5568 case "sopf": return '\U0001D564'; 5569 case "topf": return '\U0001D565'; 5570 case "uopf": return '\U0001D566'; 5571 case "vopf": return '\U0001D567'; 5572 case "wopf": return '\U0001D568'; 5573 case "xopf": return '\U0001D569'; 5574 case "yopf": return '\U0001D56A'; 5575 case "zopf": return '\U0001D56B'; 5576 5577 // and handling numeric entities 5578 default: 5579 if(entity[1] == '#') { 5580 if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) { 5581 auto hex = entity[3..$-1]; 5582 5583 auto p = intFromHex(to!string(hex).toLower()); 5584 return cast(dchar) p; 5585 } else { 5586 auto decimal = entity[2..$-1]; 5587 5588 // dealing with broken html entities 5589 while(decimal.length && (decimal[0] < '0' || decimal[0] > '9')) 5590 decimal = decimal[1 .. $]; 5591 5592 if(decimal.length == 0) 5593 return ' '; // this is really broken html 5594 // done with dealing with broken stuff 5595 5596 auto p = std.conv.to!int(decimal); 5597 return cast(dchar) p; 5598 } 5599 } else 5600 return '\ufffd'; // replacement character diamond thing 5601 } 5602 5603 assert(0); 5604 } 5605 5606 import std.utf; 5607 import std.stdio; 5608 5609 /// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string. 5610 /// By default, it uses loose mode - it will try to return a useful string from garbage input too. 5611 /// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input. 5612 /// Group: core_functionality 5613 string htmlEntitiesDecode(string data, bool strict = false) { 5614 // this check makes a *big* difference; about a 50% improvement of parse speed on my test. 5615 if(data.indexOf("&") == -1) // all html entities begin with & 5616 return data; // if there are no entities in here, we can return the original slice and save some time 5617 5618 char[] a; // this seems to do a *better* job than appender! 5619 5620 char[4] buffer; 5621 5622 bool tryingEntity = false; 5623 dchar[16] entityBeingTried; 5624 int entityBeingTriedLength = 0; 5625 int entityAttemptIndex = 0; 5626 5627 foreach(dchar ch; data) { 5628 if(tryingEntity) { 5629 entityAttemptIndex++; 5630 entityBeingTried[entityBeingTriedLength++] = ch; 5631 5632 // I saw some crappy html in the wild that looked like &0ї this tries to handle that. 5633 if(ch == '&') { 5634 if(strict) 5635 throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5636 5637 // if not strict, let's try to parse both. 5638 5639 if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") 5640 a ~= "&"; // double amp means keep the first one, still try to parse the next one 5641 else 5642 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))]; 5643 5644 // tryingEntity is still true 5645 entityBeingTriedLength = 1; 5646 entityAttemptIndex = 0; // restarting o this 5647 } else 5648 if(ch == ';') { 5649 tryingEntity = false; 5650 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))]; 5651 } else if(ch == ' ') { 5652 // e.g. you & i 5653 if(strict) 5654 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5655 else { 5656 tryingEntity = false; 5657 a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]); 5658 } 5659 } else { 5660 if(entityAttemptIndex >= 9) { 5661 if(strict) 5662 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5663 else { 5664 tryingEntity = false; 5665 a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]); 5666 } 5667 } 5668 } 5669 } else { 5670 if(ch == '&') { 5671 tryingEntity = true; 5672 entityBeingTriedLength = 0; 5673 entityBeingTried[entityBeingTriedLength++] = ch; 5674 entityAttemptIndex = 0; 5675 } else { 5676 a ~= buffer[0 .. std.utf.encode(buffer, ch)]; 5677 } 5678 } 5679 } 5680 5681 if(tryingEntity) { 5682 if(strict) 5683 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5684 5685 // otherwise, let's try to recover, at least so we don't drop any data 5686 a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]); 5687 // FIXME: what if we have "cool &"? should we try to parse it? 5688 } 5689 5690 return cast(string) a; // assumeUnique is actually kinda slow, lol 5691 } 5692 5693 /// Group: implementations 5694 abstract class SpecialElement : Element { 5695 this(Document _parentDocument) { 5696 super(_parentDocument); 5697 } 5698 5699 ///. 5700 override Element appendChild(Element e) { 5701 assert(0, "Cannot append to a special node"); 5702 } 5703 5704 ///. 5705 @property override int nodeType() const { 5706 return 100; 5707 } 5708 } 5709 5710 ///. 5711 /// Group: implementations 5712 class RawSource : SpecialElement { 5713 ///. 5714 this(Document _parentDocument, string s) { 5715 super(_parentDocument); 5716 source = s; 5717 tagName = "#raw"; 5718 } 5719 5720 ///. 5721 override string nodeValue() const { 5722 return this.toString(); 5723 } 5724 5725 ///. 5726 override string writeToAppender(Appender!string where = appender!string()) const { 5727 where.put(source); 5728 return source; 5729 } 5730 5731 override string toPrettyString(bool, int, string) const { 5732 return source; 5733 } 5734 5735 5736 override RawSource cloneNode(bool deep) { 5737 return new RawSource(parentDocument, source); 5738 } 5739 5740 ///. 5741 string source; 5742 } 5743 5744 /// Group: implementations 5745 abstract class ServerSideCode : SpecialElement { 5746 this(Document _parentDocument, string type) { 5747 super(_parentDocument); 5748 tagName = "#" ~ type; 5749 } 5750 5751 ///. 5752 override string nodeValue() const { 5753 return this.source; 5754 } 5755 5756 ///. 5757 override string writeToAppender(Appender!string where = appender!string()) const { 5758 auto start = where.data.length; 5759 where.put("<"); 5760 where.put(source); 5761 where.put(">"); 5762 return where.data[start .. $]; 5763 } 5764 5765 override string toPrettyString(bool, int, string) const { 5766 return "<" ~ source ~ ">"; 5767 } 5768 5769 ///. 5770 string source; 5771 } 5772 5773 ///. 5774 /// Group: implementations 5775 class PhpCode : ServerSideCode { 5776 ///. 5777 this(Document _parentDocument, string s) { 5778 super(_parentDocument, "php"); 5779 source = s; 5780 } 5781 5782 override PhpCode cloneNode(bool deep) { 5783 return new PhpCode(parentDocument, source); 5784 } 5785 } 5786 5787 ///. 5788 /// Group: implementations 5789 class AspCode : ServerSideCode { 5790 ///. 5791 this(Document _parentDocument, string s) { 5792 super(_parentDocument, "asp"); 5793 source = s; 5794 } 5795 5796 override AspCode cloneNode(bool deep) { 5797 return new AspCode(parentDocument, source); 5798 } 5799 } 5800 5801 ///. 5802 /// Group: implementations 5803 class BangInstruction : SpecialElement { 5804 ///. 5805 this(Document _parentDocument, string s) { 5806 super(_parentDocument); 5807 source = s; 5808 tagName = "#bpi"; 5809 } 5810 5811 ///. 5812 override string nodeValue() const { 5813 return this.source; 5814 } 5815 5816 override BangInstruction cloneNode(bool deep) { 5817 return new BangInstruction(parentDocument, source); 5818 } 5819 5820 ///. 5821 override string writeToAppender(Appender!string where = appender!string()) const { 5822 auto start = where.data.length; 5823 where.put("<!"); 5824 where.put(source); 5825 where.put(">"); 5826 return where.data[start .. $]; 5827 } 5828 5829 override string toPrettyString(bool, int, string) const { 5830 string s; 5831 s ~= "<!"; 5832 s ~= source; 5833 s ~= ">"; 5834 return s; 5835 } 5836 5837 ///. 5838 string source; 5839 } 5840 5841 ///. 5842 /// Group: implementations 5843 class QuestionInstruction : SpecialElement { 5844 ///. 5845 this(Document _parentDocument, string s) { 5846 super(_parentDocument); 5847 source = s; 5848 tagName = "#qpi"; 5849 } 5850 5851 override QuestionInstruction cloneNode(bool deep) { 5852 return new QuestionInstruction(parentDocument, source); 5853 } 5854 5855 ///. 5856 override string nodeValue() const { 5857 return this.source; 5858 } 5859 5860 ///. 5861 override string writeToAppender(Appender!string where = appender!string()) const { 5862 auto start = where.data.length; 5863 where.put("<"); 5864 where.put(source); 5865 where.put(">"); 5866 return where.data[start .. $]; 5867 } 5868 5869 override string toPrettyString(bool, int, string) const { 5870 string s; 5871 s ~= "<"; 5872 s ~= source; 5873 s ~= ">"; 5874 return s; 5875 } 5876 5877 5878 ///. 5879 string source; 5880 } 5881 5882 ///. 5883 /// Group: implementations 5884 class HtmlComment : SpecialElement { 5885 ///. 5886 this(Document _parentDocument, string s) { 5887 super(_parentDocument); 5888 source = s; 5889 tagName = "#comment"; 5890 } 5891 5892 override HtmlComment cloneNode(bool deep) { 5893 return new HtmlComment(parentDocument, source); 5894 } 5895 5896 ///. 5897 override string nodeValue() const { 5898 return this.source; 5899 } 5900 5901 ///. 5902 override string writeToAppender(Appender!string where = appender!string()) const { 5903 auto start = where.data.length; 5904 where.put("<!--"); 5905 where.put(source); 5906 where.put("-->"); 5907 return where.data[start .. $]; 5908 } 5909 5910 override string toPrettyString(bool, int, string) const { 5911 string s; 5912 s ~= "<!--"; 5913 s ~= source; 5914 s ~= "-->"; 5915 return s; 5916 } 5917 5918 5919 ///. 5920 string source; 5921 } 5922 5923 5924 5925 5926 ///. 5927 /// Group: implementations 5928 class TextNode : Element { 5929 public: 5930 ///. 5931 this(Document _parentDocument, string e) { 5932 super(_parentDocument); 5933 contents = e; 5934 tagName = "#text"; 5935 } 5936 5937 /// 5938 this(string e) { 5939 this(null, e); 5940 } 5941 5942 string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes 5943 5944 ///. 5945 static TextNode fromUndecodedString(Document _parentDocument, string html) { 5946 auto e = new TextNode(_parentDocument, ""); 5947 e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose); 5948 return e; 5949 } 5950 5951 ///. 5952 override @property TextNode cloneNode(bool deep) { 5953 auto n = new TextNode(parentDocument, contents); 5954 return n; 5955 } 5956 5957 ///. 5958 override string nodeValue() const { 5959 return this.contents; //toString(); 5960 } 5961 5962 ///. 5963 @property override int nodeType() const { 5964 return NodeType.Text; 5965 } 5966 5967 ///. 5968 override string writeToAppender(Appender!string where = appender!string()) const { 5969 string s; 5970 if(contents.length) 5971 s = htmlEntitiesEncode(contents, where); 5972 else 5973 s = ""; 5974 5975 assert(s !is null); 5976 return s; 5977 } 5978 5979 override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 5980 string s; 5981 5982 string contents = this.contents; 5983 // we will first collapse the whitespace per html 5984 // sort of. note this can break stuff yo!!!! 5985 if(this.parentNode is null || this.parentNode.tagName != "pre") { 5986 string n = ""; 5987 bool lastWasWhitespace = indentationLevel > 0; 5988 foreach(char c; contents) { 5989 if(c.isSimpleWhite) { 5990 if(!lastWasWhitespace) 5991 n ~= ' '; 5992 lastWasWhitespace = true; 5993 } else { 5994 n ~= c; 5995 lastWasWhitespace = false; 5996 } 5997 } 5998 5999 contents = n; 6000 } 6001 6002 if(this.parentNode !is null && this.parentNode.tagName != "p") { 6003 contents = contents.strip; 6004 } 6005 6006 auto e = htmlEntitiesEncode(contents); 6007 import std.algorithm.iteration : splitter; 6008 bool first = true; 6009 foreach(line; splitter(e, "\n")) { 6010 if(first) { 6011 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 6012 first = false; 6013 } else { 6014 s ~= "\n"; 6015 if(insertComments) 6016 s ~= "<!--"; 6017 foreach(i; 0 .. indentationLevel) 6018 s ~= "\t"; 6019 if(insertComments) 6020 s ~= "-->"; 6021 } 6022 s ~= line.stripRight; 6023 } 6024 return s; 6025 } 6026 6027 ///. 6028 override Element appendChild(Element e) { 6029 assert(0, "Cannot append to a text node"); 6030 } 6031 6032 ///. 6033 string contents; 6034 // alias contents content; // I just mistype this a lot, 6035 } 6036 6037 /** 6038 There are subclasses of Element offering improved helper 6039 functions for the element in HTML. 6040 */ 6041 6042 ///. 6043 /// Group: implementations 6044 class Link : Element { 6045 6046 ///. 6047 this(Document _parentDocument) { 6048 super(_parentDocument); 6049 this.tagName = "a"; 6050 } 6051 6052 6053 ///. 6054 this(string href, string text) { 6055 super("a"); 6056 setAttribute("href", href); 6057 innerText = text; 6058 } 6059 /+ 6060 /// Returns everything in the href EXCEPT the query string 6061 @property string targetSansQuery() { 6062 6063 } 6064 6065 ///. 6066 @property string domainName() { 6067 6068 } 6069 6070 ///. 6071 @property string path 6072 +/ 6073 /// This gets a variable from the URL's query string. 6074 string getValue(string name) { 6075 auto vars = variablesHash(); 6076 if(name in vars) 6077 return vars[name]; 6078 return null; 6079 } 6080 6081 private string[string] variablesHash() { 6082 string href = getAttribute("href"); 6083 if(href is null) 6084 return null; 6085 6086 auto ques = href.indexOf("?"); 6087 string str = ""; 6088 if(ques != -1) { 6089 str = href[ques+1..$]; 6090 6091 auto fragment = str.indexOf("#"); 6092 if(fragment != -1) 6093 str = str[0..fragment]; 6094 } 6095 6096 string[] variables = str.split("&"); 6097 6098 string[string] hash; 6099 6100 foreach(var; variables) { 6101 auto index = var.indexOf("="); 6102 if(index == -1) 6103 hash[var] = ""; 6104 else { 6105 hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]); 6106 } 6107 } 6108 6109 return hash; 6110 } 6111 6112 ///. 6113 /*private*/ void updateQueryString(string[string] vars) { 6114 string href = getAttribute("href"); 6115 6116 auto question = href.indexOf("?"); 6117 if(question != -1) 6118 href = href[0..question]; 6119 6120 string frag = ""; 6121 auto fragment = href.indexOf("#"); 6122 if(fragment != -1) { 6123 frag = href[fragment..$]; 6124 href = href[0..fragment]; 6125 } 6126 6127 string query = "?"; 6128 bool first = true; 6129 foreach(name, value; vars) { 6130 if(!first) 6131 query ~= "&"; 6132 else 6133 first = false; 6134 6135 query ~= encodeComponent(name); 6136 if(value.length) 6137 query ~= "=" ~ encodeComponent(value); 6138 } 6139 6140 if(query != "?") 6141 href ~= query; 6142 6143 href ~= frag; 6144 6145 setAttribute("href", href); 6146 } 6147 6148 /// Sets or adds the variable with the given name to the given value 6149 /// It automatically URI encodes the values and takes care of the ? and &. 6150 override void setValue(string name, string variable) { 6151 auto vars = variablesHash(); 6152 vars[name] = variable; 6153 6154 updateQueryString(vars); 6155 } 6156 6157 /// Removes the given variable from the query string 6158 void removeValue(string name) { 6159 auto vars = variablesHash(); 6160 vars.remove(name); 6161 6162 updateQueryString(vars); 6163 } 6164 6165 /* 6166 ///. 6167 override string toString() { 6168 6169 } 6170 6171 ///. 6172 override string getAttribute(string name) { 6173 if(name == "href") { 6174 6175 } else 6176 return super.getAttribute(name); 6177 } 6178 */ 6179 } 6180 6181 ///. 6182 /// Group: implementations 6183 class Form : Element { 6184 6185 ///. 6186 this(Document _parentDocument) { 6187 super(_parentDocument); 6188 tagName = "form"; 6189 } 6190 6191 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 6192 auto t = this.querySelector("fieldset div"); 6193 if(t is null) 6194 return super.addField(label, name, type, fieldOptions); 6195 else 6196 return t.addField(label, name, type, fieldOptions); 6197 } 6198 6199 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 6200 auto type = "text"; 6201 auto t = this.querySelector("fieldset div"); 6202 if(t is null) 6203 return super.addField(label, name, type, fieldOptions); 6204 else 6205 return t.addField(label, name, type, fieldOptions); 6206 } 6207 6208 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 6209 auto t = this.querySelector("fieldset div"); 6210 if(t is null) 6211 return super.addField(label, name, options, fieldOptions); 6212 else 6213 return t.addField(label, name, options, fieldOptions); 6214 } 6215 6216 override void setValue(string field, string value) { 6217 setValue(field, value, true); 6218 } 6219 6220 // FIXME: doesn't handle arrays; multiple fields can have the same name 6221 6222 /// Set's the form field's value. For input boxes, this sets the value attribute. For 6223 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 6224 /// the checked/selected attribute from all, and adds it to the one matching the value. 6225 /// For checkboxes, if the value is non-null and not empty, it checks the box. 6226 6227 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 6228 /// Otherwise, it makes a new input with type=hidden to keep the value. 6229 void setValue(string field, string value, bool makeNew) { 6230 auto eles = getField(field); 6231 if(eles.length == 0) { 6232 if(makeNew) { 6233 addInput(field, value); 6234 return; 6235 } else 6236 throw new Exception("form field does not exist"); 6237 } 6238 6239 if(eles.length == 1) { 6240 auto e = eles[0]; 6241 switch(e.tagName) { 6242 default: assert(0); 6243 case "textarea": 6244 e.innerText = value; 6245 break; 6246 case "input": 6247 string type = e.getAttribute("type"); 6248 if(type is null) { 6249 e.value = value; 6250 return; 6251 } 6252 switch(type) { 6253 case "checkbox": 6254 case "radio": 6255 if(value.length && value != "false") 6256 e.setAttribute("checked", "checked"); 6257 else 6258 e.removeAttribute("checked"); 6259 break; 6260 default: 6261 e.value = value; 6262 return; 6263 } 6264 break; 6265 case "select": 6266 bool found = false; 6267 foreach(child; e.tree) { 6268 if(child.tagName != "option") 6269 continue; 6270 string val = child.getAttribute("value"); 6271 if(val is null) 6272 val = child.innerText; 6273 if(val == value) { 6274 child.setAttribute("selected", "selected"); 6275 found = true; 6276 } else 6277 child.removeAttribute("selected"); 6278 } 6279 6280 if(!found) { 6281 e.addChild("option", value) 6282 .setAttribute("selected", "selected"); 6283 } 6284 break; 6285 } 6286 } else { 6287 // assume radio boxes 6288 foreach(e; eles) { 6289 string val = e.getAttribute("value"); 6290 //if(val is null) 6291 // throw new Exception("don't know what to do with radio boxes with null value"); 6292 if(val == value) 6293 e.setAttribute("checked", "checked"); 6294 else 6295 e.removeAttribute("checked"); 6296 } 6297 } 6298 } 6299 6300 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 6301 /// it makes no attempt to find and modify existing elements in the form to the new values. 6302 void addValueArray(string key, string[] arrayOfValues) { 6303 foreach(arr; arrayOfValues) 6304 addChild("input", key, arr); 6305 } 6306 6307 /// Gets the value of the field; what would be given if it submitted right now. (so 6308 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 6309 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 6310 string getValue(string field) { 6311 auto eles = getField(field); 6312 if(eles.length == 0) 6313 return ""; 6314 if(eles.length == 1) { 6315 auto e = eles[0]; 6316 switch(e.tagName) { 6317 default: assert(0); 6318 case "input": 6319 if(e.type == "checkbox") { 6320 if(e.checked) 6321 return e.value.length ? e.value : "checked"; 6322 return ""; 6323 } else 6324 return e.value; 6325 case "textarea": 6326 return e.innerText; 6327 case "select": 6328 foreach(child; e.tree) { 6329 if(child.tagName != "option") 6330 continue; 6331 if(child.selected) 6332 return child.value; 6333 } 6334 break; 6335 } 6336 } else { 6337 // assuming radio 6338 foreach(e; eles) { 6339 if(e.checked) 6340 return e.value; 6341 } 6342 } 6343 6344 return ""; 6345 } 6346 6347 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 6348 ///. 6349 string getPostableData() { 6350 bool[string] namesDone; 6351 6352 string ret; 6353 bool outputted = false; 6354 6355 foreach(e; getElementsBySelector("[name]")) { 6356 if(e.name in namesDone) 6357 continue; 6358 6359 if(outputted) 6360 ret ~= "&"; 6361 else 6362 outputted = true; 6363 6364 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 6365 6366 namesDone[e.name] = true; 6367 } 6368 6369 return ret; 6370 } 6371 6372 /// Gets the actual elements with the given name 6373 Element[] getField(string name) { 6374 Element[] ret; 6375 foreach(e; tree) { 6376 if(e.name == name) 6377 ret ~= e; 6378 } 6379 return ret; 6380 } 6381 6382 /// Grabs the <label> with the given for tag, if there is one. 6383 Element getLabel(string forId) { 6384 foreach(e; tree) 6385 if(e.tagName == "label" && e.getAttribute("for") == forId) 6386 return e; 6387 return null; 6388 } 6389 6390 /// Adds a new INPUT field to the end of the form with the given attributes. 6391 Element addInput(string name, string value, string type = "hidden") { 6392 auto e = new Element(parentDocument, "input", null, true); 6393 e.name = name; 6394 e.value = value; 6395 e.type = type; 6396 6397 appendChild(e); 6398 6399 return e; 6400 } 6401 6402 /// Removes the given field from the form. It finds the element and knocks it right out. 6403 void removeField(string name) { 6404 foreach(e; getField(name)) 6405 e.parentNode.removeChild(e); 6406 } 6407 6408 /+ 6409 /// Returns all form members. 6410 @property Element[] elements() { 6411 6412 } 6413 6414 ///. 6415 string opDispatch(string name)(string v = null) 6416 // filter things that should actually be attributes on the form 6417 if( name != "method" && name != "action" && name != "enctype" 6418 && name != "style" && name != "name" && name != "id" && name != "class") 6419 { 6420 6421 } 6422 +/ 6423 /+ 6424 void submit() { 6425 // take its elements and submit them through http 6426 } 6427 +/ 6428 } 6429 6430 import std.conv; 6431 6432 ///. 6433 /// Group: implementations 6434 class Table : Element { 6435 6436 ///. 6437 this(Document _parentDocument) { 6438 super(_parentDocument); 6439 tagName = "table"; 6440 } 6441 6442 /// Creates an element with the given type and content. 6443 Element th(T)(T t) { 6444 Element e; 6445 if(parentDocument !is null) 6446 e = parentDocument.createElement("th"); 6447 else 6448 e = Element.make("th"); 6449 static if(is(T == Html)) 6450 e.innerHTML = t; 6451 else 6452 e.innerText = to!string(t); 6453 return e; 6454 } 6455 6456 /// ditto 6457 Element td(T)(T t) { 6458 Element e; 6459 if(parentDocument !is null) 6460 e = parentDocument.createElement("td"); 6461 else 6462 e = Element.make("td"); 6463 static if(is(T == Html)) 6464 e.innerHTML = t; 6465 else 6466 e.innerText = to!string(t); 6467 return e; 6468 } 6469 6470 /// . 6471 Element appendHeaderRow(T...)(T t) { 6472 return appendRowInternal("th", "thead", t); 6473 } 6474 6475 /// . 6476 Element appendFooterRow(T...)(T t) { 6477 return appendRowInternal("td", "tfoot", t); 6478 } 6479 6480 /// . 6481 Element appendRow(T...)(T t) { 6482 return appendRowInternal("td", "tbody", t); 6483 } 6484 6485 void addColumnClasses(string[] classes...) { 6486 auto grid = getGrid(); 6487 foreach(row; grid) 6488 foreach(i, cl; classes) { 6489 if(cl.length) 6490 if(i < row.length) 6491 row[i].addClass(cl); 6492 } 6493 } 6494 6495 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 6496 Element row = Element.make("tr"); 6497 6498 foreach(e; t) { 6499 static if(is(typeof(e) : Element)) { 6500 if(e.tagName == "td" || e.tagName == "th") 6501 row.appendChild(e); 6502 else { 6503 Element a = Element.make(innerType); 6504 6505 a.appendChild(e); 6506 6507 row.appendChild(a); 6508 } 6509 } else static if(is(typeof(e) == Html)) { 6510 Element a = Element.make(innerType); 6511 a.innerHTML = e.source; 6512 row.appendChild(a); 6513 } else static if(is(typeof(e) == Element[])) { 6514 Element a = Element.make(innerType); 6515 foreach(ele; e) 6516 a.appendChild(ele); 6517 row.appendChild(a); 6518 } else static if(is(typeof(e) == string[])) { 6519 foreach(ele; e) { 6520 Element a = Element.make(innerType); 6521 a.innerText = to!string(ele); 6522 row.appendChild(a); 6523 } 6524 } else { 6525 Element a = Element.make(innerType); 6526 a.innerText = to!string(e); 6527 row.appendChild(a); 6528 } 6529 } 6530 6531 foreach(e; children) { 6532 if(e.tagName == findType) { 6533 e.appendChild(row); 6534 return row; 6535 } 6536 } 6537 6538 // the type was not found if we are here... let's add it so it is well-formed 6539 auto lol = this.addChild(findType); 6540 lol.appendChild(row); 6541 6542 return row; 6543 } 6544 6545 ///. 6546 Element captionElement() { 6547 Element cap; 6548 foreach(c; children) { 6549 if(c.tagName == "caption") { 6550 cap = c; 6551 break; 6552 } 6553 } 6554 6555 if(cap is null) { 6556 cap = Element.make("caption"); 6557 appendChild(cap); 6558 } 6559 6560 return cap; 6561 } 6562 6563 ///. 6564 @property string caption() { 6565 return captionElement().innerText; 6566 } 6567 6568 ///. 6569 @property void caption(string text) { 6570 captionElement().innerText = text; 6571 } 6572 6573 /// Gets the logical layout of the table as a rectangular grid of 6574 /// cells. It considers rowspan and colspan. A cell with a large 6575 /// span is represented in the grid by being referenced several times. 6576 /// The tablePortition parameter can get just a <thead>, <tbody>, or 6577 /// <tfoot> portion if you pass one. 6578 /// 6579 /// Note: the rectangular grid might include null cells. 6580 /// 6581 /// This is kinda expensive so you should call once when you want the grid, 6582 /// then do lookups on the returned array. 6583 TableCell[][] getGrid(Element tablePortition = null) 6584 in { 6585 if(tablePortition is null) 6586 assert(tablePortition is null); 6587 else { 6588 assert(tablePortition !is null); 6589 assert(tablePortition.parentNode is this); 6590 assert( 6591 tablePortition.tagName == "tbody" 6592 || 6593 tablePortition.tagName == "tfoot" 6594 || 6595 tablePortition.tagName == "thead" 6596 ); 6597 } 6598 } 6599 body { 6600 if(tablePortition is null) 6601 tablePortition = this; 6602 6603 TableCell[][] ret; 6604 6605 // FIXME: will also return rows of sub tables! 6606 auto rows = tablePortition.getElementsByTagName("tr"); 6607 ret.length = rows.length; 6608 6609 int maxLength = 0; 6610 6611 int insertCell(int row, int position, TableCell cell) { 6612 if(row >= ret.length) 6613 return position; // not supposed to happen - a rowspan is prolly too big. 6614 6615 if(position == -1) { 6616 position++; 6617 foreach(item; ret[row]) { 6618 if(item is null) 6619 break; 6620 position++; 6621 } 6622 } 6623 6624 if(position < ret[row].length) 6625 ret[row][position] = cell; 6626 else 6627 foreach(i; ret[row].length .. position + 1) { 6628 if(i == position) 6629 ret[row] ~= cell; 6630 else 6631 ret[row] ~= null; 6632 } 6633 return position; 6634 } 6635 6636 foreach(i, rowElement; rows) { 6637 auto row = cast(TableRow) rowElement; 6638 assert(row !is null); 6639 assert(i < ret.length); 6640 6641 int position = 0; 6642 foreach(cellElement; rowElement.childNodes) { 6643 auto cell = cast(TableCell) cellElement; 6644 if(cell is null) 6645 continue; 6646 6647 // FIXME: colspan == 0 or rowspan == 0 6648 // is supposed to mean fill in the rest of 6649 // the table, not skip it 6650 foreach(int j; 0 .. cell.colspan) { 6651 foreach(int k; 0 .. cell.rowspan) 6652 // if the first row, always append. 6653 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 6654 position++; 6655 } 6656 } 6657 6658 if(ret[i].length > maxLength) 6659 maxLength = cast(int) ret[i].length; 6660 } 6661 6662 // want to ensure it's rectangular 6663 foreach(ref r; ret) { 6664 foreach(i; r.length .. maxLength) 6665 r ~= null; 6666 } 6667 6668 return ret; 6669 } 6670 } 6671 6672 /// Represents a table row element - a <tr> 6673 /// Group: implementations 6674 class TableRow : Element { 6675 ///. 6676 this(Document _parentDocument) { 6677 super(_parentDocument); 6678 tagName = "tr"; 6679 } 6680 6681 // FIXME: the standard says there should be a lot more in here, 6682 // but meh, I never use it and it's a pain to implement. 6683 } 6684 6685 /// Represents anything that can be a table cell - <td> or <th> html. 6686 /// Group: implementations 6687 class TableCell : Element { 6688 ///. 6689 this(Document _parentDocument, string _tagName) { 6690 super(_parentDocument, _tagName); 6691 } 6692 6693 @property int rowspan() const { 6694 int ret = 1; 6695 auto it = getAttribute("rowspan"); 6696 if(it.length) 6697 ret = to!int(it); 6698 return ret; 6699 } 6700 6701 @property int colspan() const { 6702 int ret = 1; 6703 auto it = getAttribute("colspan"); 6704 if(it.length) 6705 ret = to!int(it); 6706 return ret; 6707 } 6708 6709 @property int rowspan(int i) { 6710 setAttribute("rowspan", to!string(i)); 6711 return i; 6712 } 6713 6714 @property int colspan(int i) { 6715 setAttribute("colspan", to!string(i)); 6716 return i; 6717 } 6718 6719 } 6720 6721 6722 ///. 6723 /// Group: implementations 6724 class MarkupException : Exception { 6725 6726 ///. 6727 this(string message, string file = __FILE__, size_t line = __LINE__) { 6728 super(message, file, line); 6729 } 6730 } 6731 6732 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6733 /// Group: implementations 6734 class ElementNotFoundException : Exception { 6735 6736 /// type == kind of element you were looking for and search == a selector describing the search. 6737 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6738 this.searchContext = searchContext; 6739 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6740 } 6741 6742 Element searchContext; 6743 } 6744 6745 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6746 /// 6747 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6748 /// Group: core_functionality 6749 struct Html { 6750 /// This string holds the actual html. Use it to retrieve the contents. 6751 string source; 6752 } 6753 6754 // for the observers 6755 enum DomMutationOperations { 6756 setAttribute, 6757 removeAttribute, 6758 appendChild, // tagname, attributes[], innerHTML 6759 insertBefore, 6760 truncateChildren, 6761 removeChild, 6762 appendHtml, 6763 replaceHtml, 6764 appendText, 6765 replaceText, 6766 replaceTextOnly 6767 } 6768 6769 // and for observers too 6770 struct DomMutationEvent { 6771 DomMutationOperations operation; 6772 Element target; 6773 Element related; // what this means differs with the operation 6774 Element related2; 6775 string relatedString; 6776 string relatedString2; 6777 } 6778 6779 6780 private immutable static string[] htmlSelfClosedElements = [ 6781 // html 4 6782 "img", "hr", "input", "br", "col", "link", "meta", 6783 // html 5 6784 "source" ]; 6785 6786 private immutable static string[] inlineElements = [ 6787 "span", "strong", "em", "b", "i", "a" 6788 ]; 6789 6790 6791 static import std.conv; 6792 6793 ///. 6794 int intFromHex(string hex) { 6795 int place = 1; 6796 int value = 0; 6797 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6798 int v; 6799 char q = hex[a]; 6800 if( q >= '0' && q <= '9') 6801 v = q - '0'; 6802 else if (q >= 'a' && q <= 'f') 6803 v = q - 'a' + 10; 6804 else throw new Exception("Illegal hex character: " ~ q); 6805 6806 value += v * place; 6807 6808 place *= 16; 6809 } 6810 6811 return value; 6812 } 6813 6814 6815 // CSS selector handling 6816 6817 // EXTENSIONS 6818 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6819 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6820 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 6821 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 6822 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 6823 6824 6825 6826 // CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it. 6827 // That might be useful to implement, though I do have parent selectors too. 6828 6829 ///. 6830 static immutable string[] selectorTokens = [ 6831 // It is important that the 2 character possibilities go first here for accurate lexing 6832 "~=", "*=", "|=", "^=", "$=", "!=", 6833 "::", ">>", 6834 "<<", // my any-parent extension (reciprocal of whitespace) 6835 // " - ", // previous-sibling extension (whitespace required to disambiguate tag-names) 6836 ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")" 6837 ]; // other is white space or a name. 6838 6839 ///. 6840 sizediff_t idToken(string str, sizediff_t position) { 6841 sizediff_t tid = -1; 6842 char c = str[position]; 6843 foreach(a, token; selectorTokens) 6844 6845 if(c == token[0]) { 6846 if(token.length > 1) { 6847 if(position + 1 >= str.length || str[position+1] != token[1]) 6848 continue; // not this token 6849 } 6850 tid = a; 6851 break; 6852 } 6853 return tid; 6854 } 6855 6856 ///. 6857 // look, ma, no phobos! 6858 // new lexer by ketmar 6859 string[] lexSelector (string selstr) { 6860 6861 static sizediff_t idToken (string str, size_t stpos) { 6862 char c = str[stpos]; 6863 foreach (sizediff_t tidx, immutable token; selectorTokens) { 6864 if (c == token[0]) { 6865 if (token.length > 1) { 6866 assert(token.length == 2, token); // we don't have 3-char tokens yet 6867 if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue; 6868 } 6869 return tidx; 6870 } 6871 } 6872 return -1; 6873 } 6874 6875 // skip spaces and comments 6876 static string removeLeadingBlanks (string str) { 6877 size_t curpos = 0; 6878 while (curpos < str.length) { 6879 immutable char ch = str[curpos]; 6880 // this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares! 6881 if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') { 6882 // comment 6883 curpos += 2; 6884 while (curpos < str.length) { 6885 if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') { 6886 curpos += 2; 6887 break; 6888 } 6889 ++curpos; 6890 } 6891 } else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below. 6892 ++curpos; 6893 6894 // FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While 6895 // "foo bar" is clear, and can only have one meaning, consider ".foo .bar". 6896 // That is not the same as ".foo.bar". If the space is stripped, important 6897 // information is lost, despite the tokens being separatable anyway. 6898 // 6899 // The parser really needs to be aware of the presence of a space. 6900 } else { 6901 break; 6902 } 6903 } 6904 return str[curpos..$]; 6905 } 6906 6907 static bool isBlankAt() (string str, size_t pos) { 6908 // we should consider unicode spaces too, but... unicode sux anyway. 6909 return 6910 (pos < str.length && // in string 6911 (str[pos] <= 32 || // space 6912 (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment 6913 } 6914 6915 string[] tokens; 6916 // lexx it! 6917 while ((selstr = removeLeadingBlanks(selstr)).length > 0) { 6918 if(selstr[0] == '\"' || selstr[0] == '\'') { 6919 auto end = selstr[0]; 6920 auto pos = 1; 6921 bool escaping; 6922 while(pos < selstr.length && !escaping && selstr[pos] != end) { 6923 if(escaping) 6924 escaping = false; 6925 else if(selstr[pos] == '\\') 6926 escaping = true; 6927 pos++; 6928 } 6929 6930 // FIXME: do better unescaping 6931 tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`); 6932 if(pos+1 >= selstr.length) 6933 assert(0, selstr); 6934 selstr = selstr[pos + 1.. $]; 6935 continue; 6936 } 6937 6938 6939 // no tokens starts with escape 6940 immutable tid = idToken(selstr, 0); 6941 if (tid >= 0) { 6942 // special token 6943 tokens ~= selectorTokens[tid]; // it's funnier this way 6944 selstr = selstr[selectorTokens[tid].length..$]; 6945 continue; 6946 } 6947 // from start to space or special token 6948 size_t escapePos = size_t.max; 6949 size_t curpos = 0; // i can has chizburger^w escape at the start 6950 while (curpos < selstr.length) { 6951 if (selstr[curpos] == '\\') { 6952 // this is escape, just skip it and next char 6953 if (escapePos == size_t.max) escapePos = curpos; 6954 curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length); 6955 } else { 6956 if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break; 6957 ++curpos; 6958 } 6959 } 6960 // identifier 6961 if (escapePos != size_t.max) { 6962 // i hate it when it happens 6963 string id = selstr[0..escapePos]; 6964 while (escapePos < curpos) { 6965 if (curpos-escapePos < 2) break; 6966 id ~= selstr[escapePos+1]; // escaped char 6967 escapePos += 2; 6968 immutable stp = escapePos; 6969 while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos; 6970 if (escapePos > stp) id ~= selstr[stp..escapePos]; 6971 } 6972 if (id.length > 0) tokens ~= id; 6973 } else { 6974 tokens ~= selstr[0..curpos]; 6975 } 6976 selstr = selstr[curpos..$]; 6977 } 6978 return tokens; 6979 } 6980 version(unittest_domd_lexer) unittest { 6981 assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]); 6982 assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]); 6983 assert(lexSelector(r" < <") == ["<", "<"]); 6984 assert(lexSelector(r" <<") == ["<<"]); 6985 assert(lexSelector(r" <</") == ["<<", "/"]); 6986 assert(lexSelector(r" <</*") == ["<<"]); 6987 assert(lexSelector(r" <\</*") == ["<", "<"]); 6988 assert(lexSelector(r"heh\") == ["heh"]); 6989 assert(lexSelector(r"alice \") == ["alice"]); 6990 assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]); 6991 } 6992 6993 ///. 6994 struct SelectorPart { 6995 string tagNameFilter; ///. 6996 string[] attributesPresent; /// [attr] 6997 string[2][] attributesEqual; /// [attr=value] 6998 string[2][] attributesStartsWith; /// [attr^=value] 6999 string[2][] attributesEndsWith; /// [attr$=value] 7000 // split it on space, then match to these 7001 string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value] 7002 // split it on dash, then match to these 7003 string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value] 7004 string[2][] attributesInclude; /// [attr*=value] 7005 string[2][] attributesNotEqual; /// [attr!=value] -- extension by me 7006 7007 string[] hasSelectors; /// :has(this) 7008 string[] notSelectors; /// :not(this) 7009 7010 string[] isSelectors; /// :is(this) 7011 string[] whereSelectors; /// :where(this) 7012 7013 ParsedNth[] nthOfType; /// . 7014 ParsedNth[] nthLastOfType; /// . 7015 ParsedNth[] nthChild; /// . 7016 7017 bool firstChild; ///. 7018 bool lastChild; ///. 7019 7020 bool firstOfType; /// . 7021 bool lastOfType; /// . 7022 7023 bool emptyElement; ///. 7024 bool whitespaceOnly; /// 7025 bool oddChild; ///. 7026 bool evenChild; ///. 7027 7028 bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED 7029 7030 bool rootElement; ///. 7031 7032 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 7033 7034 bool isCleanSlateExceptSeparation() { 7035 auto cp = this; 7036 cp.separation = -1; 7037 return cp is SelectorPart.init; 7038 } 7039 7040 ///. 7041 string toString() { 7042 string ret; 7043 switch(separation) { 7044 default: assert(0); 7045 case -1: break; 7046 case 0: ret ~= " "; break; 7047 case 1: ret ~= " > "; break; 7048 case 2: ret ~= " + "; break; 7049 case 3: ret ~= " ~ "; break; 7050 case 4: ret ~= " < "; break; 7051 } 7052 ret ~= tagNameFilter; 7053 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 7054 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 7055 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 7056 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 7057 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 7058 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 7059 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 7060 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 7061 7062 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 7063 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 7064 7065 foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")"; 7066 foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")"; 7067 7068 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 7069 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 7070 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 7071 7072 if(firstChild) ret ~= ":first-child"; 7073 if(lastChild) ret ~= ":last-child"; 7074 if(firstOfType) ret ~= ":first-of-type"; 7075 if(lastOfType) ret ~= ":last-of-type"; 7076 if(emptyElement) ret ~= ":empty"; 7077 if(whitespaceOnly) ret ~= ":whitespace-only"; 7078 if(oddChild) ret ~= ":odd-child"; 7079 if(evenChild) ret ~= ":even-child"; 7080 if(rootElement) ret ~= ":root"; 7081 if(scopeElement) ret ~= ":scope"; 7082 7083 return ret; 7084 } 7085 7086 // USEFUL 7087 ///. 7088 bool matchElement(Element e) { 7089 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 7090 // Each individual call is reasonably fast already, but it adds up. 7091 if(e is null) return false; 7092 if(e.nodeType != 1) return false; 7093 7094 if(tagNameFilter != "" && tagNameFilter != "*") 7095 if(e.tagName != tagNameFilter) 7096 return false; 7097 if(firstChild) { 7098 if(e.parentNode is null) 7099 return false; 7100 if(e.parentNode.childElements[0] !is e) 7101 return false; 7102 } 7103 if(lastChild) { 7104 if(e.parentNode is null) 7105 return false; 7106 auto ce = e.parentNode.childElements; 7107 if(ce[$-1] !is e) 7108 return false; 7109 } 7110 if(firstOfType) { 7111 if(e.parentNode is null) 7112 return false; 7113 auto ce = e.parentNode.childElements; 7114 foreach(c; ce) { 7115 if(c.tagName == e.tagName) { 7116 if(c is e) 7117 return true; 7118 else 7119 return false; 7120 } 7121 } 7122 } 7123 if(lastOfType) { 7124 if(e.parentNode is null) 7125 return false; 7126 auto ce = e.parentNode.childElements; 7127 foreach_reverse(c; ce) { 7128 if(c.tagName == e.tagName) { 7129 if(c is e) 7130 return true; 7131 else 7132 return false; 7133 } 7134 } 7135 } 7136 /+ 7137 if(scopeElement) { 7138 if(e !is this_) 7139 return false; 7140 } 7141 +/ 7142 if(emptyElement) { 7143 if(e.children.length) 7144 return false; 7145 } 7146 if(whitespaceOnly) { 7147 if(e.innerText.strip.length) 7148 return false; 7149 } 7150 if(rootElement) { 7151 if(e.parentNode !is null) 7152 return false; 7153 } 7154 if(oddChild || evenChild) { 7155 if(e.parentNode is null) 7156 return false; 7157 foreach(i, child; e.parentNode.childElements) { 7158 if(child is e) { 7159 if(oddChild && !(i&1)) 7160 return false; 7161 if(evenChild && (i&1)) 7162 return false; 7163 break; 7164 } 7165 } 7166 } 7167 7168 bool matchWithSeparator(string attr, string value, string separator) { 7169 foreach(s; attr.split(separator)) 7170 if(s == value) 7171 return true; 7172 return false; 7173 } 7174 7175 foreach(a; attributesPresent) 7176 if(a !in e.attributes) 7177 return false; 7178 foreach(a; attributesEqual) 7179 if(a[0] !in e.attributes || e.attributes[a[0]] != a[1]) 7180 return false; 7181 foreach(a; attributesNotEqual) 7182 // FIXME: maybe it should say null counts... this just bit me. 7183 // I did [attr][attr!=value] to work around. 7184 // 7185 // if it's null, it's not equal, right? 7186 //if(a[0] !in e.attributes || e.attributes[a[0]] == a[1]) 7187 if(e.getAttribute(a[0]) == a[1]) 7188 return false; 7189 foreach(a; attributesInclude) 7190 if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1)) 7191 return false; 7192 foreach(a; attributesStartsWith) 7193 if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1])) 7194 return false; 7195 foreach(a; attributesEndsWith) 7196 if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1])) 7197 return false; 7198 foreach(a; attributesIncludesSeparatedBySpaces) 7199 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " ")) 7200 return false; 7201 foreach(a; attributesIncludesSeparatedByDashes) 7202 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-")) 7203 return false; 7204 foreach(a; hasSelectors) { 7205 if(e.querySelector(a) is null) 7206 return false; 7207 } 7208 foreach(a; notSelectors) { 7209 auto sel = Selector(a); 7210 if(sel.matchesElement(e)) 7211 return false; 7212 } 7213 foreach(a; isSelectors) { 7214 auto sel = Selector(a); 7215 if(!sel.matchesElement(e)) 7216 return false; 7217 } 7218 foreach(a; whereSelectors) { 7219 auto sel = Selector(a); 7220 if(!sel.matchesElement(e)) 7221 return false; 7222 } 7223 7224 foreach(a; nthChild) { 7225 if(e.parentNode is null) 7226 return false; 7227 7228 auto among = e.parentNode.childElements; 7229 7230 if(!a.solvesFor(among, e)) 7231 return false; 7232 } 7233 foreach(a; nthOfType) { 7234 if(e.parentNode is null) 7235 return false; 7236 7237 auto among = e.parentNode.childElements(e.tagName); 7238 7239 if(!a.solvesFor(among, e)) 7240 return false; 7241 } 7242 foreach(a; nthLastOfType) { 7243 if(e.parentNode is null) 7244 return false; 7245 7246 auto among = retro(e.parentNode.childElements(e.tagName)); 7247 7248 if(!a.solvesFor(among, e)) 7249 return false; 7250 } 7251 7252 return true; 7253 } 7254 } 7255 7256 struct ParsedNth { 7257 int multiplier; 7258 int adder; 7259 7260 string of; 7261 7262 this(string text) { 7263 auto original = text; 7264 consumeWhitespace(text); 7265 if(text.startsWith("odd")) { 7266 multiplier = 2; 7267 adder = 1; 7268 7269 text = text[3 .. $]; 7270 } else if(text.startsWith("even")) { 7271 multiplier = 2; 7272 adder = 1; 7273 7274 text = text[4 .. $]; 7275 } else { 7276 int n = (text.length && text[0] == 'n') ? 1 : parseNumber(text); 7277 consumeWhitespace(text); 7278 if(text.length && text[0] == 'n') { 7279 multiplier = n; 7280 text = text[1 .. $]; 7281 consumeWhitespace(text); 7282 if(text.length) { 7283 if(text[0] == '+') { 7284 text = text[1 .. $]; 7285 adder = parseNumber(text); 7286 } else if(text[0] == '-') { 7287 text = text[1 .. $]; 7288 adder = -parseNumber(text); 7289 } else if(text[0] == 'o') { 7290 // continue, this is handled below 7291 } else 7292 throw new Exception("invalid css string at " ~ text ~ " in " ~ original); 7293 } 7294 } else { 7295 adder = n; 7296 } 7297 } 7298 7299 consumeWhitespace(text); 7300 if(text.startsWith("of")) { 7301 text = text[2 .. $]; 7302 consumeWhitespace(text); 7303 of = text[0 .. $]; 7304 } 7305 } 7306 7307 string toString() { 7308 return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of); 7309 } 7310 7311 bool solvesFor(R)(R elements, Element e) { 7312 int idx = 1; 7313 bool found = false; 7314 foreach(ele; elements) { 7315 if(of.length) { 7316 auto sel = Selector(of); 7317 if(!sel.matchesElement(ele)) 7318 continue; 7319 } 7320 if(ele is e) { 7321 found = true; 7322 break; 7323 } 7324 idx++; 7325 } 7326 if(!found) return false; 7327 7328 // multiplier* n + adder = idx 7329 // if there is a solution for integral n, it matches 7330 7331 idx -= adder; 7332 if(multiplier) { 7333 if(idx % multiplier == 0) 7334 return true; 7335 } else { 7336 return idx == 0; 7337 } 7338 return false; 7339 } 7340 7341 private void consumeWhitespace(ref string text) { 7342 while(text.length && text[0] == ' ') 7343 text = text[1 .. $]; 7344 } 7345 7346 private int parseNumber(ref string text) { 7347 consumeWhitespace(text); 7348 if(text.length == 0) return 0; 7349 bool negative = text[0] == '-'; 7350 if(text[0] == '+') 7351 text = text[1 .. $]; 7352 if(negative) text = text[1 .. $]; 7353 int i = 0; 7354 while(i < text.length && (text[i] >= '0' && text[i] <= '9')) 7355 i++; 7356 if(i == 0) 7357 return 0; 7358 int cool = to!int(text[0 .. i]); 7359 text = text[i .. $]; 7360 return negative ? -cool : cool; 7361 } 7362 } 7363 7364 // USEFUL 7365 ///. 7366 Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) { 7367 Element[] ret; 7368 if(!parts.length) { 7369 return [start]; // the null selector only matches the start point; it 7370 // is what terminates the recursion 7371 } 7372 7373 auto part = parts[0]; 7374 //writeln("checking ", part, " against ", start, " with ", part.separation); 7375 switch(part.separation) { 7376 default: assert(0); 7377 case -1: 7378 case 0: // tree 7379 foreach(e; start.tree) { 7380 if(part.separation == 0 && start is e) 7381 continue; // space doesn't match itself! 7382 if(part.matchElement(e)) { 7383 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7384 } 7385 } 7386 break; 7387 case 1: // children 7388 foreach(e; start.childNodes) { 7389 if(part.matchElement(e)) { 7390 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7391 } 7392 } 7393 break; 7394 case 2: // next-sibling 7395 auto e = start.nextSibling("*"); 7396 if(part.matchElement(e)) 7397 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7398 break; 7399 case 3: // younger sibling 7400 auto tmp = start.parentNode; 7401 if(tmp !is null) { 7402 sizediff_t pos = -1; 7403 auto children = tmp.childElements; 7404 foreach(i, child; children) { 7405 if(child is start) { 7406 pos = i; 7407 break; 7408 } 7409 } 7410 assert(pos != -1); 7411 foreach(e; children[pos+1..$]) { 7412 if(part.matchElement(e)) 7413 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7414 } 7415 } 7416 break; 7417 case 4: // immediate parent node, an extension of mine to walk back up the tree 7418 auto e = start.parentNode; 7419 if(part.matchElement(e)) { 7420 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7421 } 7422 /* 7423 Example of usefulness: 7424 7425 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 7426 7427 table th < tr 7428 7429 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 7430 */ 7431 break; 7432 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 7433 /* 7434 Like with the < operator, this is best used to find some parent of a particular known element. 7435 7436 Say you have an anchor inside a 7437 */ 7438 } 7439 7440 return ret; 7441 } 7442 7443 /++ 7444 Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing. 7445 7446 See_Also: 7447 $(LIST 7448 * [Element.querySelector] 7449 * [Element.querySelectorAll] 7450 * [Element.matches] 7451 * [Element.closest] 7452 * [Document.querySelector] 7453 * [Document.querySelectorAll] 7454 ) 7455 +/ 7456 /// Group: core_functionality 7457 struct Selector { 7458 SelectorComponent[] components; 7459 string original; 7460 /++ 7461 Parses the selector string and constructs the usable structure. 7462 +/ 7463 this(string cssSelector) { 7464 components = parseSelectorString(cssSelector); 7465 original = cssSelector; 7466 } 7467 7468 /++ 7469 Returns true if the given element matches this selector, 7470 considered relative to an arbitrary element. 7471 7472 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 7473 with [std.algorithm.iteration.filter]: 7474 7475 --- 7476 Selector sel = Selector("foo > bar"); 7477 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 7478 --- 7479 +/ 7480 bool matchesElement(Element e, Element relativeTo = null) { 7481 foreach(component; components) 7482 if(component.matchElement(e, relativeTo)) 7483 return true; 7484 7485 return false; 7486 } 7487 7488 /++ 7489 Reciprocal of [Element.querySelectorAll] 7490 +/ 7491 Element[] getMatchingElements(Element start) { 7492 Element[] ret; 7493 foreach(component; components) 7494 ret ~= getElementsBySelectorParts(start, component.parts); 7495 return removeDuplicates(ret); 7496 } 7497 7498 /++ 7499 Like [getMatchingElements], but returns a lazy range. Be careful 7500 about mutating the dom as you iterate through this. 7501 +/ 7502 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 7503 import std.algorithm.iteration; 7504 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 7505 } 7506 7507 7508 /// Returns the string this was built from 7509 string toString() { 7510 return original; 7511 } 7512 7513 /++ 7514 Returns a string from the parsed result 7515 7516 7517 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 7518 +/ 7519 string parsedToString() { 7520 string ret; 7521 7522 foreach(idx, component; components) { 7523 if(idx) ret ~= ", "; 7524 ret ~= component.toString(); 7525 } 7526 7527 return ret; 7528 } 7529 } 7530 7531 ///. 7532 struct SelectorComponent { 7533 ///. 7534 SelectorPart[] parts; 7535 7536 ///. 7537 string toString() { 7538 string ret; 7539 foreach(part; parts) 7540 ret ~= part.toString(); 7541 return ret; 7542 } 7543 7544 // USEFUL 7545 ///. 7546 Element[] getElements(Element start) { 7547 return removeDuplicates(getElementsBySelectorParts(start, parts)); 7548 } 7549 7550 // USEFUL (but not implemented) 7551 /// If relativeTo == null, it assumes the root of the parent document. 7552 bool matchElement(Element e, Element relativeTo = null) { 7553 if(e is null) return false; 7554 Element where = e; 7555 int lastSeparation = -1; 7556 7557 auto lparts = parts; 7558 7559 if(parts.length && parts[0].separation > 0) { 7560 // if it starts with a non-trivial separator, inject 7561 // a "*" matcher to act as a root. for cases like document.querySelector("> body") 7562 // which implies html 7563 7564 // there is probably a MUCH better way to do this. 7565 auto dummy = SelectorPart.init; 7566 dummy.tagNameFilter = "*"; 7567 dummy.separation = 0; 7568 lparts = dummy ~ lparts; 7569 } 7570 7571 foreach(part; retro(lparts)) { 7572 7573 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 7574 // writeln(parts); 7575 7576 if(lastSeparation == -1) { 7577 if(!part.matchElement(where)) 7578 return false; 7579 } else if(lastSeparation == 0) { // generic parent 7580 // need to go up the whole chain 7581 where = where.parentNode; 7582 7583 while(where !is null) { 7584 if(part.matchElement(where)) 7585 break; 7586 7587 if(where is relativeTo) 7588 return false; 7589 7590 where = where.parentNode; 7591 } 7592 7593 if(where is null) 7594 return false; 7595 } else if(lastSeparation == 1) { // the > operator 7596 where = where.parentNode; 7597 7598 if(!part.matchElement(where)) 7599 return false; 7600 } else if(lastSeparation == 2) { // the + operator 7601 //writeln("WHERE", where, " ", part); 7602 where = where.previousSibling("*"); 7603 7604 if(!part.matchElement(where)) 7605 return false; 7606 } else if(lastSeparation == 3) { // the ~ operator 7607 where = where.previousSibling("*"); 7608 while(where !is null) { 7609 if(part.matchElement(where)) 7610 break; 7611 7612 if(where is relativeTo) 7613 return false; 7614 7615 where = where.previousSibling("*"); 7616 } 7617 7618 if(where is null) 7619 return false; 7620 } else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 7621 // FIXME 7622 } 7623 7624 lastSeparation = part.separation; 7625 7626 if(where is relativeTo) 7627 return false; // at end of line, if we aren't done by now, the match fails 7628 } 7629 return true; // if we got here, it is a success 7630 } 7631 7632 // the string should NOT have commas. Use parseSelectorString for that instead 7633 ///. 7634 static SelectorComponent fromString(string selector) { 7635 return parseSelector(lexSelector(selector)); 7636 } 7637 } 7638 7639 ///. 7640 SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) { 7641 SelectorComponent[] ret; 7642 auto tokens = lexSelector(selector); // this will parse commas too 7643 // and now do comma-separated slices (i haz phobosophobia!) 7644 int parensCount = 0; 7645 while (tokens.length > 0) { 7646 size_t end = 0; 7647 while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) { 7648 if(tokens[end] == "(") parensCount++; 7649 if(tokens[end] == ")") parensCount--; 7650 ++end; 7651 } 7652 if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags); 7653 if (tokens.length-end < 2) break; 7654 tokens = tokens[end+1..$]; 7655 } 7656 return ret; 7657 } 7658 7659 ///. 7660 SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) { 7661 SelectorComponent s; 7662 7663 SelectorPart current; 7664 void commit() { 7665 // might as well skip null items 7666 if(!current.isCleanSlateExceptSeparation()) { 7667 s.parts ~= current; 7668 current = current.init; // start right over 7669 } 7670 } 7671 enum State { 7672 Starting, 7673 ReadingClass, 7674 ReadingId, 7675 ReadingAttributeSelector, 7676 ReadingAttributeComparison, 7677 ExpectingAttributeCloser, 7678 ReadingPseudoClass, 7679 ReadingAttributeValue, 7680 7681 SkippingFunctionalSelector, 7682 } 7683 State state = State.Starting; 7684 string attributeName, attributeValue, attributeComparison; 7685 int parensCount; 7686 foreach(idx, token; tokens) { 7687 string readFunctionalSelector() { 7688 string s; 7689 if(tokens[idx + 1] != "(") 7690 throw new Exception("parse error"); 7691 int pc = 1; 7692 foreach(t; tokens[idx + 2 .. $]) { 7693 if(t == "(") 7694 pc++; 7695 if(t == ")") 7696 pc--; 7697 if(pc == 0) 7698 break; 7699 s ~= t; 7700 } 7701 7702 return s; 7703 } 7704 7705 sizediff_t tid = -1; 7706 foreach(i, item; selectorTokens) 7707 if(token == item) { 7708 tid = i; 7709 break; 7710 } 7711 final switch(state) { 7712 case State.Starting: // fresh, might be reading an operator or a tagname 7713 if(tid == -1) { 7714 if(!caseSensitiveTags) 7715 token = token.toLower(); 7716 7717 if(current.isCleanSlateExceptSeparation()) { 7718 current.tagNameFilter = token; 7719 // default thing, see comment under "*" below 7720 if(current.separation == -1) current.separation = 0; 7721 } else { 7722 // if it was already set, we must see two thingies 7723 // separated by whitespace... 7724 commit(); 7725 current.separation = 0; // tree 7726 current.tagNameFilter = token; 7727 } 7728 } else { 7729 // Selector operators 7730 switch(token) { 7731 case "*": 7732 current.tagNameFilter = "*"; 7733 // the idea here is if we haven't actually set a separation 7734 // yet (e.g. the > operator), it should assume the generic 7735 // whitespace (descendant) mode to avoid matching self with -1 7736 if(current.separation == -1) current.separation = 0; 7737 break; 7738 case " ": 7739 // If some other separation has already been set, 7740 // this is irrelevant whitespace, so we should skip it. 7741 // this happens in the case of "foo > bar" for example. 7742 if(current.isCleanSlateExceptSeparation() && current.separation > 0) 7743 continue; 7744 commit(); 7745 current.separation = 0; // tree 7746 break; 7747 case ">>": 7748 commit(); 7749 current.separation = 0; // alternate syntax for tree from html5 css 7750 break; 7751 case ">": 7752 commit(); 7753 current.separation = 1; // child 7754 break; 7755 case "+": 7756 commit(); 7757 current.separation = 2; // sibling directly after 7758 break; 7759 case "~": 7760 commit(); 7761 current.separation = 3; // any sibling after 7762 break; 7763 case "<": 7764 commit(); 7765 current.separation = 4; // immediate parent of 7766 break; 7767 case "[": 7768 state = State.ReadingAttributeSelector; 7769 if(current.separation == -1) current.separation = 0; 7770 break; 7771 case ".": 7772 state = State.ReadingClass; 7773 if(current.separation == -1) current.separation = 0; 7774 break; 7775 case "#": 7776 state = State.ReadingId; 7777 if(current.separation == -1) current.separation = 0; 7778 break; 7779 case ":": 7780 case "::": 7781 state = State.ReadingPseudoClass; 7782 if(current.separation == -1) current.separation = 0; 7783 break; 7784 7785 default: 7786 assert(0, token); 7787 } 7788 } 7789 break; 7790 case State.ReadingClass: 7791 current.attributesIncludesSeparatedBySpaces ~= ["class", token]; 7792 state = State.Starting; 7793 break; 7794 case State.ReadingId: 7795 current.attributesEqual ~= ["id", token]; 7796 state = State.Starting; 7797 break; 7798 case State.ReadingPseudoClass: 7799 switch(token) { 7800 case "first-of-type": 7801 current.firstOfType = true; 7802 break; 7803 case "last-of-type": 7804 current.lastOfType = true; 7805 break; 7806 case "only-of-type": 7807 current.firstOfType = true; 7808 current.lastOfType = true; 7809 break; 7810 case "first-child": 7811 current.firstChild = true; 7812 break; 7813 case "last-child": 7814 current.lastChild = true; 7815 break; 7816 case "only-child": 7817 current.firstChild = true; 7818 current.lastChild = true; 7819 break; 7820 case "scope": 7821 current.scopeElement = true; 7822 break; 7823 case "empty": 7824 // one with no children 7825 current.emptyElement = true; 7826 break; 7827 case "whitespace-only": 7828 current.whitespaceOnly = true; 7829 break; 7830 case "link": 7831 current.attributesPresent ~= "href"; 7832 break; 7833 case "root": 7834 current.rootElement = true; 7835 break; 7836 case "nth-child": 7837 current.nthChild ~= ParsedNth(readFunctionalSelector()); 7838 state = State.SkippingFunctionalSelector; 7839 continue; 7840 case "nth-of-type": 7841 current.nthOfType ~= ParsedNth(readFunctionalSelector()); 7842 state = State.SkippingFunctionalSelector; 7843 continue; 7844 case "nth-last-of-type": 7845 current.nthLastOfType ~= ParsedNth(readFunctionalSelector()); 7846 state = State.SkippingFunctionalSelector; 7847 continue; 7848 case "is": 7849 state = State.SkippingFunctionalSelector; 7850 current.isSelectors ~= readFunctionalSelector(); 7851 continue; // now the rest of the parser skips past the parens we just handled 7852 case "where": 7853 state = State.SkippingFunctionalSelector; 7854 current.whereSelectors ~= readFunctionalSelector(); 7855 continue; // now the rest of the parser skips past the parens we just handled 7856 case "not": 7857 state = State.SkippingFunctionalSelector; 7858 current.notSelectors ~= readFunctionalSelector(); 7859 continue; // now the rest of the parser skips past the parens we just handled 7860 case "has": 7861 state = State.SkippingFunctionalSelector; 7862 current.hasSelectors ~= readFunctionalSelector(); 7863 continue; // now the rest of the parser skips past the parens we just handled 7864 // back to standards though not quite right lol 7865 case "disabled": 7866 current.attributesPresent ~= "disabled"; 7867 break; 7868 case "checked": 7869 current.attributesPresent ~= "checked"; 7870 break; 7871 7872 case "visited", "active", "hover", "target", "focus", "selected": 7873 current.attributesPresent ~= "nothing"; 7874 // FIXME 7875 /+ 7876 // extensions not implemented 7877 //case "text": // takes the text in the element and wraps it in an element, returning it 7878 +/ 7879 goto case; 7880 case "before", "after": 7881 current.attributesPresent ~= "FIXME"; 7882 7883 break; 7884 // My extensions 7885 case "odd-child": 7886 current.oddChild = true; 7887 break; 7888 case "even-child": 7889 current.evenChild = true; 7890 break; 7891 default: 7892 //if(token.indexOf("lang") == -1) 7893 //assert(0, token); 7894 break; 7895 } 7896 state = State.Starting; 7897 break; 7898 case State.SkippingFunctionalSelector: 7899 if(token == "(") { 7900 parensCount++; 7901 } else if(token == ")") { 7902 parensCount--; 7903 } 7904 7905 if(parensCount == 0) 7906 state = State.Starting; 7907 break; 7908 case State.ReadingAttributeSelector: 7909 attributeName = token; 7910 attributeComparison = null; 7911 attributeValue = null; 7912 state = State.ReadingAttributeComparison; 7913 break; 7914 case State.ReadingAttributeComparison: 7915 // FIXME: these things really should be quotable in the proper lexer... 7916 if(token != "]") { 7917 if(token.indexOf("=") == -1) { 7918 // not a comparison; consider it 7919 // part of the attribute 7920 attributeValue ~= token; 7921 } else { 7922 attributeComparison = token; 7923 state = State.ReadingAttributeValue; 7924 } 7925 break; 7926 } 7927 goto case; 7928 case State.ExpectingAttributeCloser: 7929 if(token != "]") { 7930 // not the closer; consider it part of comparison 7931 if(attributeComparison == "") 7932 attributeName ~= token; 7933 else 7934 attributeValue ~= token; 7935 break; 7936 } 7937 7938 // Selector operators 7939 switch(attributeComparison) { 7940 default: assert(0); 7941 case "": 7942 current.attributesPresent ~= attributeName; 7943 break; 7944 case "=": 7945 current.attributesEqual ~= [attributeName, attributeValue]; 7946 break; 7947 case "|=": 7948 current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue]; 7949 break; 7950 case "~=": 7951 current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue]; 7952 break; 7953 case "$=": 7954 current.attributesEndsWith ~= [attributeName, attributeValue]; 7955 break; 7956 case "^=": 7957 current.attributesStartsWith ~= [attributeName, attributeValue]; 7958 break; 7959 case "*=": 7960 current.attributesInclude ~= [attributeName, attributeValue]; 7961 break; 7962 case "!=": 7963 current.attributesNotEqual ~= [attributeName, attributeValue]; 7964 break; 7965 } 7966 7967 state = State.Starting; 7968 break; 7969 case State.ReadingAttributeValue: 7970 attributeValue = token; 7971 state = State.ExpectingAttributeCloser; 7972 break; 7973 } 7974 } 7975 7976 commit(); 7977 7978 return s; 7979 } 7980 7981 ///. 7982 Element[] removeDuplicates(Element[] input) { 7983 Element[] ret; 7984 7985 bool[Element] already; 7986 foreach(e; input) { 7987 if(e in already) continue; 7988 already[e] = true; 7989 ret ~= e; 7990 } 7991 7992 return ret; 7993 } 7994 7995 // done with CSS selector handling 7996 7997 7998 // FIXME: use the better parser from html.d 7999 /// This is probably not useful to you unless you're writing a browser or something like that. 8000 /// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes. 8001 /// From here, you can start to make a layout engine for the box model and have a css aware browser. 8002 class CssStyle { 8003 ///. 8004 this(string rule, string content) { 8005 rule = rule.strip(); 8006 content = content.strip(); 8007 8008 if(content.length == 0) 8009 return; 8010 8011 originatingRule = rule; 8012 originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work! 8013 8014 foreach(part; content.split(";")) { 8015 part = part.strip(); 8016 if(part.length == 0) 8017 continue; 8018 auto idx = part.indexOf(":"); 8019 if(idx == -1) 8020 continue; 8021 //throw new Exception("Bad css rule (no colon): " ~ part); 8022 8023 Property p; 8024 8025 p.name = part[0 .. idx].strip(); 8026 p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important 8027 p.givenExplicitly = true; 8028 p.specificity = originatingSpecificity; 8029 8030 properties ~= p; 8031 } 8032 8033 foreach(property; properties) 8034 expandShortForm(property, originatingSpecificity); 8035 } 8036 8037 ///. 8038 Specificity getSpecificityOfRule(string rule) { 8039 Specificity s; 8040 if(rule.length == 0) { // inline 8041 // s.important = 2; 8042 } else { 8043 // FIXME 8044 } 8045 8046 return s; 8047 } 8048 8049 string originatingRule; ///. 8050 Specificity originatingSpecificity; ///. 8051 8052 ///. 8053 union Specificity { 8054 uint score; ///. 8055 // version(little_endian) 8056 ///. 8057 struct { 8058 ubyte tags; ///. 8059 ubyte classes; ///. 8060 ubyte ids; ///. 8061 ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important 8062 } 8063 } 8064 8065 ///. 8066 struct Property { 8067 bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left" 8068 string name; ///. 8069 string value; ///. 8070 Specificity specificity; ///. 8071 // do we care about the original source rule? 8072 } 8073 8074 ///. 8075 Property[] properties; 8076 8077 ///. 8078 string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") { 8079 string name = unCamelCase(nameGiven); 8080 if(value is null) 8081 return getValue(name); 8082 else 8083 return setValue(name, value, 0x02000000 /* inline specificity */); 8084 } 8085 8086 /// takes dash style name 8087 string getValue(string name) { 8088 foreach(property; properties) 8089 if(property.name == name) 8090 return property.value; 8091 return null; 8092 } 8093 8094 /// takes dash style name 8095 string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) { 8096 value = value.replace("! important", "!important"); 8097 if(value.indexOf("!important") != -1) { 8098 newSpecificity.important = 1; // FIXME 8099 value = value.replace("!important", "").strip(); 8100 } 8101 8102 foreach(ref property; properties) 8103 if(property.name == name) { 8104 if(newSpecificity.score >= property.specificity.score) { 8105 property.givenExplicitly = explicit; 8106 expandShortForm(property, newSpecificity); 8107 return (property.value = value); 8108 } else { 8109 if(name == "display") 8110 {}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score); 8111 return value; // do nothing - the specificity is too low 8112 } 8113 } 8114 8115 // it's not here... 8116 8117 Property p; 8118 p.givenExplicitly = true; 8119 p.name = name; 8120 p.value = value; 8121 p.specificity = originatingSpecificity; 8122 8123 properties ~= p; 8124 expandShortForm(p, originatingSpecificity); 8125 8126 return value; 8127 } 8128 8129 private void expandQuadShort(string name, string value, Specificity specificity) { 8130 auto parts = value.split(" "); 8131 switch(parts.length) { 8132 case 1: 8133 setValue(name ~"-left", parts[0], specificity, false); 8134 setValue(name ~"-right", parts[0], specificity, false); 8135 setValue(name ~"-top", parts[0], specificity, false); 8136 setValue(name ~"-bottom", parts[0], specificity, false); 8137 break; 8138 case 2: 8139 setValue(name ~"-left", parts[1], specificity, false); 8140 setValue(name ~"-right", parts[1], specificity, false); 8141 setValue(name ~"-top", parts[0], specificity, false); 8142 setValue(name ~"-bottom", parts[0], specificity, false); 8143 break; 8144 case 3: 8145 setValue(name ~"-top", parts[0], specificity, false); 8146 setValue(name ~"-right", parts[1], specificity, false); 8147 setValue(name ~"-bottom", parts[2], specificity, false); 8148 setValue(name ~"-left", parts[2], specificity, false); 8149 8150 break; 8151 case 4: 8152 setValue(name ~"-top", parts[0], specificity, false); 8153 setValue(name ~"-right", parts[1], specificity, false); 8154 setValue(name ~"-bottom", parts[2], specificity, false); 8155 setValue(name ~"-left", parts[3], specificity, false); 8156 break; 8157 default: 8158 assert(0, value); 8159 } 8160 } 8161 8162 ///. 8163 void expandShortForm(Property p, Specificity specificity) { 8164 switch(p.name) { 8165 case "margin": 8166 case "padding": 8167 expandQuadShort(p.name, p.value, specificity); 8168 break; 8169 case "border": 8170 case "outline": 8171 setValue(p.name ~ "-left", p.value, specificity, false); 8172 setValue(p.name ~ "-right", p.value, specificity, false); 8173 setValue(p.name ~ "-top", p.value, specificity, false); 8174 setValue(p.name ~ "-bottom", p.value, specificity, false); 8175 break; 8176 8177 case "border-top": 8178 case "border-bottom": 8179 case "border-left": 8180 case "border-right": 8181 case "outline-top": 8182 case "outline-bottom": 8183 case "outline-left": 8184 case "outline-right": 8185 8186 default: {} 8187 } 8188 } 8189 8190 ///. 8191 override string toString() { 8192 string ret; 8193 if(originatingRule.length) 8194 ret = originatingRule ~ " {"; 8195 8196 foreach(property; properties) { 8197 if(!property.givenExplicitly) 8198 continue; // skip the inferred shit 8199 8200 if(originatingRule.length) 8201 ret ~= "\n\t"; 8202 else 8203 ret ~= " "; 8204 8205 ret ~= property.name ~ ": " ~ property.value ~ ";"; 8206 } 8207 8208 if(originatingRule.length) 8209 ret ~= "\n}\n"; 8210 8211 return ret; 8212 } 8213 } 8214 8215 string cssUrl(string url) { 8216 return "url(\"" ~ url ~ "\")"; 8217 } 8218 8219 /// This probably isn't useful, unless you're writing a browser or something like that. 8220 /// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css 8221 /// as text. 8222 /// 8223 /// The idea, however, is to represent a kind of CSS object model, complete with specificity, 8224 /// that you can apply to your documents to build the complete computedStyle object. 8225 class StyleSheet { 8226 ///. 8227 CssStyle[] rules; 8228 8229 ///. 8230 this(string source) { 8231 // FIXME: handle @ rules and probably could improve lexer 8232 // add nesting? 8233 int state; 8234 string currentRule; 8235 string currentValue; 8236 8237 string* currentThing = ¤tRule; 8238 foreach(c; source) { 8239 handle: switch(state) { 8240 default: assert(0); 8241 case 0: // starting - we assume we're reading a rule 8242 switch(c) { 8243 case '@': 8244 state = 4; 8245 break; 8246 case '/': 8247 state = 1; 8248 break; 8249 case '{': 8250 currentThing = ¤tValue; 8251 break; 8252 case '}': 8253 if(currentThing is ¤tValue) { 8254 rules ~= new CssStyle(currentRule, currentValue); 8255 8256 currentRule = ""; 8257 currentValue = ""; 8258 8259 currentThing = ¤tRule; 8260 } else { 8261 // idk what is going on here. 8262 // check sveit.com to reproduce 8263 currentRule = ""; 8264 currentValue = ""; 8265 } 8266 break; 8267 default: 8268 (*currentThing) ~= c; 8269 } 8270 break; 8271 case 1: // expecting * 8272 if(c == '*') 8273 state = 2; 8274 else { 8275 state = 0; 8276 (*currentThing) ~= "/" ~ c; 8277 } 8278 break; 8279 case 2: // inside comment 8280 if(c == '*') 8281 state = 3; 8282 break; 8283 case 3: // expecting / to end comment 8284 if(c == '/') 8285 state = 0; 8286 else 8287 state = 2; // it's just a comment so no need to append 8288 break; 8289 case 4: 8290 if(c == '{') 8291 state = 5; 8292 if(c == ';') 8293 state = 0; // just skipping import 8294 break; 8295 case 5: 8296 if(c == '}') 8297 state = 0; // skipping font face probably 8298 } 8299 } 8300 } 8301 8302 /// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call 8303 void apply(Document document) { 8304 foreach(rule; rules) { 8305 if(rule.originatingRule.length == 0) 8306 continue; // this shouldn't happen here in a stylesheet 8307 foreach(element; document.querySelectorAll(rule.originatingRule)) { 8308 // note: this should be a different object than the inline style 8309 // since givenExplicitly is likely destroyed here 8310 auto current = element.computedStyle; 8311 8312 foreach(item; rule.properties) 8313 current.setValue(item.name, item.value, item.specificity); 8314 } 8315 } 8316 } 8317 } 8318 8319 8320 /// This is kinda private; just a little utility container for use by the ElementStream class. 8321 final class Stack(T) { 8322 this() { 8323 internalLength = 0; 8324 arr = initialBuffer[]; 8325 } 8326 8327 ///. 8328 void push(T t) { 8329 if(internalLength >= arr.length) { 8330 auto oldarr = arr; 8331 if(arr.length < 4096) 8332 arr = new T[arr.length * 2]; 8333 else 8334 arr = new T[arr.length + 4096]; 8335 arr[0 .. oldarr.length] = oldarr[]; 8336 } 8337 8338 arr[internalLength] = t; 8339 internalLength++; 8340 } 8341 8342 ///. 8343 T pop() { 8344 assert(internalLength); 8345 internalLength--; 8346 return arr[internalLength]; 8347 } 8348 8349 ///. 8350 T peek() { 8351 assert(internalLength); 8352 return arr[internalLength - 1]; 8353 } 8354 8355 ///. 8356 @property bool empty() { 8357 return internalLength ? false : true; 8358 } 8359 8360 ///. 8361 private T[] arr; 8362 private size_t internalLength; 8363 private T[64] initialBuffer; 8364 // the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep), 8365 // using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push() 8366 // function thanks to this, and push() was actually one of the slowest individual functions in the code! 8367 } 8368 8369 /// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively. 8370 final class ElementStream { 8371 8372 ///. 8373 @property Element front() { 8374 return current.element; 8375 } 8376 8377 /// Use Element.tree instead. 8378 this(Element start) { 8379 current.element = start; 8380 current.childPosition = -1; 8381 isEmpty = false; 8382 stack = new Stack!(Current); 8383 } 8384 8385 /* 8386 Handle it 8387 handle its children 8388 8389 */ 8390 8391 ///. 8392 void popFront() { 8393 more: 8394 if(isEmpty) return; 8395 8396 // FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times) 8397 8398 current.childPosition++; 8399 if(current.childPosition >= current.element.children.length) { 8400 if(stack.empty()) 8401 isEmpty = true; 8402 else { 8403 current = stack.pop(); 8404 goto more; 8405 } 8406 } else { 8407 stack.push(current); 8408 current.element = current.element.children[current.childPosition]; 8409 current.childPosition = -1; 8410 } 8411 } 8412 8413 /// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable. 8414 void currentKilled() { 8415 if(stack.empty) // should never happen 8416 isEmpty = true; 8417 else { 8418 current = stack.pop(); 8419 current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right 8420 } 8421 } 8422 8423 ///. 8424 @property bool empty() { 8425 return isEmpty; 8426 } 8427 8428 private: 8429 8430 struct Current { 8431 Element element; 8432 int childPosition; 8433 } 8434 8435 Current current; 8436 8437 Stack!(Current) stack; 8438 8439 bool isEmpty; 8440 } 8441 8442 8443 8444 // unbelievable. 8445 // Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time. 8446 sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) { 8447 static import std.algorithm; 8448 auto found = std.algorithm.find(haystack, needle); 8449 if(found.length == 0) 8450 return -1; 8451 return haystack.length - found.length; 8452 } 8453 8454 private T[] insertAfter(T)(T[] arr, int position, T[] what) { 8455 assert(position < arr.length); 8456 T[] ret; 8457 ret.length = arr.length + what.length; 8458 int a = 0; 8459 foreach(i; arr[0..position+1]) 8460 ret[a++] = i; 8461 8462 foreach(i; what) 8463 ret[a++] = i; 8464 8465 foreach(i; arr[position+1..$]) 8466 ret[a++] = i; 8467 8468 return ret; 8469 } 8470 8471 package bool isInArray(T)(T item, T[] arr) { 8472 foreach(i; arr) 8473 if(item == i) 8474 return true; 8475 return false; 8476 } 8477 8478 private string[string] aadup(in string[string] arr) { 8479 string[string] ret; 8480 foreach(k, v; arr) 8481 ret[k] = v; 8482 return ret; 8483 } 8484 8485 // dom event support, if you want to use it 8486 8487 /// used for DOM events 8488 alias EventHandler = void delegate(Element handlerAttachedTo, Event event); 8489 8490 /// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it. 8491 class Event { 8492 this(string eventName, Element target) { 8493 this.eventName = eventName; 8494 this.srcElement = target; 8495 } 8496 8497 /// Prevents the default event handler (if there is one) from being called 8498 void preventDefault() { 8499 defaultPrevented = true; 8500 } 8501 8502 /// Stops the event propagation immediately. 8503 void stopPropagation() { 8504 propagationStopped = true; 8505 } 8506 8507 bool defaultPrevented; 8508 bool propagationStopped; 8509 string eventName; 8510 8511 Element srcElement; 8512 alias srcElement target; 8513 8514 Element relatedTarget; 8515 8516 int clientX; 8517 int clientY; 8518 8519 int button; 8520 8521 bool isBubbling; 8522 8523 /// this sends it only to the target. If you want propagation, use dispatch() instead. 8524 void send() { 8525 if(srcElement is null) 8526 return; 8527 8528 auto e = srcElement; 8529 8530 if(eventName in e.bubblingEventHandlers) 8531 foreach(handler; e.bubblingEventHandlers[eventName]) 8532 handler(e, this); 8533 8534 if(!defaultPrevented) 8535 if(eventName in e.defaultEventHandlers) 8536 e.defaultEventHandlers[eventName](e, this); 8537 } 8538 8539 /// this dispatches the element using the capture -> target -> bubble process 8540 void dispatch() { 8541 if(srcElement is null) 8542 return; 8543 8544 // first capture, then bubble 8545 8546 Element[] chain; 8547 Element curr = srcElement; 8548 while(curr) { 8549 auto l = curr; 8550 chain ~= l; 8551 curr = curr.parentNode; 8552 8553 } 8554 8555 isBubbling = false; 8556 8557 foreach(e; chain.retro()) { 8558 if(eventName in e.capturingEventHandlers) 8559 foreach(handler; e.capturingEventHandlers[eventName]) 8560 handler(e, this); 8561 8562 // the default on capture should really be to always do nothing 8563 8564 //if(!defaultPrevented) 8565 // if(eventName in e.defaultEventHandlers) 8566 // e.defaultEventHandlers[eventName](e.element, this); 8567 8568 if(propagationStopped) 8569 break; 8570 } 8571 8572 isBubbling = true; 8573 if(!propagationStopped) 8574 foreach(e; chain) { 8575 if(eventName in e.bubblingEventHandlers) 8576 foreach(handler; e.bubblingEventHandlers[eventName]) 8577 handler(e, this); 8578 8579 if(propagationStopped) 8580 break; 8581 } 8582 8583 if(!defaultPrevented) 8584 foreach(e; chain) { 8585 if(eventName in e.defaultEventHandlers) 8586 e.defaultEventHandlers[eventName](e, this); 8587 } 8588 } 8589 } 8590 8591 struct FormFieldOptions { 8592 // usable for any 8593 8594 /// this is a regex pattern used to validate the field 8595 string pattern; 8596 /// must the field be filled in? Even with a regex, it can be submitted blank if this is false. 8597 bool isRequired; 8598 /// this is displayed as an example to the user 8599 string placeholder; 8600 8601 // usable for numeric ones 8602 8603 8604 // convenience methods to quickly get some options 8605 @property static FormFieldOptions none() { 8606 FormFieldOptions f; 8607 return f; 8608 } 8609 8610 static FormFieldOptions required() { 8611 FormFieldOptions f; 8612 f.isRequired = true; 8613 return f; 8614 } 8615 8616 static FormFieldOptions regex(string pattern, bool required = false) { 8617 FormFieldOptions f; 8618 f.pattern = pattern; 8619 f.isRequired = required; 8620 return f; 8621 } 8622 8623 static FormFieldOptions fromElement(Element e) { 8624 FormFieldOptions f; 8625 if(e.hasAttribute("required")) 8626 f.isRequired = true; 8627 if(e.hasAttribute("pattern")) 8628 f.pattern = e.pattern; 8629 if(e.hasAttribute("placeholder")) 8630 f.placeholder = e.placeholder; 8631 return f; 8632 } 8633 8634 Element applyToElement(Element e) { 8635 if(this.isRequired) 8636 e.required = "required"; 8637 if(this.pattern.length) 8638 e.pattern = this.pattern; 8639 if(this.placeholder.length) 8640 e.placeholder = this.placeholder; 8641 return e; 8642 } 8643 } 8644 8645 // this needs to look just like a string, but can expand as needed 8646 version(no_dom_stream) 8647 alias string Utf8Stream; 8648 else 8649 class Utf8Stream { 8650 protected: 8651 // these two should be overridden in subclasses to actually do the stream magic 8652 string getMore() { 8653 if(getMoreHelper !is null) 8654 return getMoreHelper(); 8655 return null; 8656 } 8657 8658 bool hasMore() { 8659 if(hasMoreHelper !is null) 8660 return hasMoreHelper(); 8661 return false; 8662 } 8663 // the rest should be ok 8664 8665 public: 8666 this(string d) { 8667 this.data = d; 8668 } 8669 8670 this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) { 8671 this.getMoreHelper = getMoreHelper; 8672 this.hasMoreHelper = hasMoreHelper; 8673 8674 if(hasMore()) 8675 this.data ~= getMore(); 8676 8677 stdout.flush(); 8678 } 8679 8680 @property final size_t length() { 8681 // the parser checks length primarily directly before accessing the next character 8682 // so this is the place we'll hook to append more if possible and needed. 8683 if(lastIdx + 1 >= data.length && hasMore()) { 8684 data ~= getMore(); 8685 } 8686 return data.length; 8687 } 8688 8689 final char opIndex(size_t idx) { 8690 if(idx > lastIdx) 8691 lastIdx = idx; 8692 return data[idx]; 8693 } 8694 8695 final string opSlice(size_t start, size_t end) { 8696 if(end > lastIdx) 8697 lastIdx = end; 8698 return data[start .. end]; 8699 } 8700 8701 final size_t opDollar() { 8702 return length(); 8703 } 8704 8705 final Utf8Stream opBinary(string op : "~")(string s) { 8706 this.data ~= s; 8707 return this; 8708 } 8709 8710 final Utf8Stream opOpAssign(string op : "~")(string s) { 8711 this.data ~= s; 8712 return this; 8713 } 8714 8715 final Utf8Stream opAssign(string rhs) { 8716 this.data = rhs; 8717 return this; 8718 } 8719 private: 8720 string data; 8721 8722 size_t lastIdx; 8723 8724 bool delegate() hasMoreHelper; 8725 string delegate() getMoreHelper; 8726 8727 8728 /+ 8729 // used to maybe clear some old stuff 8730 // you might have to remove elements parsed with it too since they can hold slices into the 8731 // old stuff, preventing gc 8732 void dropFront(int bytes) { 8733 posAdjustment += bytes; 8734 data = data[bytes .. $]; 8735 } 8736 8737 int posAdjustment; 8738 +/ 8739 } 8740 8741 void fillForm(T)(Form form, T obj, string name) { 8742 import arsd.database; 8743 fillData((k, v) => form.setValue(k, v), obj, name); 8744 } 8745 8746 8747 /+ 8748 /+ 8749 Syntax: 8750 8751 Tag: tagname#id.class 8752 Tree: Tag(Children, comma, separated...) 8753 Children: Tee or Variable 8754 Variable: $varname with optional |funcname following. 8755 8756 If a variable has a tree after it, it breaks the variable down: 8757 * if array, foreach it does the tree 8758 * if struct, it breaks down the member variables 8759 8760 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 8761 +/ 8762 struct Stringplate { 8763 /++ 8764 8765 +/ 8766 this(string s) { 8767 8768 } 8769 8770 /++ 8771 8772 +/ 8773 Element expand(T...)(T vars) { 8774 return null; 8775 } 8776 } 8777 /// 8778 unittest { 8779 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 8780 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 8781 } 8782 +/ 8783 8784 bool allAreInlineHtml(const(Element)[] children) { 8785 foreach(child; children) { 8786 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 8787 // cool 8788 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children)) { 8789 // cool 8790 } else { 8791 // prolly block 8792 return false; 8793 } 8794 } 8795 return true; 8796 } 8797 8798 private bool isSimpleWhite(dchar c) { 8799 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 8800 } 8801 8802 unittest { 8803 // Test for issue #120 8804 string s = `<html> 8805 <body> 8806 <P>AN 8807 <P>bubbles</P> 8808 <P>giggles</P> 8809 </body> 8810 </html>`; 8811 auto doc = new Document(); 8812 doc.parseUtf8(s, false, false); 8813 auto s2 = doc.toString(); 8814 assert( 8815 s2.indexOf("bubbles") < s2.indexOf("giggles"), 8816 "paragraph order incorrect:\n" ~ s2); 8817 } 8818 8819 unittest { 8820 // test for suncarpet email dec 24 2019 8821 // arbitrary id asduiwh 8822 auto document = new Document("<html> 8823 <head> 8824 <meta charset=\"utf-8\"></meta> 8825 <title>Element.querySelector Test</title> 8826 </head> 8827 <body> 8828 <div id=\"foo\"> 8829 <div>Foo</div> 8830 <div>Bar</div> 8831 </div> 8832 </body> 8833 </html>"); 8834 8835 auto doc = document; 8836 8837 assert(doc.querySelectorAll("div div").length == 2); 8838 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 8839 assert(doc.querySelectorAll("> html").length == 0); 8840 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 8841 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 8842 8843 8844 assert(doc.root.matches("html")); 8845 assert(!doc.root.matches("nothtml")); 8846 assert(doc.querySelector("#foo > div").matches("div")); 8847 assert(doc.querySelector("body > #foo").matches("#foo")); 8848 8849 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 8850 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 8851 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 8852 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 8853 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 8854 8855 // also confirming the querySelector works via the mdn definition 8856 auto foo = doc.requireSelector("#foo"); 8857 assert(foo.querySelector("#foo > div") !is null); 8858 assert(foo.querySelector("body #foo > div") !is null); 8859 8860 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 8861 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 8862 //assert(foo.querySelectorAll("#foo > div").length == 2); 8863 } 8864 8865 unittest { 8866 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 8867 auto document = new Document(`<article> 8868 <div id="div-01">Here is div-01 8869 <div id="div-02">Here is div-02 8870 <div id="div-03">Here is div-03</div> 8871 </div> 8872 </div> 8873 </article>`, true, true); 8874 8875 auto el = document.getElementById("div-03"); 8876 assert(el.closest("#div-02").id == "div-02"); 8877 assert(el.closest("div div").id == "div-03"); 8878 assert(el.closest("article > div").id == "div-01"); 8879 assert(el.closest(":not(div)").tagName == "article"); 8880 8881 assert(el.closest("p") is null); 8882 assert(el.closest("p, div") is el); 8883 } 8884 8885 unittest { 8886 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 8887 auto document = new Document(`<test> 8888 <div class="foo"><p>cool</p><span>bar</span></div> 8889 <main><p>two</p></main> 8890 </test>`); 8891 8892 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 8893 assert(document.querySelector("div:where(.foo)") !is null); 8894 } 8895 8896 unittest { 8897 immutable string html = q{ 8898 <root> 8899 <div class="roundedbox"> 8900 <table> 8901 <caption class="boxheader">Recent Reviews</caption> 8902 <tr> 8903 <th>Game</th> 8904 <th>User</th> 8905 <th>Rating</th> 8906 <th>Created</th> 8907 </tr> 8908 8909 <tr> 8910 <td>June 13, 2020 15:10</td> 8911 <td><a href="/reviews/8833">[Show]</a></td> 8912 </tr> 8913 8914 <tr> 8915 <td>June 13, 2020 15:02</td> 8916 <td><a href="/reviews/8832">[Show]</a></td> 8917 </tr> 8918 8919 <tr> 8920 <td>June 13, 2020 14:41</td> 8921 <td><a href="/reviews/8831">[Show]</a></td> 8922 </tr> 8923 </table> 8924 </div> 8925 </root> 8926 }; 8927 8928 auto doc = new Document(cast(string)html); 8929 // this should select the second table row, but... 8930 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 8931 assert(rd !is null); 8932 assert(rd.href == "/reviews/8832"); 8933 8934 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 8935 assert(rd !is null); 8936 assert(rd.href == "/reviews/8832"); 8937 } 8938 8939 unittest { 8940 try { 8941 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 8942 assert(0); 8943 } catch(Exception e) { 8944 // good; it should throw an exception, not an error. 8945 } 8946 } 8947 8948 /* 8949 Copyright: Adam D. Ruppe, 2010 - 2021 8950 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 8951 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 8952 8953 Copyright Adam D. Ruppe 2010-2021. 8954 Distributed under the Boost Software License, Version 1.0. 8955 (See accompanying file LICENSE_1_0.txt or copy at 8956 http://www.boost.org/LICENSE_1_0.txt) 8957 */ 8958 8959