1 // FIXME: xml namespace support??? 2 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 3 // FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility... 4 5 // FIXME: the scriptable list is quite arbitrary 6 7 8 // xml entity references?! 9 10 /++ 11 This is an html DOM implementation, started with cloning 12 what the browser offers in Javascript, but going well beyond 13 it in convenience. 14 15 If you can do it in Javascript, you can probably do it with 16 this module, and much more. 17 18 --- 19 import arsd.dom; 20 21 void main() { 22 auto document = new Document("<html><p>paragraph</p></html>"); 23 writeln(document.querySelector("p")); 24 document.root.innerHTML = "<p>hey</p>"; 25 writeln(document); 26 } 27 --- 28 29 BTW: this file optionally depends on `arsd.characterencodings`, to 30 help it correctly read files from the internet. You should be able to 31 get characterencodings.d from the same place you got this file. 32 33 If you want it to stand alone, just always use the `Document.parseUtf8` 34 function or the constructor that takes a string. 35 36 Symbol_groups: 37 38 core_functionality = 39 40 These members provide core functionality. The members on these classes 41 will provide most your direct interaction. 42 43 bonus_functionality = 44 45 These provide additional functionality for special use cases. 46 47 implementations = 48 49 These provide implementations of other functionality. 
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes that may be accessed directly, without going through .attr.
// The comparison is exact (case-sensitive).
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/++
	The main document interface, including a html or xml parser.

	There are three main ways to create a Document:

	If you want to parse something and inspect the tags, you can use the [this|constructor]:
	---
		// create and parse some HTML in one call
		auto document = new Document("<html></html>");

		// or some XML
		auto document = new Document("<xml></xml>", true, true); // strict mode enabled

		// or better yet:
		auto document = new XmlDocument("<xml></xml>"); // specialized subclass
	---

	If you want to download something and parse it in one call, the [fromUrl] static function can help:
	---
		auto document = Document.fromUrl("http://dlang.org/");
	---
	(note that this requires my [arsd.characterencodings] and [arsd.http2] libraries)

	And, if you need to inspect things like `<%= foo %>` tags and comments, you can add them to the dom like this, with the [enableAddingSpecialTagsToDom]
	and [parseUtf8] or [parseGarbage] functions:
	---
		auto document = new Document();
		document.enableAddingSpecialTagsToDom();
		document.parseUtf8("<example></example>", true, true); // change the trues to false to switch from xml to html mode
	---

	You can also modify things like [selfClosedElements] and [rawSourceElements] before calling the `parse` family of functions to do further advanced tasks.

	However you parse it, it will put a few things into special variables.

	[root] contains the root element of the document.
	[prolog] contains the instructions before the root (like `<!DOCTYPE html>`). To keep the original things, you will need to [enableAddingSpecialTagsToDom] first, otherwise the library will return generic strings in there. [piecesBeforeRoot] will have other parsed instructions, if [enableAddingSpecialTagsToDom] is called.
	[piecesAfterRoot] will contain any xml-looking data after the root tag is closed.

	Most often though, you will not need to look at any of that data, since `Document` itself has methods like [querySelector], [appendChild], and more which will forward to the root [Element] for you.
+/
/// Group: core_functionality
class Document : FileResource, DomParent {
	// A Document is its own document, so this simply returns `this`.
	inout(Document) asDocument() inout { return this; }
	// A Document is not an Element, so this always returns null.
	inout(Element) asElement() inout { return null; }

	// Appends `child` to `parent`.
	// NOTE(review): judging by the name, this looks like a hook the parser calls
	// for each node it produces - confirm against parseStream.
	void processNodeWhileParsing(Element parent, Element child) {
		parent.appendChild(child);
	}

	/++
		Convenience method for web scraping. Requires [arsd.http2] to be
		included in the build as well as [arsd.characterencodings].

		This will download the file from the given url with a GET request
		and create a document off it, using a strict [parse] or a
		[parseGarbage], depending on the value of `strictMode`.

		Params:
			url = the address to download
			strictMode = if `true`, the content is parsed case-sensitive and
			strict, using the charset reported by the response
			(`contentTypeCharset`). If `false`, the content goes through
			[parseGarbage], which determines the encoding from the data
			itself.

		Returns:
			A new `Document` holding the parsed content.
	+/
	static Document fromUrl()(string url, bool strictMode = false) {
		import arsd.http2;
		auto client = new HttpClient();

		auto req = client.navigateTo(Uri(url), HttpVerb.GET);
		auto res = req.waitForCompletion();

		auto document = new Document();
		if(strictMode) {
			document.parse(cast(string) res.content, true, true, res.contentTypeCharset);
		} else {
			document.parseGarbage(cast(string) res.content);
		}

		return document;
	}

	/++
		Creates a document with the given source data. If you want HTML behavior, use `caseSensitive` and `strict` set to `false`. For XML mode, set them to `true`.

		The data is handed to [parseUtf8], so it must already be UTF-8.

		Please note that anything after the root element will be found in [piecesAfterRoot]. Comments, processing instructions, and other special tags will be stripped out by default. You can customize this by using the zero-argument constructor and setting callbacks on the [parseSawComment], [parseSawBangInstruction], [parseSawAspCode], [parseSawPhpCode], and [parseSawQuestionInstruction] members, then calling one of the [parseUtf8], [parseGarbage], or [parse] functions. Calling the convenience method, [enableAddingSpecialTagsToDom], will enable all those things at once.

		See_Also:
			[parseGarbage]
			[parseUtf8]
			[fromUrl]
	+/
	this(string data, bool caseSensitive = false, bool strict = false) {
		parseUtf8(data, caseSensitive, strict);
	}

	/**
		Creates an empty document. It has *nothing* in it at all; it is
		ready for you to call one of the `parse` functions on it.
	*/
	this() {

	}

	/++
		This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
		It returns a struct that forwards calls to all elements it holds, and returns itself so you
		can chain it.

		Example: document["p"].innerText("hello").addClass("modified");

		Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }

		Note: always use function calls (not property syntax) and don't use toString in there for best results.

		You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
		you could put in some kind of custom filter function tho.
	+/
	ElementCollection opIndex(string selector) {
		// start from a collection holding just the root, then narrow it by the selector
		auto e = ElementCollection(this.root);
		return e[selector];
	}

	// backing store for the contentType property; defaults to HTML in UTF-8
	string _contentType = "text/html; charset=utf-8";

	/// If you're using this for some other kind of XML, you can
	/// set the content type here.
	///
	/// Note: this has no impact on the function of this class.
	/// It is only used if the document is sent via a protocol like HTTP.
	///
	/// This may be called by parse() if it recognizes the data. Otherwise,
	/// if you don't set it, it assumes text/html; charset=utf-8.
	///
	/// Returns: the newly set content type.
	@property string contentType(string mimeType) {
		_contentType = mimeType;
		return _contentType;
	}

	/// implementing the FileResource interface, useful for sending via
	/// http automatically.
@property string filename() const { return null; } // a Document has no file name of its own, so this is always null

	/// implementing the FileResource interface, useful for sending via
	/// http automatically. Returns whatever was last set through the
	/// `contentType` setter, or the `text/html; charset=utf-8` default.
	override @property string contentType() const {
		return _contentType;
	}

	/// implementing the FileResource interface; it calls toString
	/// and returns the resulting text as raw bytes.
	override immutable(ubyte)[] getData() const {
		return cast(immutable(ubyte)[]) this.toString();
	}


	/*
	/// Concatenates any consecutive text nodes
	void normalize() {

	}
	*/

	/++
		Adds objects to the dom representing things normally stripped out during the default parse, like comments, `<!instructions>`, `<% code%>`, and `<? code?>` all at once.

		It does this by setting every `parseSaw*` delegate to one that always returns `true`. Note that this overwrites anything else you set there, and anything you set subsequently will overwrite this. Call this before calling parse().

		Note this will also preserve the prolog and doctype from the original file, if there was one.

		See_Also:
			[parseSawComment]
			[parseSawAspCode]
			[parseSawPhpCode]
			[parseSawQuestionInstruction]
			[parseSawBangInstruction]
	+/
	void enableAddingSpecialTagsToDom() {
		parseSawComment = (string) => true;
		parseSawAspCode = (string) => true;
		parseSawPhpCode = (string) => true;
		parseSawQuestionInstruction = (string) => true;
		parseSawBangInstruction = (string) => true;
	}

	/// If the parser sees a html comment, it will call this callback
	/// <!-- comment --> will call parseSawComment(" comment ")
	/// Return true if you want the node appended to the document. It will be in a [HtmlComment] object.
	/// If this is null (the default), comments are dropped.
	bool delegate(string) parseSawComment;

	/// If the parser sees <% asp code... %>, it will call this callback.
	/// It will be passed "% asp code... %" or "%= asp code .. %"
	/// Return true if you want the node appended to the document. It will be in an [AspCode] object.
	/// If this is null (the default), the code is dropped.
	bool delegate(string) parseSawAspCode;

	/// If the parser sees <?php php code... ?>, it will call this callback.
	/// It will be passed "?php php code... ?" or "?= php code .. ?"
	/// Note: dom.d cannot identify the other php <? code ?> short format.
	/// Return true if you want the node appended to the document. It will be in a [PhpCode] object.
	/// If this is null (the default), the code is dropped.
	bool delegate(string) parseSawPhpCode;

	/// if it sees a <?xxx> that is not php or asp
	/// it calls this function with the contents.
	/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
	/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
	/// Return true if you want the node appended to the document. It will be in a [QuestionInstruction] object.
	/// If this is null (the default), the instruction is dropped.
	bool delegate(string) parseSawQuestionInstruction;

	/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
	/// it calls this function with the contents.
	/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
	/// Return true if you want the node appended to the document. It will be in a [BangInstruction] object.
	/// If this is null (the default), the instruction is dropped.
	bool delegate(string) parseSawBangInstruction;

	/// Given the kind of garbage you find on the Internet, try to make sense of it.
	/// Equivalent to document.parse(data, false, false, null);
	/// (Case-insensitive, non-strict, determine character encoding from the data.)
	///
	/// NOTE: this makes no attempt at added security, but it will try to recover from anything instead of throwing.
	///
	/// It is a template so it lazily imports characterencodings.
	void parseGarbage()(string data) {
		parse(data, false, false, null);
	}

	/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
	/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data, bool pureXmlMode = false) {
		parseStream(toUtf8Stream(data), true, true, pureXmlMode);
	}

	/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
	/// tag soup, but does NOT try to correct bad character encodings.
	///
	/// They will still throw an exception.
	void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
		parseStream(toUtf8Stream(data), caseSensitive, strict);
	}

	// Works out the character encoding of `rawdata`, converts it to UTF-8 and
	// wraps the result in a Utf8Stream for the parser.
	//
	// rawdata      = the undecoded document bytes
	// dataEncoding = the encoding name if the caller knows it; pass null to have
	//                it detected from the data (byte inspection via
	//                tryToDetermineEncoding, then an XML prolog `encoding=`,
	//                then a `charset=` as found in meta tags)
	// strict       = if true, an undeterminable encoding throws MarkupException
	//                and conversion errors propagate; if false, Windows-1252 is
	//                used as the fallback in both situations
	//
	// this is a template so we get lazy import behavior
	Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
		import arsd.characterencodings;
		// gotta determine the data encoding. If you know it, pass it in above to skip all this.
		if(dataEncoding is null) {
			dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
			// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
			// Now, XML and HTML can both list encoding in the document, but we can't really parse
			// it here without changing a lot of code until we know the encoding. So I'm going to
			// do some hackish string checking.
			if(dataEncoding is null) {
				auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;

				// an encoding name stops at a quote, at whitespace, or at the end
				// of the tag / Content-Type parameter it was written in
				static bool endsEncodingName(ubyte b) {
					return b == '"' || b == '\'' || b == '>' || b == ';' || b == '/'
						|| b == ' ' || b == '\t' || b == '\r' || b == '\n';
				}

				// first, look for an XML prolog. XML allows either quote style here.
				auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
				if(idx == -1)
					idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding='");
				if(idx != -1) {
					// both needles have the same length, so one adjustment covers either match
					idx += "encoding=\"".length;
					// we're probably past the prolog if it's this far in; we might be looking at
					// content. Forget about it.
					if(idx > 100)
						idx = -1;
				}
				// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
				if(idx == -1) {
					idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
					if(idx != -1) {
						idx += "charset=".length;
						// the data may end right after "charset=", so check the bound
						// before peeking at an optional opening quote
						if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
							idx++;
					}
				}

				// found something in either branch...
				if(idx != -1) {
					// read till the name ends or about 12 chars, whichever comes first...
					auto end = idx;
					while(end < dataAsBytes.length && !endsEncodingName(dataAsBytes[end]) && end - idx < 12)
						end++;

					// an empty name (e.g. charset="") tells us nothing; leave the
					// encoding null so the "don't know" handling below applies
					if(end > idx)
						dataEncoding = cast(string) dataAsBytes[idx .. end];
				}
				// otherwise, we just don't know.
			}
		}

		if(dataEncoding is null) {
			if(strict)
				throw new MarkupException("I couldn't figure out the encoding of this document.");
			else
			// if we really don't know by here, it means we already tried UTF-8,
			// looked for utf 16 and 32 byte order marks, and looked for xml or meta
			// tags... let's assume it's Windows-1252, since that's probably the most
			// common aside from utf that wouldn't be labeled.

			dataEncoding = "Windows 1252";
		}

		// and now, go ahead and convert it.

		string data;

		if(!strict) {
			// if we're in non-strict mode, we need to check
			// the document for mislabeling too; sometimes
			// web documents will say they are utf-8, but aren't
			// actually properly encoded. If it fails to validate,
			// we'll assume it's actually Windows encoding - the most
			// likely candidate for mislabeled garbage.
			dataEncoding = dataEncoding.toLower();
			dataEncoding = dataEncoding.replace(" ", "");
			dataEncoding = dataEncoding.replace("-", "");
			dataEncoding = dataEncoding.replace("_", "");
			if(dataEncoding == "utf8") {
				try {
					validate(rawdata);
				} catch(UTFException e) {
					dataEncoding = "Windows 1252";
				}
			}
		}

		if(dataEncoding != "UTF-8") {
			if(strict)
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			else {
				try {
					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
				} catch(Exception e) {
					// unknown or unusable label; fall back rather than fail in loose mode
					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
				}
			}
		} else
			data = rawdata;

		return toUtf8Stream(data);
	}

	// Wraps already-UTF-8 text in a Utf8Stream. When Utf8Stream is just an alias
	// of string the text is returned directly; otherwise a stream object is built.
	private
	Utf8Stream toUtf8Stream(in string rawdata) {
		string data = rawdata;
		static if(is(Utf8Stream == string))
			return data;
		else
			return new Utf8Stream(data);
	}

	/++
		List of elements that can be assumed to be self-closed
		in this document. The default for a Document are a hard-coded
		list of ones appropriate for HTML. For [XmlDocument], it defaults
		to empty. You can modify this after construction but before parsing.

		History:
			Added February 8, 2021 (included in dub release 9.2)

			Changed from `string[]` to `immutable(string)[]` on
			February 4, 2024 (dub v11.5) to plug a hole discovered
			by the OpenD compiler's diagnostics.
	+/
	immutable(string)[] selfClosedElements = htmlSelfClosedElements;

	/++
		List of elements that contain raw CDATA content for this
		document, e.g. `<script>` and `<style>` for HTML. The parser
		will read until the closing string and put everything else
		in a [RawSource] object for future processing, not trying to
		do any further child nodes or attributes, etc.

		History:
			Added February 4, 2024 (dub v11.5)

	+/
	immutable(string)[] rawSourceElements = htmlRawSourceElements;

	/++
		List of elements that are considered inline for pretty printing.
		The default for a Document are hard-coded to something appropriate
		for HTML. For [XmlDocument], it defaults to empty. You can modify
		this after construction but before parsing.

		History:
			Added June 21, 2021 (included in dub release 10.1)

			Changed from `string[]` to `immutable(string)[]` on
			February 4, 2024 (dub v11.5) to plug a hole discovered
			by the OpenD compiler's diagnostics.
	+/
	immutable(string)[] inlineElements = htmlInlineElements;

	/**
		Take XMLish data and try to make the DOM tree out of it.

		The goal isn't to be perfect, but to just be good enough to
		approximate Javascript's behavior.

		If strict, it throws on something that doesn't make sense.
		(Examples: mismatched tags. It doesn't validate!)
		If not strict, it tries to recover anyway, and only throws
		when something is REALLY unworkable.

		If strict is false, it uses a magic list of tags that needn't
		be closed. If you are writing a document specifically for this,
		try to avoid such - use self closed tags at least. Easier to parse.

		The dataEncoding argument can be used to pass a specific
		charset encoding for automatic conversion. If null (which is NOT
		the default!), it tries to determine it from the data itself,
		using the xml prolog or meta tags. If it still cannot tell, strict
		mode throws a [MarkupException] and non-strict mode assumes
		Windows-1252.

		The default, however, is to assume UTF-8. If this assumption is
		wrong, it can throw on non-ascii characters!


		Note that it previously assumed the data was encoded as UTF-8, which
		is why the dataEncoding argument defaults to that.

		So it shouldn't break backward compatibility.

		But, if you want the best behavior on wild data - figuring it out from the document
		instead of assuming - you'll probably want to change that argument to null.

		This is a template so it lazily imports arsd.characterencodings, which is required
		to fix up data encodings.

		If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
		dependency. If it is data from the Internet though, a random website, the encoding
		is often a lie. This function, if dataEncoding == null, can correct for that, or
		you can try parseGarbage. In those cases, arsd.characterencodings is required to
		compile.
	*/
	void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
		// convert to UTF-8 first, then hand the stream to the actual parser
		auto data = handleDataEncoding(rawdata, dataEncoding, strict);
		parseStream(data, caseSensitive, strict);
	}

	// note: this works best in strict mode, unless data is just a simple string wrapper
	void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false, bool pureXmlMode = false) {
		// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
		// of my big app.

		assert(data !is null);

		// go through character by character.
		// if you see a <, consider it a tag.
		// name goes until the first non tagname character
		// then see if it self closes or has an attribute

		// if not in a tag, anything not a tag is a big text
		// node child. It ends as soon as it sees a <

		// Whitespace in text or attributes is preserved, but not between attributes

		// & and friends are converted when I know them, left the same otherwise


		// this it should already be done correctly..
so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 539 //validate(data); // it *must* be UTF-8 for this to work correctly 540 541 sizediff_t pos = 0; 542 543 clear(); 544 545 loose = !caseSensitive; 546 547 bool sawImproperNesting = false; 548 bool paragraphHackfixRequired = false; 549 550 int getLineNumber(sizediff_t p) { 551 int line = 1; 552 foreach(c; data[0..p]) 553 if(c == '\n') 554 line++; 555 return line; 556 } 557 558 void parseError(string message) { 559 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 560 } 561 562 bool eatWhitespace() { 563 bool ateAny = false; 564 while(pos < data.length && data[pos].isSimpleWhite) { 565 pos++; 566 ateAny = true; 567 } 568 return ateAny; 569 } 570 571 string readTagName() { 572 // remember to include : for namespaces 573 // basically just keep going until >, /, or whitespace 574 auto start = pos; 575 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 576 { 577 pos++; 578 if(pos == data.length) { 579 if(strict) 580 throw new Exception("tag name incomplete when file ended"); 581 else 582 break; 583 } 584 } 585 586 if(!caseSensitive) 587 return toLower(data[start..pos]); 588 else 589 return data[start..pos]; 590 } 591 592 string readAttributeName() { 593 // remember to include : for namespaces 594 // basically just keep going until >, /, or whitespace 595 auto start = pos; 596 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 597 { 598 if(data[pos] == '<') { 599 if(strict) 600 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 601 else 602 break; // e.g. <a href="something" <img src="poo" /></a>. 
The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 603 } 604 pos++; 605 if(pos == data.length) { 606 if(strict) 607 throw new Exception("unterminated attribute name"); 608 else 609 break; 610 } 611 } 612 613 if(!caseSensitive) 614 return toLower(data[start..pos]); 615 else 616 return data[start..pos]; 617 } 618 619 string readAttributeValue() { 620 if(pos >= data.length) { 621 if(strict) 622 throw new Exception("no attribute value before end of file"); 623 else 624 return null; 625 } 626 switch(data[pos]) { 627 case '\'': 628 case '"': 629 auto started = pos; 630 char end = data[pos]; 631 pos++; 632 auto start = pos; 633 while(pos < data.length && data[pos] != end) 634 pos++; 635 if(strict && pos == data.length) 636 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 637 string v = htmlEntitiesDecode(data[start..pos], strict); 638 pos++; // skip over the end 639 return v; 640 default: 641 if(strict) 642 parseError("Attributes must be quoted"); 643 // read until whitespace or terminator (/> or >) 644 auto start = pos; 645 while( 646 pos < data.length && 647 data[pos] != '>' && 648 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 649 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 650 !data[pos].isSimpleWhite) 651 pos++; 652 653 string v = htmlEntitiesDecode(data[start..pos], strict); 654 // don't skip the end - we'll need it later 655 return v; 656 } 657 } 658 659 TextNode readTextNode() { 660 auto start = pos; 661 while(pos < data.length && data[pos] != '<') { 662 pos++; 663 } 664 665 return TextNode.fromUndecodedString(this, data[start..pos]); 666 } 667 668 // this is obsolete! 
669 RawSource readCDataNode() { 670 auto start = pos; 671 while(pos < data.length && data[pos] != '<') { 672 pos++; 673 } 674 675 return new RawSource(this, data[start..pos]); 676 } 677 678 679 struct Ele { 680 int type; // element or closing tag or nothing 681 /* 682 type == 0 means regular node, self-closed (element is valid) 683 type == 1 means closing tag (payload is the tag name, element may be valid) 684 type == 2 means you should ignore it completely 685 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 686 type == 4 means the document was totally empty 687 */ 688 Element element; // for type == 0 or type == 3 689 string payload; // for type == 1 690 } 691 // recursively read a tag 692 Ele readElement(string[] parentChain = null) { 693 // FIXME: this is the slowest function in this module, by far, even in strict mode. 694 // Loose mode should perform decently, but strict mode is the important one. 
695 if(!strict && parentChain is null) 696 parentChain = []; 697 698 static string[] recentAutoClosedTags; 699 700 if(pos >= data.length) 701 { 702 if(strict) { 703 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 704 } else { 705 if(parentChain.length) 706 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 707 else 708 return Ele(4); // signal emptiness upstream 709 } 710 } 711 712 if(data[pos] != '<') { 713 return Ele(0, readTextNode(), null); 714 } 715 716 enforce(data[pos] == '<'); 717 pos++; 718 if(pos == data.length) { 719 if(strict) 720 throw new MarkupException("Found trailing < at end of file"); 721 // if not strict, we'll just skip the switch 722 } else 723 switch(data[pos]) { 724 // I don't care about these, so I just want to skip them 725 case '!': // might be a comment, a doctype, or a special instruction 726 pos++; 727 728 // FIXME: we should store these in the tree too 729 // though I like having it stripped out tbh. 730 731 if(pos == data.length) { 732 if(strict) 733 throw new MarkupException("<! opened at end of file"); 734 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 735 // comment 736 pos += 2; 737 738 // FIXME: technically, a comment is anything 739 // between -- and -- inside a <!> block. 
740 // so in <!-- test -- lol> , the " lol" is NOT a comment 741 // and should probably be handled differently in here, but for now 742 // I'll just keep running until --> since that's the common way 743 744 auto commentStart = pos; 745 while(pos+3 < data.length && data[pos..pos+3] != "-->") 746 pos++; 747 748 auto end = commentStart; 749 750 if(pos + 3 >= data.length) { 751 if(strict) 752 throw new MarkupException("unclosed comment"); 753 end = data.length; 754 pos = data.length; 755 } else { 756 end = pos; 757 assert(data[pos] == '-'); 758 pos++; 759 assert(data[pos] == '-'); 760 pos++; 761 assert(data[pos] == '>'); 762 pos++; 763 } 764 765 if(parseSawComment !is null) 766 if(parseSawComment(data[commentStart .. end])) { 767 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 768 } 769 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 770 pos += 7; 771 772 auto cdataStart = pos; 773 774 ptrdiff_t end = -1; 775 typeof(end) cdataEnd; 776 777 if(pos < data.length) { 778 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 779 end = data[pos .. $].indexOf("]]>"); 780 } 781 782 if(end == -1) { 783 if(strict) 784 throw new MarkupException("Unclosed CDATA section"); 785 end = pos; 786 cdataEnd = pos; 787 } else { 788 cdataEnd = pos + end; 789 pos = cdataEnd + 3; 790 } 791 792 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 793 } else { 794 auto start = pos; 795 while(pos < data.length && data[pos] != '>') 796 pos++; 797 798 auto bangEnds = pos; 799 if(pos == data.length) { 800 if(strict) 801 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 802 } else pos++; // skipping the > 803 804 if(parseSawBangInstruction !is null) 805 if(parseSawBangInstruction(data[start .. bangEnds])) { 806 // FIXME: these should be able to modify the parser state, 807 // doing things like adding entities, somehow. 808 809 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 810 } 811 } 812 813 /* 814 if(pos < data.length && data[pos] == '>') 815 pos++; // skip the > 816 else 817 assert(!strict); 818 */ 819 break; 820 case '%': 821 case '?': 822 /* 823 Here's what we want to support: 824 825 <% asp code %> 826 <%= asp code %> 827 <?php php code ?> 828 <?= php code ?> 829 830 The contents don't really matter, just if it opens with 831 one of the above for, it ends on the two char terminator. 832 833 <?something> 834 this is NOT php code 835 because I've seen this in the wild: <?EM-dummyText> 836 837 This could be php with shorttags which would be cut off 838 prematurely because if(a >) - that > counts as the close 839 of the tag, but since dom.d can't tell the difference 840 between that and the <?EM> real world example, it will 841 not try to look for the ?> ending. 842 843 The difference between this and the asp/php stuff is that it 844 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 845 on >. 846 */ 847 848 char end = data[pos]; 849 auto started = pos; 850 bool isAsp = end == '%'; 851 int currentIndex = 0; 852 bool isPhp = false; 853 bool isEqualTag = false; 854 int phpCount = 0; 855 856 more: 857 pos++; // skip the start 858 if(pos == data.length) { 859 if(strict) 860 throw new MarkupException("Unclosed <"~end~" by end of file"); 861 } else { 862 currentIndex++; 863 if(currentIndex == 1 && data[pos] == '=') { 864 if(!isAsp) 865 isPhp = true; 866 isEqualTag = true; 867 goto more; 868 } 869 if(currentIndex == 1 && data[pos] == 'p') 870 phpCount++; 871 if(currentIndex == 2 && data[pos] == 'h') 872 phpCount++; 873 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 874 isPhp = true; 875 876 if(data[pos] == '>') { 877 if((isAsp || isPhp) && data[pos - 1] != end) 878 goto more; 879 // otherwise we're done 880 } else 881 goto more; 882 } 883 884 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 885 auto code = data[started .. 
pos]; 886 887 888 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 889 if(pos < data.length) 890 pos++; // get past the > 891 892 if(isAsp && parseSawAspCode !is null) { 893 if(parseSawAspCode(code)) { 894 return Ele(3, new AspCode(this, code), null); 895 } 896 } else if(isPhp && parseSawPhpCode !is null) { 897 if(parseSawPhpCode(code)) { 898 return Ele(3, new PhpCode(this, code), null); 899 } 900 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 901 if(parseSawQuestionInstruction(code)) { 902 return Ele(3, new QuestionInstruction(this, code), null); 903 } 904 } 905 break; 906 case '/': // closing an element 907 pos++; // skip the start 908 auto p = pos; 909 while(pos < data.length && data[pos] != '>') 910 pos++; 911 //writefln("</%s>", data[p..pos]); 912 if(pos == data.length && data[pos-1] != '>') { 913 if(strict) 914 throw new MarkupException("File ended before closing tag had a required >"); 915 else 916 data ~= ">"; // just hack it in 917 } 918 pos++; // skip the '>' 919 920 string tname = data[p..pos-1]; 921 if(!strict) 922 tname = tname.strip; 923 if(!caseSensitive) 924 tname = tname.toLower(); 925 926 return Ele(1, null, tname); // closing tag reports itself here 927 case ' ': // assume it isn't a real element... 928 if(strict) { 929 parseError("bad markup - improperly placed <"); 930 assert(0); // parseError always throws 931 } else 932 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 933 default: 934 935 if(!strict) { 936 // what about something that kinda looks like a tag, but isn't? 937 auto nextTag = data[pos .. $].indexOf("<"); 938 auto closeTag = data[pos .. $].indexOf(">"); 939 if(closeTag != -1 && nextTag != -1) 940 if(nextTag < closeTag) { 941 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 942 943 auto equal = data[pos .. 
$].indexOf("=\""); 944 if(equal != -1 && equal < closeTag) { 945 // this MIGHT be ok, soldier on 946 } else { 947 // definitely no good, this must be a (horribly distorted) text node 948 pos++; // skip the < we're on - don't want text node to end prematurely 949 auto node = readTextNode(); 950 node.contents = "<" ~ node.contents; // put this back 951 return Ele(0, node, null); 952 } 953 } 954 } 955 956 string tagName = readTagName(); 957 string[string] attributes; 958 959 Ele addTag(bool selfClosed) { 960 if(selfClosed) 961 pos++; 962 else { 963 if(!strict) 964 if(tagName.isInArray(selfClosedElements)) 965 // these are de-facto self closed 966 selfClosed = true; 967 } 968 969 import std.algorithm.comparison; 970 971 if(strict) { 972 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)])); 973 } else { 974 // if we got here, it's probably because a slash was in an 975 // unquoted attribute - don't trust the selfClosed value 976 if(!selfClosed) 977 selfClosed = tagName.isInArray(selfClosedElements); 978 979 while(pos < data.length && data[pos] != '>') 980 pos++; 981 982 if(pos >= data.length) { 983 // the tag never closed 984 assert(data.length != 0); 985 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 
986 } 987 } 988 989 auto whereThisTagStarted = pos; // for better error messages 990 991 pos++; 992 993 auto e = createElement(tagName); 994 e.attributes = attributes; 995 version(dom_node_indexes) { 996 if(e.dataset.nodeIndex.length == 0) 997 e.dataset.nodeIndex = to!string(&(e.attributes)); 998 } 999 e.selfClosed = selfClosed; 1000 e.parseAttributes(); 1001 1002 1003 // HACK to handle script and style as a raw data section as it is in HTML browsers 1004 if(!pureXmlMode && tagName.isInArray(rawSourceElements)) { 1005 if(!selfClosed) { 1006 string closer = "</" ~ tagName ~ ">"; 1007 ptrdiff_t ending; 1008 if(pos >= data.length) 1009 ending = -1; 1010 else 1011 ending = indexOf(data[pos..$], closer); 1012 1013 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 1014 /* 1015 if(loose && ending == -1 && pos < data.length) 1016 ending = indexOf(data[pos..$], closer.toUpper()); 1017 */ 1018 if(ending == -1) { 1019 if(strict) 1020 throw new Exception("tag " ~ tagName ~ " never closed"); 1021 else { 1022 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 1023 if(pos < data.length) { 1024 e = new TextNode(this, data[pos .. $]); 1025 pos = data.length; 1026 } 1027 } 1028 } else { 1029 ending += pos; 1030 e.innerRawSource = data[pos..ending]; 1031 pos = ending + closer.length; 1032 } 1033 } 1034 return Ele(0, e, null); 1035 } 1036 1037 bool closed = selfClosed; 1038 1039 void considerHtmlParagraphHack(Element n) { 1040 assert(!strict); 1041 if(e.tagName == "p" && e.tagName == n.tagName) { 1042 // html lets you write <p> para 1 <p> para 1 1043 // but in the dom tree, they should be siblings, not children. 
1044 paragraphHackfixRequired = true; 1045 } 1046 } 1047 1048 //writef("<%s>", tagName); 1049 while(!closed) { 1050 Ele n; 1051 if(strict) 1052 n = readElement(); 1053 else 1054 n = readElement(parentChain ~ tagName); 1055 1056 if(n.type == 4) return n; // the document is empty 1057 1058 if(n.type == 3 && n.element !is null) { 1059 // special node, append if possible 1060 if(e !is null) 1061 processNodeWhileParsing(e, n.element); 1062 else 1063 piecesBeforeRoot ~= n.element; 1064 } else if(n.type == 0) { 1065 if(!strict) 1066 considerHtmlParagraphHack(n.element); 1067 processNodeWhileParsing(e, n.element); 1068 } else if(n.type == 1) { 1069 bool found = false; 1070 if(n.payload != tagName) { 1071 if(strict) 1072 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 1073 else { 1074 sawImproperNesting = true; 1075 // this is so we don't drop several levels of awful markup 1076 if(n.element) { 1077 if(!strict) 1078 considerHtmlParagraphHack(n.element); 1079 processNodeWhileParsing(e, n.element); 1080 n.element = null; 1081 } 1082 1083 // is the element open somewhere up the chain? 1084 foreach(i, parent; parentChain) 1085 if(parent == n.payload) { 1086 recentAutoClosedTags ~= tagName; 1087 // just rotating it so we don't inadvertently break stuff with vile crap 1088 if(recentAutoClosedTags.length > 4) 1089 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 1090 1091 n.element = e; 1092 return n; 1093 } 1094 1095 // if not, this is a text node; we can't fix it up... 
1096 1097 // If it's already in the tree somewhere, assume it is closed by algorithm 1098 // and we shouldn't output it - odds are the user just flipped a couple tags 1099 foreach(ele; e.tree) { 1100 if(ele.tagName == n.payload) { 1101 found = true; 1102 break; 1103 } 1104 } 1105 1106 foreach(ele; recentAutoClosedTags) { 1107 if(ele == n.payload) { 1108 found = true; 1109 break; 1110 } 1111 } 1112 1113 if(!found) // if not found in the tree though, it's probably just text 1114 processNodeWhileParsing(e, TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1115 } 1116 } else { 1117 if(n.element) { 1118 if(!strict) 1119 considerHtmlParagraphHack(n.element); 1120 processNodeWhileParsing(e, n.element); 1121 } 1122 } 1123 1124 if(n.payload == tagName) // in strict mode, this is always true 1125 closed = true; 1126 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1127 } 1128 //writef("</%s>\n", tagName); 1129 return Ele(0, e, null); 1130 } 1131 1132 // if a tag was opened but not closed by end of file, we can arrive here 1133 if(!strict && pos >= data.length) 1134 return addTag(false); 1135 //else if(strict) assert(0); // should be caught before 1136 1137 switch(data[pos]) { 1138 default: assert(0); 1139 case '/': // self closing tag 1140 return addTag(true); 1141 case '>': 1142 return addTag(false); 1143 case ' ': 1144 case '\t': 1145 case '\n': 1146 case '\r': 1147 // there might be attributes... 1148 moreAttributes: 1149 eatWhitespace(); 1150 1151 // same deal as above the switch.... 
1152 if(!strict && pos >= data.length) 1153 return addTag(false); 1154 1155 if(strict && pos >= data.length) 1156 throw new MarkupException("tag open, didn't find > before end of file"); 1157 1158 switch(data[pos]) { 1159 case '/': // self closing tag 1160 return addTag(true); 1161 case '>': // closed tag; open -- we now read the contents 1162 return addTag(false); 1163 default: // it is an attribute 1164 string attrName = readAttributeName(); 1165 string attrValue = attrName; 1166 1167 bool ateAny = eatWhitespace(); 1168 // the spec allows this too, sigh https://www.w3.org/TR/REC-xml/#NT-Eq 1169 //if(strict && ateAny) 1170 //throw new MarkupException("inappropriate whitespace after attribute name"); 1171 1172 if(pos >= data.length) { 1173 if(strict) 1174 assert(0, "this should have thrown in readAttributeName"); 1175 else { 1176 data ~= ">"; 1177 goto blankValue; 1178 } 1179 } 1180 if(data[pos] == '=') { 1181 pos++; 1182 1183 ateAny = eatWhitespace(); 1184 // the spec actually allows this! 1185 //if(strict && ateAny) 1186 //throw new MarkupException("inappropriate whitespace after attribute equals"); 1187 1188 attrValue = readAttributeValue(); 1189 1190 eatWhitespace(); 1191 } 1192 1193 blankValue: 1194 1195 if(strict && attrName in attributes) 1196 throw new MarkupException("Repeated attribute: " ~ attrName); 1197 1198 if(attrName.strip().length) 1199 attributes[attrName] = attrValue; 1200 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1201 1202 if(!strict && pos < data.length && data[pos] == '<') { 1203 // this is the broken tag that doesn't have a > at the end 1204 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1205 // let's insert one as a hack 1206 goto case '>'; 1207 } 1208 1209 goto moreAttributes; 1210 } 1211 } 1212 } 1213 1214 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 
1215 //assert(0); 1216 } 1217 1218 eatWhitespace(); 1219 Ele r; 1220 do { 1221 r = readElement(); // there SHOULD only be one element... 1222 1223 if(r.type == 3 && r.element !is null) 1224 piecesBeforeRoot ~= r.element; 1225 1226 if(r.type == 4) 1227 break; // the document is completely empty... 1228 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1229 1230 root = r.element; 1231 if(root !is null) 1232 root.parent_ = this; 1233 1234 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1235 while(r.type != 4) { 1236 r = readElement(); 1237 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1238 if(r.element !is null) 1239 piecesAfterRoot ~= r.element; 1240 } 1241 } 1242 1243 if(root is null) 1244 { 1245 if(strict) 1246 assert(0, "empty document should be impossible in strict mode"); 1247 else 1248 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1249 } 1250 1251 if(paragraphHackfixRequired) { 1252 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1253 1254 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1255 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1256 1257 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1258 // Kind of inefficient because we can't detect when we recurse back out of a node. 1259 Element[Element] insertLocations; 1260 auto iterator = root.tree; 1261 foreach(ele; iterator) { 1262 if(ele.parentNode is null) 1263 continue; 1264 1265 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1266 auto shouldBePreviousSibling = ele.parentNode; 1267 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
if (auto p = holder in insertLocations) {
	// an element was already relocated under this holder: insert after that one instead
	shouldBePreviousSibling = *p;
	assert(shouldBePreviousSibling.parentNode is holder);
}
ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
insertLocations[holder] = ele;
iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
} // end: <p> nested directly inside a <p>
} // end: foreach over the tree
} // end: if(paragraphHackfixRequired)
} // end: parse function

/* end massive parse function */

/++
	Gets the `<title>` element's innerText, if one exists.

	Returns:
		The innerText of the first element whose tag name is "title", or an
		empty string when no such element is found (which includes a document
		whose root is null).
+/
@property string title() {
	bool isTitleTag(Element candidate) {
		return (candidate.tagName == "title");
	}

	auto titleElement = findFirst(&isTitleTag);
	if(titleElement)
		return titleElement.innerText();
	return "";
}

/++
	Sets the title of the page, creating a `<title>` element if needed.

	A newly created `<title>` is appended to the first `<head>` element. If the
	document has no root or no `<head>`, the new element is not attached to
	anything, so the title is effectively dropped.

	Params:
		t = the new title text
+/
@property void title(string t) {
	bool isTitleTag(Element candidate) {
		return (candidate.tagName == "title");
	}

	auto titleElement = findFirst(&isTitleTag);

	if(!titleElement) {
		titleElement = createElement("title");
		// getElementsByTagName forwards straight to root, so it must not be
		// called on a document without one (findFirst above tolerates that case)
		if(root !is null) {
			auto heads = getElementsByTagName("head");
			if(heads.length)
				heads[0].appendChild(titleElement);
		}
	}

	if(titleElement)
		titleElement.innerText = t;
}

// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
	return root.getElementById(id);
}

/// Forwards to the root element's `requireElementById`, passing `file` and `line` through.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// Like [querySelector], but throws [ElementNotFoundException] when nothing matches
/// or when the match cannot be cast to `SomeElementType`.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
}

/// Like [querySelector], but wraps the (possibly null) result in a `MaybeNullElement`.
/// The `file` and `line` arguments are accepted but not used here.
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	return MaybeNullElement!SomeElementType(cast(SomeElementType) querySelector(selector));
}

// Shared set-up for querySelector and querySelectorAll: parses the selector and,
// for every component whose first part has separation 0, changes it to -1.
private auto documentLevelSelector(string selector) {
	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length == 0)
			continue;
		if(component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}
	return parsed;
}

/// Returns the first element matching the selector (searched from the root), or null.
/// See the comment in [querySelectorAll] for why the selector is adjusted first.
@scriptable
Element querySelector(string selector) {
	auto adjusted = documentLevelSelector(selector);
	foreach(match; adjusted.getMatchingElementsLazy(this.root))
		return match;
	return null;
}

/// Returns all elements matching the selector, searched from the root.
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto adjusted = documentLevelSelector(selector);
	return adjusted.getMatchingElements(this.root, null);
}

/// Forwards to the root element's `getElementsBySelector`.
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// Forwards to the root element's `getElementsByTagName`.
@scriptable
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/// Forwards to the root element's `getElementsByClassName`.
@scriptable
Element[] getElementsByClassName(string tag) {
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
/// Returns the first element (via [findFirst]) whose tagName equals `tag`;
/// the requested tag is lower-cased first when the document is `loose`.
Element getFirstElementByTagName(string tag) {
	string wanted = tag;
	if(loose)
		wanted = tag.toLower();

	bool hasWantedTag(Element candidate) {
		return candidate.tagName == wanted;
	}

	return findFirst(&hasWantedTag);
}

/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// Looks up a `<meta>` element inside `<head>` and returns its content,
/// or null if there is no such element. The attribute matched against
/// `name` is `name` when there is no colon in it and `property` when there is.
string getMeta(string name) {
	string attributeName;
	if(name.indexOf(":") == -1)
		attributeName = "name";
	else
		attributeName = "property";

	auto meta = querySelector("head meta["~attributeName~"="~name~"]");
	return meta is null ? null : meta.content;
}

/// Sets a meta tag in the document header.
/// It is kinda hacky so it works easily for both Facebook open graph and traditional html meta tags:
/// the attribute used is `property` when `name` contains a colon and `name` otherwise.
/// A missing `<meta>` is created under `<head>` (which must exist, see [requireSelector]).
void setMeta(string name, string value) {
	string attributeName;
	if(name.indexOf(":") == -1)
		attributeName = "name";
	else
		attributeName = "property";

	auto meta = querySelector("head meta["~attributeName~"="~name~"]");
	if(meta is null) {
		meta = requireSelector("head").addChild("meta");
		meta.setAttribute(attributeName, name);
	}

	meta.content = value;
}

/// Returns the result of `getElementsByTagName("form")`, cast to `Form[]`.
Form[] forms() {
	auto found = getElementsByTagName("form");
	return cast(Form[]) found;
}

/// Creates a "form" element through [createElement] and casts it to `Form`.
Form createForm()
out(ret) {
	assert(ret !is null);
}
do {
	auto made = createElement("form");
	return cast(Form) made;
}

/// Makes a new element via `Element.make`, passing this document's `selfClosedElements`.
/// The name is lower-cased first when the document is `loose`.
Element createElement(string name) {
	return Element.make(loose ? name.toLower() : name, null, null, selfClosedElements);

	// return new Element(this, name, null, selfClosed);
}

/// Creates a new `DocumentFragment` constructed with this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new `TextNode` constructed with this document and the given content.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Walks the tree from the root, depth first, parents before children, and
/// returns the first element for which `doesItMatch` returns true.
/// Returns null if nothing matches or if there is no root.
Element findFirst(bool delegate(Element) doesItMatch) {
	Element found;

	// returns true as soon as `found` has been set, which stops the walk
	bool visit(Element node) {
		if(doesItMatch(node)) {
			found = node;
			return true;
		}

		foreach(kid; node.children)
			if(visit(kid))
				return true;

		return false;
	}

	if(root !is null)
		visit(root);

	return found;
}

/// Drops the root element and turns `loose` off.
void clear() {
	loose = false;
	root = null;
}

private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/++
	Returns or sets the string before the root element. This is, for example,
	`<!DOCTYPE html>\n` or similar.

	The getter returns the stored string when [setProlog] has been called or
	when [piecesBeforeRoot] is empty; otherwise it is rebuilt from
	[piecesBeforeRoot], one piece per line.
+/
@property string prolog() const {
	immutable useStored = prologWasSet || piecesBeforeRoot.length == 0;
	if(useStored)
		return _prolog;

	string rebuilt;
	foreach(piece; piecesBeforeRoot) {
		rebuilt ~= piece.toString();
		rebuilt ~= "\n";
	}
	return rebuilt;
}

/// Stores `d` as the prolog and remembers that it was set explicitly.
void setProlog(string d) {
	prologWasSet = true;
	_prolog = d;
}

/++
	Returns the document as string form: the [prolog] followed by the root element.
	Please note that if there is anything in [piecesAfterRoot], they are discarded.
	If you want to add them to the file, loop over that and append it yourself
	(but remember xml isn't supposed to have anything after the root element).
+/
override string toString() const {
	string text = prolog;
	text ~= root.toString();
	return text;
}

/++
	Writes it out with whitespace for easier eyeball debugging.
	Unlike [toString], this one does append [piecesAfterRoot].

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	import std.string;
	string pretty = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	pretty ~= root.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
	foreach(trailing; piecesAfterRoot)
		pretty ~= trailing.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
	return pretty;
}

/// The root element, like `<html>`. Most of the methods on Document forward to this object.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

/// Loose-mode flag. When true, [createElement] and [getFirstElementByTagName]
/// lower-case the tag name they are given. Reset to false by [clear].
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

/// Calls every delegate in [eventObservers], in array order, with the given event.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(o; eventObservers)
		o(e);
}
}

/++
	Basic parsing of HTML tag soup

	If you simply make a `new Document("some string")` or use [Document.fromUrl] to automatically
	download a page (that function is shorthand for `new Document(arsd.http2.get(your_given_url).contentText)`),
	the Document parser will assume it is broken HTML. It will try to fix up things like charset messes, missing
	closing tags, flipped tags, inconsistent letter cases, and other forms of commonly found HTML on the web.

	It isn't exactly the same as what a HTML5 web browser does in all cases, but it usually is, and where it
	disagrees, it is still usually good enough (but sometimes a bug).
+/
unittest {
	auto document = new Document(`<html><body><p>hello <P>there`);
	// this will automatically try to normalize the html and fix up broken tags, etc
	// so notice how it added the missing closing tags here and made them all lower case
	assert(document.toString() == "<!DOCTYPE html>\n<html><body><p>hello </p><p>there</p></body></html>", document.toString());
}

/++
	Stricter parsing of HTML

	When you are writing the HTML yourself, you can remove most ambiguity by making it throw exceptions instead
	of trying to automatically fix up things basic parsing tries to do. Using strict mode accomplishes this.
This will help guarantee that you have well-formed HTML, which means it is going to parse a lot more reliably
by all users - browsers, dom.d, other libraries, all behave better with well-formed input... people too!

(note it is not a full *validator*, just a well-formedness checker. Full validation is a lot more work for very
little benefit in my experience, so I stopped here.)
+/
unittest {
	try {
		auto document = new Document(`<html><body><p>hello <P>there`, true, true); // turns on strict and case sensitive mode to ctor
		assert(0); // never reached, the constructor will throw because strict mode is turned on
	} catch(Exception e) {
		// expected: the unclosed / mismatched tags are rejected in strict mode
	}

	// you can also create the object first, then use the [parseStrict] method
	auto document = new Document;
	document.parseStrict(`<foo></foo>`); // this is invalid html - no such foo tag - but it is well-formed, since it is opened and closed properly, so it passes

}

/++
	Custom HTML extensions

	dom.d is a custom HTML parser, which means you can add custom HTML extensions to it too. It normally reads
	and discards things like ASP style `<% ... %>` code as well as XML processing instruction / PHP style embeds `<? ... ?>`
	but you can keep this data if you call a function to opt into it before parsing.

	Additionally, you can add special tags to be read like `<script>` to preserve its insides for future processing
	via the `.innerRawSource` member.
+/
unittest {
	auto document = new Document; // construct an empty thing first
	document.enableAddingSpecialTagsToDom(); // add the special tags like <% ... %> etc
	document.rawSourceElements ~= "embedded-plaintext"; // tell it we want a custom raw-source tag whose contents are kept as-is

	document.parseStrict(`<html>
		<% some asp code %>
		<script>embedded && javascript</script>
		<embedded-plaintext>my <custom> plaintext & stuff</embedded-plaintext>
	</html>`);

	// please note that if we did `document.toString()` right now, the original source - almost your same
	// string you passed to parseStrict - would be spit back out. Meaning the embedded-plaintext still has its
	// special text inside it. Another parser won't understand how to use this! So if you want to pass this
	// document somewhere else, you need to do some transformations.
	//
	// This differs from cases like CDATA sections, which dom.d will automatically convert into plain html entities
	// on the output that can be read by anyone.

	assert(document.root.tagName == "html"); // the root element is normal

	int foundCount;
	// now let's loop through the whole tree
	foreach(element; document.root.tree) {
		// the asp thing will be in
		if(auto asp = cast(AspCode) element) {
			// you use the `asp.source` member to get the code for these
			assert(asp.source == "% some asp code %");
			foundCount++;
		} else if(element.tagName == "script") {
			// and for raw source elements - script, style, or the ones you add,
			// you use the innerHTML method to get the code inside
			assert(element.innerHTML == "embedded && javascript");
			foundCount++;
		} else if(element.tagName == "embedded-plaintext") {
			// and innerHTML again
			assert(element.innerHTML == "my <custom> plaintext & stuff");
			foundCount++;
		}

	}

	// one asp block, one script, one embedded-plaintext
	assert(foundCount == 3);

	// writeln(document.toString());
}

// FIXME: <textarea> contents are treated kinda special in html5 as well...

/++
	Demoing CDATA, entities, and non-ascii characters.
The previous example mentioned CDATA, let's show you what that does too. These are all read in as plain strings accessible in the DOM - there is no CDATA, no entities once you get inside the object model - but when you convert back into a string, it will normalize them in a particular way.

This is not exactly standards compliant completely in and out thanks to it doing some transformations... but I find it more useful - it reads the data in consistently and writes it out consistently, both in ways that work well for interop. Take a look:
+/
unittest {
	// NOTE: the second paragraph must be written with the *named* entity in the source,
	// and the expected output below must use numbered / escaped entities; otherwise the
	// test does not exercise (or contradicts) the normalization it describes.
	auto document = new Document(`<html>
		<p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p>
		<p>&curren; is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p>
		<p><![CDATA[xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.]]></p>
	</html>`, true, true); // strict mode turned on

	// Inside the object model, things are simplified to D strings.
	auto paragraphs = document.querySelectorAll("p");
	// no surprise on the first paragraph, we wrote it with the character, and it is still there in the D string
	assert(paragraphs[0].textContent == "¤ is a non-ascii character. It will be converted to a numbered entity in string output.");
	// but note on the second paragraph, the entity has been converted to the appropriate *character* in the object
	assert(paragraphs[1].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.");
	// and the CDATA bit is completely gone from the DOM; it just read it in as a text node. The text content shows the text as a plain string:
	assert(paragraphs[2].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.");
	// and the dom node beneath it is just a single text node; no trace of the original CDATA detail is left after parsing.
	assert(paragraphs[2].childNodes.length == 1 && paragraphs[2].childNodes[0].nodeType == NodeType.Text);

	// And now, in the output string, we can see they are normalized thusly:
	// (the whitespace inside this literal must match the input literal above exactly)
	assert(document.toString() == "<!DOCTYPE html>\n<html>
		<p>&#164; is a non-ascii character. It will be converted to a numbered entity in string output.</p>
		<p>&#164; is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p>
		<p>xml cdata segments, which can contain &lt;tag&gt; looking things, are converted to encode the embedded special-to-xml characters to entities too.</p>
	</html>");
}

/++
	Streaming parsing

	dom.d normally takes a big string and returns a big DOM object tree - hence its name. This is usually the simplest
	code to read and write, so I prefer to stick to that, but if you wanna jump through a few hoops, you can still make
	dom.d work with streams.

	It is awkward - again, dom.d's whole design is based on building the dom tree, but you can do it if you're willing to
	subclass a little and trust the garbage collector. Here's how.
+/
unittest {
	bool encountered;
	class StreamDocument : Document {
		// the normal behavior for this function is to `parent.appendChild(child)`
		// but we can override to read it as it is processed and not append it
		override void processNodeWhileParsing(Element parent, Element child) {
			if(child.tagName == "bar")
				encountered = true;
			// note that each element's object is created but then discarded as garbage.
			// the GC will take care of it, even with a large document, whereas the normal
			// object tree could become quite large.
		}

		this() {
			super("<foo><bar></bar></foo>");
		}
	}

	auto test = new StreamDocument();
	assert(encountered); // it should have been seen
	assert(test.querySelector("bar") is null); // but not appended to the dom node, since we didn't append it
}

/++
	Basic parsing of XML.

	dom.d is not technically a standards-compliant xml parser and doesn't implement all xml features,
	but its stricter parse options together with turning off HTML's special tag handling (e.g. treating
	`<script>` and `<style>` the same as any other tag) gets close enough to work fine for a great many
	use cases.

	For more information, see [XmlDocument].
+/
unittest {
	auto xml = new XmlDocument(`<my-stuff>hello</my-stuff>`);
}

/// Implemented by [Element] in this file; each accessor returns the object as
/// that type, or null when it is not one (Element's `asDocument` returns null).
interface DomParent {
	inout(Document) asDocument() inout;
	inout(Element) asElement() inout;
}

/++
	This represents almost everything in the DOM and offers a lot of inspection and manipulation functions. Element, or its subclasses, are what makes the dom tree.
+/
/// Group: core_functionality
class Element : DomParent {
	/// An Element is never a Document, so this is always null.
	inout(Document) asDocument() inout { return null; }
	/// Returns this element itself.
	inout(Element) asElement() inout { return this; }

	/// Returns a collection of elements by selector.
	/// See: [Document.opIndex]
	ElementCollection opIndex(string selector) {
		auto e = ElementCollection(this);
		return e[selector];
	}

	/++
		Returns the child node with the particular index.

		Be aware that child nodes include text nodes, including
		whitespace-only nodes.
+/
Element opIndex(size_t index) {
	// out-of-range indexes give null rather than a range error
	return index < children.length ? this.children[index] : null;
}

/// Like getElementById, except that a missing element raises [ElementNotFoundException] instead of giving null.
/// A specific subclass of Element may be requested; the result is dynamically cast to it, and a failed cast throws as well.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
if(
	is(SomeElementType : Element)
)
out(ret) {
	assert(ret !is null);
}
do {
	if(auto found = cast(SomeElementType) getElementById(id))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
}

/// Same idea as [requireElementById], but the lookup goes through querySelector with a selector instead of an id.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(
	is(SomeElementType : Element)
)
out(ret) {
	assert(ret !is null);
}
do {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
}


/++
	If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	auto e = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(e);
}



/++
	Get all the classes on this element.

	The class attribute is split on runs of whitespace (spaces, tabs, newlines),
	so the result never contains blank names.

	Returns: the class names in attribute order; empty if there are none.
+/
@property string[] classes() const {
	// the separator-less split breaks on any whitespace and drops empty pieces,
	// which also makes per-name stripping unnecessary
	return split(className);
}

/++
	The object [classList] returns.
+/
static struct ClassListHelper {
	Element this_; // the element whose classes are inspected / modified
	this(inout(Element) this_) inout {
		this.this_ = this_;
	}

	/// Forwards to [Element.hasClass].
	bool contains(string cn) const {
		return this_.hasClass(cn);
	}

	/// Forwards to [Element.addClass].
	void add(string cn) {
		this_.addClass(cn);
	}

	/// Forwards to [Element.removeClass].
	void remove(string cn) {
		this_.removeClass(cn);
	}

	/// Removes the class if it is present, adds it otherwise.
	void toggle(string cn) {
		if(contains(cn))
			remove(cn);
		else
			add(cn);
	}

	// this thing supposed to be iterable in javascript but idk how i want to do it in D. meh
	/+
	string[] opIndex() const {
		return this_.classes;
	}
	+/
}

/++
	Returns a helper object to work with classes, just like javascript.

	History:
		Added August 25, 2022
+/
@property inout(ClassListHelper) classList() inout {
	return inout(ClassListHelper)(this);
}
// FIXME: classList is supposed to strip whitespace and duplicates when you use it. need to test.

unittest {
	Element element = Element.make("div");
	element.classList.add("foo");
	assert(element.classList.contains("foo"));
	element.classList.remove("foo");
	assert(!element.classList.contains("foo"));
	element.classList.toggle("bar");
	assert(element.classList.contains("bar"));
}

/// ditto
alias classNames = classes;


/// Appends `c` to the class attribute unless the element already has that class. Returns `this` for chaining.
@scriptable
Element addClass(string c) {
	if(!hasClass(c)) {
		string existing = getAttribute("class");
		// the first class is stored bare; later ones are joined with a single space
		setAttribute("class", existing.length == 0 ? c : existing ~ " " ~ c);
	}

	return this;
}

/// Takes `c` out of the class attribute, leaving the other names in order. Returns `this` for chaining.
@scriptable
Element removeClass(string c) {
	if(hasClass(c)) {
		string remaining;
		foreach(name; classes) {
			if(name != c) {
				if(remaining.length)
					remaining ~= " ";
				remaining ~= name;
			}
		}

		className = remaining.strip();
	}

	return this;
}

/// Tells whether `c` is one of the space-separated names in the class attribute.
bool hasClass(string c) const {
	string cn = className;

	// cheap rejection: if `c` is not even a substring there is nothing to split
	auto idx = cn.indexOf(c);
	if(idx == -1)
		return false;

	foreach(candidate; cn.split(" ")) {
		if(candidate.strip == c)
			return true;
	}
	return false;

	/*
	int rightSide = idx + c.length;

	bool checkRight() {
		if(rightSide == cn.length)
			return true; // it's the only class
		else if(iswhite(cn[rightSide]))
			return true;
		return false; // this is a substring of something else..
	}

	if(idx == 0) {
		return checkRight();
	} else {
		if(!iswhite(cn[idx - 1]))
			return false; // substring
		return checkRight();
	}

	assert(0);
	*/
}


/* *******************************
	DOM Mutation
*********************************/
/++
	Family of convenience functions to quickly add a tag with some text or
	other relevant info (for example, it's a src for an <img> element
	instead of inner text). They forward to [Element.make] then call [appendChild].

	---
	div.addChild("span", "hello there");
	div.addChild("div", Html("<p>children of the div</p>"));
	---
+/
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
	in {
		assert(tagName !is null);
	}
	out(e) {
		//assert(e.parentNode is this);
		//assert(e.parentDocument is this.parentDocument);
	}
do {
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	return appendChild(Element.make(tagName, childInfo, childInfo2));
}

/// ditto
Element addChild(Element e) {
	return appendChild(e);
}

/// ditto
Element addChild(string tagName, Element firstChild, string info2 = null)
	in {
		assert(firstChild !is null);
	}
	out(ret) {
		assert(ret !is null);
		assert(ret.parentNode is this);
		assert(firstChild.parentNode is ret);

		assert(ret.parentDocument is this.parentDocument);
		//assert(firstChild.parentDocument is this.parentDocument);
	}
do {
	auto wrapper = Element.make(tagName, "", info2);
	wrapper.appendChild(firstChild);
	this.appendChild(wrapper);
	return wrapper;
}

/// ditto
Element addChild(string tagName, in Html innerHtml, string info2 = null)
	in {
	}
	out(ret) {
		assert(ret !is null);
		assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(ret.parentDocument is this.parentDocument);
	}
do {
	auto created = Element.make(tagName, "", info2);
	// attach first, then fill in the markup
	this.appendChild(created);
	created.innerHTML = innerHtml.source;
	return created;
}


/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
/// See_Also: [addChild]
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
	in {
		assert(tagName !is null);
		assert(parentNode !is null);
	}
	out(e) {
		assert(e.parentNode is this.parentNode);
		assert(e.parentDocument is this.parentDocument);
	}
do {
	auto e = Element.make(tagName, childInfo, childInfo2);
	return parentNode.insertAfter(this, e);
}

/// ditto
Element addSibling(Element e)
	in {
		// same precondition as the string overload: a sibling needs a parent to live under
		assert(parentNode !is null);
	}
do {
	return parentNode.insertAfter(this, e);
}

/++
	Convenience function to append text intermixed with other children.

	Each argument that is an [Element] (or a subclass) is passed to
	[appendChild]; each argument that is a string type is converted with
	`to!string` and passed to [appendText]. Any other argument type is
	rejected at compile time.

	---
	div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
	div.addChildren("Hello, ", user.name, "!");
	---

	See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
+/
void addChildren(T...)(T t) {
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value, so the checks must look at typeof(item); testing
		// the value itself inside is() is always false
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Appends the list of children to this element, in order, via [appendChild].
void appendChildren(Element[] children) {
	foreach(ele; children)
		appendChild(ele);
}

/// Removes this element from its current parent and appends it to the given `newParent`.
/// Both the current parent and `newParent` must be non-null.
void reparent(Element newParent)
	in {
		assert(newParent !is null);
		assert(parentNode !is null);
	}
	out {
		assert(this.parentNode is newParent);
		//assert(isInArray(this, newParent.children));
	}
do {
	parentNode.removeChild(this);
	newParent.appendChild(this);
}

/**
	Strips this tag out of the document, putting its inner html
	as children of the parent.
2122 2123 For example, given: `<p>hello <b>there</b></p>`, if you 2124 call `stripOut` on the `b` element, you'll be left with 2125 `<p>hello there<p>`. 2126 2127 The idea here is to make it easy to get rid of garbage 2128 markup you aren't interested in. 2129 */ 2130 void stripOut() 2131 in { 2132 assert(parentNode !is null); 2133 } 2134 out { 2135 assert(parentNode is null); 2136 assert(children.length == 0); 2137 } 2138 do { 2139 foreach(c; children) 2140 c.parentNode = null; // remove the parent 2141 if(children.length) 2142 parentNode.replaceChild(this, this.children); 2143 else 2144 parentNode.removeChild(this); 2145 this.children.length = 0; // we reparented them all above 2146 } 2147 2148 /// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check 2149 /// if the element already isn't in a tree, it does nothing. 2150 Element removeFromTree() 2151 in { 2152 2153 } 2154 out(var) { 2155 assert(this.parentNode is null); 2156 assert(var is this); 2157 } 2158 do { 2159 if(this.parentNode is null) 2160 return this; 2161 2162 this.parentNode.removeChild(this); 2163 2164 return this; 2165 } 2166 2167 /++ 2168 Wraps this element inside the given element. 2169 It's like `this.replaceWith(what); what.appendchild(this);` 2170 2171 Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));` 2172 you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`. 2173 +/ 2174 Element wrapIn(Element what) 2175 in { 2176 assert(what !is null); 2177 } 2178 out(ret) { 2179 assert(this.parentNode is what); 2180 assert(ret is what); 2181 } 2182 do { 2183 this.replaceWith(what); 2184 what.appendChild(this); 2185 2186 return what; 2187 } 2188 2189 /// Replaces this element with something else in the tree. 
Element replaceWith(Element e)
	in {
		assert(this.parentNode !is null);
	}
do {
	// pull the replacement out of wherever it currently lives before swapping it in
	e.removeFromTree();
	parentNode.replaceChild(this, e);
	return e;
}

/**
	Concatenates the leading run of text-node children.


	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child that is not a text node.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string text;
	for(size_t i = 0; i < children.length && children[i].nodeType == NodeType.Text; i++)
		text ~= children[i].nodeValue();
	return text;
}


/**
	Returns the text directly under this element.


	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
	past child tags. So, `<example>some <b>bold</b> text</example>`
	will return `some  text` (the text children on either side of the `b`, joined as-is)
	because it only gets the text, skipping non-text children.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string collected;
	foreach(child; children) {
		if(child.nodeType != NodeType.Text)
			continue;
		collected ~= child.nodeValue();
	}

	return collected;
}

/**
	Sets the direct text, without modifying other child nodes.


	Unlike [innerText], this does *not* remove existing elements in the element.

	It only replaces the first text node it sees.

	If there are no text nodes, it calls [appendText].

	So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
	foreach(child; children) {
		if(child.nodeType != NodeType.Text)
			continue;

		// first text child wins; later text nodes are left alone
		auto textNode = cast(TextNode) child;
		textNode.contents = text;
		return;
	}

	appendText(text);
}

// Intentionally empty in this class: it is a virtual hook
// meant for links and forms.
void setValue(string field, string value) { }


// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
//
// Packs the arguments into a DomMutationEvent targeted at this element and
// hands it to the owning document. Does nothing when there is no document.
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
	auto doc = parentDocument;
	if(doc is null)
		return;

	DomMutationEvent event;
	event.operation = operation;
	event.target = this;
	event.relatedString = s1;
	event.relatedString2 = s2;
	event.related = r;
	event.related2 = r2;
	doc.dispatchMutationEvent(event);
}

// putting all the members up front

// this ought to be private. don't use it directly.
Element[] children;

/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;

/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;

/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;

private DomParent parent_;

/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
@property inout(Document) parentDocument() inout {
	if(this.parent_ is null)
		return null;
	// Walk up through element parents; `cast()` strips the inout qualifier so
	// the locals can be reassigned during the walk.
	auto p = cast() this.parent_.asElement;
	auto prev = cast() this;
	while(p) {
		prev = p;
		// an element ancestor with no parent at all means the tree is detached
		if(p.parent_ is null)
			return null;
		p = cast() p.parent_.asElement;
	}
	// `prev` is the topmost element; its parent is not an element, so ask it for the document view
	// NOTE(review): presumably asDocument yields null when the parent is not a Document - confirm against DomParent
	return cast(inout) prev.parent_.asDocument;
}

/// Setter counterpart: stores `doc` directly as this element's parent.
/*deprecated*/ @property void parentDocument(Document doc) {
	parent_ = doc;
}

/// Returns the parent node in the tree this element is attached to.
/// Returns `null` when there is no parent. When the direct parent is a
/// [DocumentFragment], the fragment is skipped and the fragment's own
/// parent element (or `null`) is returned instead.
inout(Element) parentNode() inout {
	if(parent_ is null)
		return null;

	auto p = parent_.asElement;

	if(cast(DocumentFragment) p) {
		if(p.parent_ is null)
			return null;
		else
			return p.parent_.asElement;
	}

	return p;
}

// Setter: makes `e` this element's parent and returns `e`. It only updates
// this element's parent link; `e.children` is not touched here.
//protected
Element parentNode(Element e) {
	parent_ = e;
	return e;
}

// these are here for event handlers. Don't forget that this library never fires events.
// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)

version(dom_with_events) {
	// handlers keyed by event name (without any leading "on")
	EventHandler[][string] bubblingEventHandlers;
	EventHandler[][string] capturingEventHandlers;
	EventHandler[string] defaultEventHandlers;

	/// Registers `handler` for `event`. A leading "on" (as in "onclick") is
	/// stripped from the name. `useCapture` selects the capturing list
	/// instead of the bubbling one.
	void addEventListener(string event, EventHandler handler, bool useCapture = false) {
		if(event.length > 2 && event[0..2] == "on")
			event = event[2 .. $];

		if(useCapture)
			capturingEventHandlers[event] ~= handler;
		else
			bubblingEventHandlers[event] ~= handler;
	}
}


// and now methods

/++
	Convenience function to try to do the right thing for HTML. This is the main way I create elements.

	History:
		On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private
		immutable global list for HTML.
		It still defaults to the same list, but you can change it now via
		the parameter.

	Params:
		tagName = tag to create. "#text" returns a [TextNode] holding `childInfo`;
			"table", "a", "form", "tr", "td" and "th" get their dedicated subclasses;
			anything else is a plain [Element].
		childInfo = primary piece of info; its meaning depends on the tag (see the
			second switch in the body). When it is `null`, neither `childInfo`
			nor `childInfo2` is applied.
		childInfo2 = secondary piece of info; its meaning also depends on the tag.
		selfClosedElements = tag names to be treated as self-closed.
	See_Also:
		[addChild], [addSibling]
+/
static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	bool selfClosed = tagName.isInArray(selfClosedElements);

	Element e;
	// want to create the right kind of object for the given tag...
	switch(tagName) {
		case "#text":
			// text nodes take no further setup, so return right away
			e = new TextNode(null, childInfo);
			return e;
		// break;
		case "table":
			e = new Table(null);
		break;
		case "a":
			e = new Link(null);
		break;
		case "form":
			e = new Form(null);
		break;
		case "tr":
			e = new TableRow(null);
		break;
		case "td", "th":
			e = new TableCell(null, tagName);
		break;
		default:
			e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
	}

	// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
	e.tagName = tagName;
	e.selfClosed = selfClosed;

	// per-tag interpretation of childInfo / childInfo2; skipped entirely when childInfo is null
	if(childInfo !is null)
		switch(tagName) {
			/* html5 convenience tags */
			case "audio":
				// childInfo: passed to a nested <source> child; childInfo2: fallback text
				if(childInfo.length)
					e.addChild("source", childInfo);
				if(childInfo2 !is null)
					e.appendText(childInfo2);
			break;
			case "source":
				// childInfo: src; childInfo2: type
				e.src = childInfo;
				if(childInfo2 !is null)
					e.type = childInfo2;
			break;
			/* regular html 4 stuff */
			case "img":
				// childInfo: src; childInfo2: alt
				e.src = childInfo;
				if(childInfo2 !is null)
					e.alt = childInfo2;
			break;
			case "link":
				// childInfo: href; childInfo2: rel
				e.href = childInfo;
				if(childInfo2 !is null)
					e.rel = childInfo2;
			break;
			case "option":
				// childInfo: visible text; childInfo2: value
				e.innerText = childInfo;
				if(childInfo2 !is null)
					e.value = childInfo2;
			break;
			case "input":
				// inputs made this way are always hidden; childInfo: name; childInfo2: value
				e.type = "hidden";
				e.name = childInfo;
				if(childInfo2 !is null)
					e.value = childInfo2;
			break;
			case "button":
				// childInfo: visible text; childInfo2: type
				e.innerText = childInfo;
				if(childInfo2 !is null)
					e.type = childInfo2;
			break;
			case "a":
				// childInfo: link text; childInfo2: href
				e.innerText = childInfo;
				if(childInfo2 !is null)
					e.href = childInfo2;
			break;
			case "script":
			case "style":
				// childInfo is assigned through innerRawSource rather than innerText; childInfo2 is unused
				e.innerRawSource = childInfo;
			break;
			case "meta":
				// childInfo: name; childInfo2: content
				e.name = childInfo;
				if(childInfo2 !is null)
					e.content = childInfo2;
			break;
			/* generically, assume we were passed text and perhaps class */
			default:
				e.innerText = childInfo;
				if(childInfo2.length)
					e.className = childInfo2;
		}

	return e;
}

/// ditto
static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
	// FIXME: childInfo2 is ignored when info1 is null
	// the zero-length slice is empty but non-null, so make() still applies childInfo2
	auto m = Element.make(tagName, "not null"[0..0], childInfo2);
	m.innerHTML = innerHtml.source;
	return m;
}

/// ditto
static Element make(string tagName, Element child, string childInfo2 = null) {
	// NOTE(review): childInfo is passed as null here, so per the `childInfo !is null`
	// guard in the main overload childInfo2 appears to be ignored - confirm whether that is intended
	auto m = Element.make(tagName, cast(string) null, childInfo2);
	m.appendChild(child);
	return m;
}


/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
/// Note that `_parentDocument` is not used by this constructor's body, and that
/// `_tagName` must not contain a space (checked with an assert).
this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	selfClosed = _selfClosed;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));

	assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
}

/++
	Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
	Note also that without a parent document, elements are always in strict, case-sensitive mode.
2505 2506 History: 2507 On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as 2508 before: using the hard-coded list of HTML elements, but it can now be overridden. If you use 2509 [Document.createElement], it will use the list set for the current document. Otherwise, you can pass 2510 something here if you like. 2511 +/ 2512 this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2513 tagName = _tagName; 2514 if(_attributes !is null) 2515 attributes = _attributes; 2516 selfClosed = tagName.isInArray(selfClosedElements); 2517 2518 // this is meant to reserve some memory. It makes a small, but consistent improvement. 2519 //children.length = 8; 2520 //children.length = 0; 2521 2522 version(dom_node_indexes) 2523 this.dataset.nodeIndex = to!string(&(this.attributes)); 2524 } 2525 2526 private this(Document _parentDocument) { 2527 version(dom_node_indexes) 2528 this.dataset.nodeIndex = to!string(&(this.attributes)); 2529 } 2530 2531 2532 /* ******************************* 2533 Navigating the DOM 2534 *********************************/ 2535 2536 /// Returns the first child of this element. If it has no children, returns null. 2537 /// Remember, text nodes are children too. 2538 @property Element firstChild() { 2539 return children.length ? children[0] : null; 2540 } 2541 2542 /// Returns the last child of the element, or null if it has no children. Remember, text nodes are children too. 2543 @property Element lastChild() { 2544 return children.length ? children[$ - 1] : null; 2545 } 2546 2547 // FIXME UNTESTED 2548 /// the next or previous element you would encounter if you were reading it in the source. May be a text node or other special non-tag object if you enabled them. 
Element nextInSource() {
	// order of preference: first child, then next sibling, then the next
	// sibling of the nearest ancestor that has one
	auto n = firstChild;
	if(n is null)
		n = nextSibling();
	if(n is null) {
		auto p = this.parentNode;
		while(p !is null && n is null) {
			n = p.nextSibling;
			// keep climbing; without this step the loop never terminates
			// when the parent has no following sibling
			p = p.parentNode;
		}
	}

	return n;
}

/// ditto
Element previousInSource() {
	auto p = previousSibling;
	// With no previous sibling this is the first child, so the thing right
	// before it in the source is the parent's opening tag. (`null` if detached.)
	if(p is null)
		p = parentNode;
	return p;
}

/++
	Returns the next or previous sibling that is not a text node. Please note: the behavior with comments is subject to change. Currently, it will return a comment or other nodes if it is in the tree (if you enabled it with [Document.enableAddingSpecialTagsToDom] or [Document.parseSawComment]) and not if you didn't, but the implementation will probably change at some point to skip them regardless.

	Equivalent to [previousSibling]/[nextSibling]("*").

	Please note it may return `null`.
+/
@property Element previousElementSibling() {
	return previousSibling("*");
}

/// ditto
@property Element nextElementSibling() {
	return nextSibling("*");
}

/++
	Returns the next or previous sibling matching the `tagName` filter. The default filter of `null` will return the first sibling it sees, even if it is a comment or text node, or anything else. A filter of `"*"` will match any tag with a name. Otherwise, the string must match the [tagName] of the sibling you want to find.
+/
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ps = null;
	// remember the most recent match seen before reaching ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		if(tagName == "*" && e.nodeType != NodeType.Text) {
			ps = e;
		} else if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

/// ditto
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have walked past ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe) {
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}


/++
	Gets the nearest node, going up the chain, with the given tagName
	May return null or throw. The type `T` will specify a subclass like
	[Form], [Table], or [Link], which it will cast for you when found.

	When `tagName` is `null` and `T` is [Form], [Table], or [Link], the
	filter defaults to `"form"`, `"table"`, or `"a"` respectively. With a
	`null` filter and any other `T`, the immediate parent is the candidate.

	Returns: the matching ancestor, or `null` if `T` is [Element] and none was found.
	Throws: [ElementNotFoundException] if `T` is a subclass and no ancestor
		matched or the match could not be cast to `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was `==`, a no-op comparison that left the filter null
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/++
	Searches this element and the tree of elements under it for one matching the given `id` attribute.
+/
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(candidate; tree) {
		if(candidate.id == id)
			return candidate;
	}
	return null;
}

/++
	Returns the first element in [tree] that matches the given `selector`,
	or `null` if none does.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.

	A selector that begins with a combinator (for example `"> p"`) is
	accepted: it is treated as relative to this element.

	Tip: to use namespaces, escape the colon in the name:

	---
		element.querySelector(`ns\:tag`); // the backticks are raw strings then the backslash is interpreted by querySelector
	---
+/
@scriptable
Element querySelector(string selector) {
	auto parsed = Selector(selector);

	foreach(ref component; parsed.components) {
		if(component.parts.length == 0 || component.parts[0].separation <= 0)
			continue;

		// this is illegal in standard dom, but i use it a lot
		// gonna insert a :scope thing

		SelectorPart scopePart;
		scopePart.separation = -1;
		scopePart.scopeElement = true;
		component.parts = scopePart ~ component.parts;
	}

	foreach(candidate; tree) {
		if(parsed.matchesElement(candidate, this))
			return candidate;
	}
	return null;
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	Selector parsed = Selector(selector);
	return parsed.matchesElement(this);
}

/// Walks from this element up through its parents and returns the first one
/// for which [matches] is true, or `null` if the chain ends without a match.
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	for(Element current = this; current !is null; current = current.parentNode) {
		if(current.matches(selector))
			return current;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector?
and given a selector, give me all the elements 2780 2781 The name `getElementsBySelector` was the original name, written back before the name `querySelector` was standardized (this library is older than you might think!), but they do the same thing.. 2782 */ 2783 @scriptable 2784 Element[] querySelectorAll(string selector) { 2785 // FIXME: this function could probably use some performance attention 2786 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2787 2788 2789 bool caseSensitiveTags = true; 2790 if(parentDocument && parentDocument.loose) 2791 caseSensitiveTags = false; 2792 2793 Element[] ret; 2794 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2795 ret ~= sel.getElements(this, null); 2796 return ret; 2797 } 2798 2799 /// ditto 2800 alias getElementsBySelector = querySelectorAll; 2801 2802 /++ 2803 Returns child elements that have the given class name or tag name. 2804 2805 Please note the standard specifies this should return a live node list. This means, in Javascript for example, if you loop over the value returned by getElementsByTagName and getElementsByClassName and remove the elements, the length of the list will decrease. When I implemented this, I figured that was more trouble than it was worth and returned a plain array instead. By the time I had the infrastructure to make it simple, I didn't want to do the breaking change. 2806 2807 So these is incompatible with Javascript in the face of live dom mutation and will likely remain so. 2808 +/ 2809 Element[] getElementsByClassName(string cn) { 2810 // is this correct? 2811 return getElementsBySelector("." 
~ cn); 2812 } 2813 2814 /// ditto 2815 Element[] getElementsByTagName(string tag) { 2816 if(parentDocument && parentDocument.loose) 2817 tag = tag.toLower(); 2818 Element[] ret; 2819 foreach(e; tree) 2820 if(e.tagName == tag) 2821 ret ~= e; 2822 return ret; 2823 } 2824 2825 2826 /* ******************************* 2827 Attributes 2828 *********************************/ 2829 2830 /** 2831 Gets the given attribute value, or null if the 2832 attribute is not set. 2833 2834 Note that the returned string is decoded, so it no longer contains any xml entities. 2835 */ 2836 @scriptable 2837 string getAttribute(string name) const { 2838 if(parentDocument && parentDocument.loose) 2839 name = name.toLower(); 2840 auto e = name in attributes; 2841 if(e) 2842 return *e; 2843 else 2844 return null; 2845 } 2846 2847 /** 2848 Sets an attribute. Returns this for easy chaining 2849 */ 2850 @scriptable 2851 Element setAttribute(string name, string value) { 2852 if(parentDocument && parentDocument.loose) 2853 name = name.toLower(); 2854 2855 // I never use this shit legitimately and neither should you 2856 auto it = name.toLower(); 2857 if(it == "href" || it == "src") { 2858 auto v = value.strip().toLower(); 2859 if(v.startsWith("vbscript:")) 2860 value = value[9..$]; 2861 if(v.startsWith("javascript:")) 2862 value = value[11..$]; 2863 } 2864 2865 attributes[name] = value; 2866 2867 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 2868 2869 return this; 2870 } 2871 2872 /** 2873 Returns if the attribute exists. 2874 */ 2875 @scriptable 2876 bool hasAttribute(string name) { 2877 if(parentDocument && parentDocument.loose) 2878 name = name.toLower(); 2879 2880 if(name in attributes) 2881 return true; 2882 else 2883 return false; 2884 } 2885 2886 /** 2887 Removes the given attribute from the element. 
*/
@scriptable
Element removeAttribute(string name)
	out(ret) {
		assert(ret is this);
	}
do {
	auto doc = parentDocument;
	if(doc !is null && doc.loose)
		name = name.toLower();

	if(name in attributes)
		attributes.remove(name);

	// the observer is notified whether or not the attribute was present
	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets or sets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto current = getAttribute("class");
	return current is null ? "" : current;
}

/// ditto
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	// a non-null argument makes this a setter; either way the current value is returned
	if(v !is null)
		setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children.
*/
@property inout(Element[]) childNodes() inout {
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	return DataSet(this);
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	return AttributeSet(this);
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	return ElementStyle(this);
}

/++
	This sets the style attribute with a string, then returns the same helper the getter gives.
+/
@property ElementStyle style(string s) {
	setAttribute("style", s);
	return this.style;
}

// Currently a no-op: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;

/// Don't use this. It can try to parse out the style element but it isn't complete and if I get back to it, it won't be for a while.
@property CssStyle computedStyle() {
	// built once on first access and cached in _computedStyle afterwards
	if(_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");

	/* we'll treat shitty old html attributes as css here */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetLeft;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetTop;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
/// Every former child has its parentNode reset to null.
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
do {
	foreach(child; children)
		child.parentNode = null;
	children = null;
}

/++
	Adds a sibling element before or after this one in the dom.

	This element must currently have a parent, since the sibling is inserted
	into that parent's child list. `e` must not have a parent (see [insertAfter]
	and [insertBefore]).

	History: added June 13, 2020
+/
Element appendSibling(Element e)
	in {
		// a detached node has nowhere to put a sibling
		assert(parentNode !is null);
	}
do {
	parentNode.insertAfter(this, e);
	return e;
}

/// ditto
Element prependSibling(Element e)
	in {
		assert(parentNode !is null);
	}
do {
	parentNode.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	If `e` is a [DocumentFragment], its children are appended in its place and
	reparented to this element, as [insertBefore] and [prependChild] do.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
		assert(e !is this);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
do {
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	selfClosed = false;
	if(auto frag = cast(DocumentFragment) e) {
		children ~= frag.children;
		// the spliced-in nodes must point at their new parent, otherwise a
		// later removeChild/appendChild on them goes to the wrong element
		foreach(child; frag.children)
			child.parentNode = this;
	} else
		children ~= e;

	e.parentNode = this;

	/+
	foreach(item; e.tree)
		item.parentDocument = this.parentDocument;
	+/

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param.
/// `where` must be a child of this element and `what` must not have a parent.
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what) {
				children = children[0..i] ~ frag.children ~ children[i..$];
				foreach(child; frag.children)
					child.parentNode = this;
			} else {
				children = children[0..i] ~ what ~ children[i..$];
			}
			what.parentNode = this;
			return what;
		}
	}

	// `where` was not found (only possible when contracts are compiled out):
	// keep the historical behavior of returning `what` without inserting it.
	return what;
}

/++
	Inserts the given element `what` as a sibling of the `this` element, after the element `where` in the parent node.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		// everything up to and including `where` stays in front of the new content
		auto head = children[0 .. idx + 1];
		auto tail = children[idx + 1 .. $];

		if(auto frag = cast(DocumentFragment) what) {
			// a fragment contributes its children rather than itself
			children = head ~ what.children ~ tail;
			foreach(child; frag.children)
				child.parentNode = this;
		} else {
			children = head ~ what ~ tail;
		}

		what.parentNode = this;
		return what;
	}

	// `where` was not found; nothing is inserted
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
do {
	foreach(ref slot; this.children) {
		if(slot !is child)
			continue;

		// detach the old node, then overwrite its slot in place
		slot.parentNode = null;
		slot = replacement;
		slot.parentNode = this;
		return child;
	}
	assert(0);
}


/++
	Appends the given to the node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
See_Also:
		[firstInnerText], [directText], [innerText], [appendChild]
+/
@scriptable
Element appendText(string text) {
	// the text is wrapped in a new TextNode and added as the last child
	Element node = new TextNode(parentDocument, text);
	appendChild(node);
	return this;
}

/++
	Returns child elements which are of a tag type (excludes text, comments, etc.).


	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
+/
@property Element[] childElements(string tagName = null) {
	Element[] matches;
	foreach(candidate; children) {
		// nodeType 1 is what the base Element.nodeType returns
		if(candidate.nodeType != 1)
			continue;
		if(tagName !is null && candidate.tagName != tagName)
			continue;
		matches ~= candidate;
	}
	return matches;
}

/++
	Appends the given html to the element, returning the elements appended


	This is similar to `element.innerHTML += "html string";` in Javascript.
+/
@scriptable
Element[] appendHtml(string html) {
	// parse inside a throwaway wrapper element, then move the parsed nodes over
	auto parsed = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(parsed.root);
}


/++
	Inserts a child under this element after the element `where`.
+/
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
do {
	foreach(i, c; children) {
		if(c is where) {
			immutable at = i + 1;
			if(auto frag = cast(DocumentFragment) child) {
				children = children[0 .. at] ~ frag.children ~ children[at .. $];
				// the fragment's children now live under this element
				// (matches insertBefore / insertAfter / prependChild)
				foreach(fragChild; frag.children)
					fragChild.parentNode = this;
			} else
				children = children[0 .. at] ~ child ~ children[at .. $];
			child.parentNode = this;
			break;
		}
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.

	Returns:
		The stolen children.

	Throws:
		Exception ("no such child") if `position` is given but is not a child of
		`this`. Nothing is modified in that case.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
do {
	// Find the insertion point before touching either tree. Previously a
	// `position` that was not one of our children left the stolen nodes
	// reparented to `this` but present in neither child list.
	size_t insertAt = children.length;
	if(position !is null) {
		bool found = false;
		foreach(i, child; children) {
			if(child is position) {
				insertAt = i;
				found = true;
				break;
			}
		}
		if(!found)
			throw new Exception("no such child");
	}

	auto stolen = e.children[];
	foreach(c; stolen)
		c.parentNode = this;

	if(position is null)
		children ~= stolen;
	else
		children = children[0 .. insertAt] ~ stolen ~ children[insertAt .. $];

	e.children.length = 0;

	return stolen;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
do {
	if(auto frag = cast(DocumentFragment) e) {
		children = e.children ~ children;
		foreach(child; frag.children)
			child.parentNode = this;
	} else
		children = e ~ children;
	e.parentNode = this;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// only the part written by this call is returned, so a shared buffer works
	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	selfClosed = false;

	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// take the parsed nodes out of the temporary document
	auto parsed = doc.root.children;
	doc.root.children = null;

	// Detach the old children only after parsing succeeded. Without this they
	// kept pointing at us as their parent, and a later appendChild of one of
	// them would call removeChild on us and throw "no such child".
	removeAllChildren();

	children = parsed;
	foreach(c; children) {
		c.parentNode = this;
	}

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// drop the previous content so it does not keep a stale parentNode
	removeAllChildren();

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
	}

	stripOut();

	return doc.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// detach the previous children instead of just truncating the list, so
	// they do not keep claiming this element as their parent
	removeAllChildren();

	auto rs = new RawSource(parentDocument, rawSource);
	children ~= rs;
	rs.parentNode = this;
}

/++
	Replaces the element `find`, which must be a child of `this`, with the element `replace`, which must have no parent.

	Returns:
		`replace`

	Throws:
		Exception if `find` is not among the children.
+/
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(find.parentNode is this);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
do {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(ref slot; children) {
		if(slot is find) {
			replace.parentNode = this;
			slot.parentNode = null;
			slot = replace; // overwrite in place so the position is preserved
			return replace;
		}
	}

	throw new Exception("no such child ");// ~ find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString);
}

/**
	Replaces the given element with a whole group.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
do {
	// replacing with nothing is just a removal
	if(replace.length == 0) {
		removeChild(find);
		return;
	}

	// the index stays an int because it is handed to the module-level insertAfter helper
	for(int idx = 0; idx < children.length; idx++) {
		if(children[idx] !is find)
			continue;

		children[idx].parentNode = null; // this element should now be dead

		// the first replacement takes over the old slot, the rest follow it
		children[idx] = replace[0];
		foreach(e; replace)
			e.parentNode = this;

		children = .insertAfter(children, idx, replace[1..$]);
		return;
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is c)
			continue;

		// rebuild the list without the matching slot, then detach the node
		children = children[0 .. idx] ~ children[idx + 1 .. $];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
do {
	// hand back a copy so the caller's list is independent of ours
	Element[] oldChildren = children.dup;
	foreach(c; oldChildren)
		c.parentNode = null;

	children.length = 0;

	return oldChildren;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> & code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like [textContent].

	See_Also:
		[visibleText], which is closer to what the real `innerText`
		does.
*/
@scriptable
@property string innerText() const {
	string s;
	foreach(child; children) {
		// text nodes contribute their value, everything else is recursed into
		if(child.nodeType != NodeType.Text)
			s ~= child.innerText;
		else
			s ~= child.nodeValue();
	}
	return s;
}

/// ditto
alias textContent = innerText;

/++
	Gets the element's visible text, similar to how it would look assuming
	the document was HTML being displayed by a browser. This means it will
	attempt whitespace normalization (unless it is a `<pre>` tag), add `\n`
	characters for `<br>` tags, and I reserve the right to make it process
	additional css and tags in the future.

	If you need specific output, use the more stable [textContent] property
	or iterate yourself with [tree] or a recursive function with [children].

	History:
		Added March 25, 2022 (dub v10.8)
+/
string visibleText() const {
	return this.visibleTextHelper(this.tagName == "pre");
}

// `pre` is true once we are inside a <pre>, where whitespace is kept as-is
private string visibleTextHelper(bool pre) const {
	string result;
	foreach(thing; this.children) {
		if(thing.nodeType == NodeType.Text)
			result ~= pre ? thing.nodeValue : normalizeWhitespace(thing.nodeValue);
		else if(thing.tagName == "br")
			result ~= "\n";
		else
			result ~= thing.visibleTextHelper(pre || thing.tagName == "pre");
	}
	return result;
}

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;

	// Detach the old children first. Without this they kept pointing at us as
	// their parent, and a later appendChild of one of them would call
	// removeChild on us and throw "no such child".
	removeAllChildren();

	Element e = new TextNode(parentDocument, text);
	children = [e];
	e.parentNode = this;
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
do {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	auto e = Element.make(this.tagName);
	e.attributes = this.attributes.aadup;
	e.selfClosed = this.selfClosed;

	if(deepClone) {
		foreach(child; children) {
			e.appendChild(child.cloneNode(true));
		}
	}


	return e;
}

/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
string nodeValue() const {
	return "";
}

// should return int
///.
@property int nodeType() const {
	return 1;
}


invariant () {
	debug assert(tagName.indexOf(" ") == -1);

	// commented cuz it gets into recursive pain and eff dat.
	/+
	if(children !is null)
	foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
		assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}
	+/

	/+
	// this isn't helping
	if(parent_ && parent_.asElement) {
		bool found = false;
		foreach(child; parent_.asElement.children)
			if(child is this)
				found = true;
		assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement)));
	}
	+/

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	return writeToAppender();
}

/++
	Returns if the node would be printed to string as `<tag />` or `<tag></tag>`. In other words, if it has no non-empty text nodes and no element nodes. Please note that whitespace text nodes are NOT considered empty; `Html("<tag> </tag>").isEmpty == false`.


	The value is undefined if there are comment or processing instruction nodes. The current implementation returns false if it sees those, assuming the nodes haven't been stripped out during parsing. But I'm not married to the current implementation and reserve the right to change it without notice.

	History:
		Added December 3, 2021 (dub v10.5)

+/
public bool isEmpty() const {
	foreach(child; this.children) {
		// any non-text node is of course not empty since that's a tag,
		// and a text node only counts as empty when its value is null or ""
		immutable bool isText = child.nodeType == NodeType.Text;
		if(!isText || child.nodeValue.length)
			return false;
	}

	return true;
}

// Builds the newline-plus-indentation prefix used by the pretty printer.
// Returns null when indentWith is null. With insertComments the whitespace
// is wrapped in an html comment.
protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
	if(indentWith is null)
		return null;

	// at the top we don't have anything to really do
	//if(parent_ is null)
		//return null;

		// I've used isEmpty before but this other check seems better....
		//|| this.isEmpty())

	string prefix = insertComments ? "<!--" : "";

	prefix ~= "\n";
	foreach(level; 0 .. indentationLevel)
		prefix ~= indentWith;

	if(insertComments)
		prefix ~= "-->";

	return prefix;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.

	$(PITFALL
		This function is not stable. Its interface and output may change without
		notice.
The only promise I make is that it will continue to make a best-
		effort attempt at being useful for debugging by human eyes.

		I have used it in the past for diffing html documents, but even then, it
		might change between versions. If it is useful, great, but beware; this
		use is at your own risk.
	)

	History:
		On November 19, 2021, I changed this to `final`. If you were overriding it,
		change your override to `toPrettyStringImpl` instead. It now just calls
		`toPrettyStringImpl.strip` to be an entry point for a stand-alone call.

		If you are calling it as part of another implementation, you might want to
		change that call to `toPrettyStringImpl` as well.

		I am NOT considering this a breaking change since this function is documented
		to only be used for eyeball debugging anyway, which means the exact format is
		not specified and the override behavior can generally not be relied upon.

		(And I find it extremely unlikely anyone was subclassing anyway, but if you were,
		email me, and we'll see what we can do. I'd like to know at least.)

		I reserve the right to make future changes in the future without considering
		them breaking as well.
+/
final string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	return toPrettyStringImpl(insertComments, indentationLevel, indentWith).strip;
}

string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {

	// first step is to concatenate any consecutive text nodes to simplify
	// the white space analysis. this changes the tree! but i'm allowed since
	// the comment always says it changes the comments
	//
	// actually i'm not allowed cuz it is const so i will cheat and lie
	/+
	TextNode lastTextChild = null;
	for(int a = 0; a < this.children.length; a++) {
		auto child = this.children[a];
		if(auto tn = cast(TextNode) child) {
			if(lastTextChild) {
				lastTextChild.contents ~= tn.contents;
				for(int b = a; b < this.children.length - 1; b++)
					this.children[b] = this.children[b + 1];
				this.children = this.children[0 .. $-1];
			} else {
				lastTextChild = tn;
			}
		} else {
			lastTextChild = null;
		}
	}
	+/

	auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements);

	// local view of the children in which each run of adjacent text nodes is
	// merged into one freshly created TextNode; the real tree is left alone
	const(Element)[] children;

	TextNode mergedText = null;
	foreach(child; this.children) {
		if(auto tn = cast(const(TextNode)) child) {
			if(mergedText is null) {
				// first text node of a run: start a new merged node
				mergedText = new TextNode("");
				mergedText.parentNode = cast(Element) this;
				children ~= mergedText;
			}
			mergedText.contents ~= tn.contents;
		} else {
			// any other node ends the current run of text
			mergedText = null;
			children ~= child;
		}
	}

	string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

	s ~= "<";
	s ~= tagName;

	// i sort these for consistent output. might be more legible
	// but especially it keeps it the same for diff purposes.
	import std.algorithm : sort;
	auto keys = sort(attributes.keys);
	foreach(n; keys) {
		auto v = attributes[n];
		s ~= " ";
		s ~= n;
		s ~= "=\"";
		s ~= htmlEntitiesEncode(v);
		s ~= "\"";
	}

	if(selfClosed){
		s ~= " />";
		return s;
	}

	s ~= ">";

	// for simple `<collection><item>text</item><item>text</item></collection>`, let's
	// just keep them on the same line

	if(isEmpty) {
		// no work needed, this is empty so don't indent just for a blank line
	} else if(children.length == 1 && children[0].isEmpty) {
		// just one empty one, can put it inline too
		s ~= children[0].toString();
	} else if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) {
		foreach(child; children) {
			s ~= child.toString();//toPrettyString(false, 0, null);
		}
	} else {
		// block content: every child goes one level deeper and the closing
		// tag gets its own indented line
		foreach(child; children) {
			assert(child !is null);

			s ~= child.toPrettyStringImpl(insertComments, indentationLevel + 1, indentWith);
		}

		s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
	}

	s ~= "</";
	s ~= tagName;
	s ~= ">";

	return s;
}

/+
/// Writes out the opening tag only, if applicable.
string writeTagOnly(Appender!string where = appender!string()) const {
+/

/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
/// Note: the ordering of attributes in the string is undefined.
/// Returns the string it creates.
string writeToAppender(Appender!string where = appender!string()) const {
	assert(tagName !is null);

	where.reserve((this.children.length + 1) * 512);

	// only what this call writes is returned, even if the buffer already had data
	immutable begin = where.data.length;

	where.put("<");
	where.put(tagName);

	import std.algorithm : sort;
	auto keys = sort(attributes.keys);
	foreach(n; keys) {
		auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later.
		//assert(v !is null);
		where.put(" ");
		where.put(n);
		where.put("=\"");
		htmlEntitiesEncode(v, where);
		where.put("\"");
	}

	if(selfClosed) {
		where.put(" />");
	} else {
		where.put('>');

		innerHTML(where);

		where.put("</");
		where.put(tagName);
		where.put('>');
	}

	return where.data[begin .. $];
}

/**
	Returns a lazy range of all its children, recursively.
*/
@property ElementStream tree() {
	return new ElementStream(this);
}

// I moved these from Form because they are generally useful.
// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
// FIXME: add overloads for other label types...
/++
	Adds a form field to this element, normally a `<input>` but `type` can also be `"textarea"`.

	This is fairly html specific and the label uses my style. I recommend you view the source before you use it to better understand what it does.
+/
/// Tags: HTML, HTML5
Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");

	// checkboxes and radios get their caption after the control, everything else before it
	immutable bool captionAfter = (type == "checkbox" || type == "radio");

	if(!captionAfter)
		wrapper.addChild("span", label);

	Element input;
	if(type == "textarea") {
		input = wrapper.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
	} else {
		input = wrapper.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);
	}

	if(captionAfter)
		wrapper.addChild("span", label);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}

/// ditto
Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild(label);

	Element input;
	if(type == "textarea") {
		input = wrapper.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
	} else {
		input = wrapper.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);
	}

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}

/// ditto
Element addField(string label, string name, FormFieldOptions fieldOptions) {
	return addField(label, name, "text", fieldOptions);
}

/// ditto
Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild("span", label);
	auto select = wrapper.addChild("select").setAttribute("name", name);

	// one <option> per entry of the associative array
	foreach(k, opt; options)
		select.addChild("option", opt, k);

	// FIXME: implement requirements somehow
	// (fieldOptions is currently not applied to the select)

	return wrapper;
}

/// ditto
Element addSubmitButton(string label = null) {
	auto holder = this.addChild("div");
	holder.addClass("submit-holder");

	auto button = holder.addChild("input");
	button.type = "submit";
	if(label.length)
		button.value = label;

	return holder;
}

}
// computedStyle could arguably be removed to bring size down
//pragma(msg, __traits(classInstanceSize, Element));
//pragma(msg, Element.tupleof);

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/++
	Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)

	History:
		On December 16, 2022, it disabled the special case treatment of `<script>` and `<style>` that [Document]
		does for HTML. To get the old behavior back, add `, true` to your constructor call.
+/
/// Group: core_functionality
class XmlDocument : Document {
	/++
		Parses `data` in strict mode as XML.

		Params:
			data = the XML source text
			enableHtmlHacks = when true, the negated flag handed to `parseStrict` is false,
				which brings back the HTML `<script>`/`<style>` special case (see History above)
	+/
	this(string data, bool enableHtmlHacks = false) {
		// XML has none of HTML's element categories
		selfClosedElements = null;
		inlineElements = null;
		rawSourceElements = null;

		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data, !enableHtmlHacks);
	}
}

unittest {
	// FIXME: i should also make XmlDocument do different entities than just html too.
	auto markup = "<html><style>foo {}</style><script>void function() { a < b; }</script></html>";

	// the HTML document treats style and script contents as raw source
	auto htmlDocument = new Document(markup, true, true);
	assert(htmlDocument.requireSelector("style").children[0].tagName == "#raw");
	assert(htmlDocument.requireSelector("script").children[0].tagName == "#raw");

	try {
		auto xml = new XmlDocument(markup);
		assert(0);
	} catch(MarkupException e) {
		// failure expected, script special case is not valid XML without a dtd (which isn't here)
	}
	//assert(xml.requireSelector("style").children[0].tagName == "#raw");
	//assert(xml.requireSelector("script").children[0].tagName == "#raw");
}



import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	// search the whole document, starting at its root element
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// normalize the needle once; each comment is normalized the same way before comparing
	auto wanted = txt.strip().toLower();

	Element[] matches;
	foreach(candidate; element.getElementsByTagName("#comment")) {
		if(candidate.nodeValue().strip().toLower() == wanted)
			matches ~= candidate;
	}

	return matches;
}

/// An option type that propagates null.
/// See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	auto opDispatch(string method, T...)(T args) {
		// what the wrapped element's method would return for these arguments
		alias Result = typeof(__traits(getMember, element, method)(args));

		static if(is(Result : Element)) {
			// element-returning methods: a null receiver yields a null wrapper
			if(element is null)
				return MaybeNullElement!Result(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(Result == string)) {
			// string-returning methods: a null receiver yields a null string
			return (element is null)
				? cast(string) null
				: __traits(getMember, element, method)(args);
		} else static if(is(Result == void)) {
			// void methods: a null receiver makes the call a no-op
			if(element !is null)
				__traits(getMember, element, method)(args);
		} else {
			// no other return types are supported through the wrapper
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// Wraps a single element.
	this(Element e) {
		elements = [e];
	}

	/// Collects every element under `e` that `querySelectorAll` returns for `selector`.
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Wraps an existing array (the array is not copied).
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `selector` against every member and gathers all the results into a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection gathered;
		foreach(member; elements)
			gathered.elements ~= member.getElementsBySelector(selector);
		return gathered;
	}

	/// Returns the member at position `i`.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
4263 Element[] opSlice() { 4264 return elements; 4265 } 4266 4267 /// And input range primitives so we can foreach over this 4268 void popFront() { 4269 elements = elements[1..$]; 4270 } 4271 4272 /// ditto 4273 Element front() { 4274 return elements[0]; 4275 } 4276 4277 /// ditto 4278 bool empty() { 4279 return !elements.length; 4280 } 4281 4282 /++ 4283 Collects strings from the collection, concatenating them together 4284 Kinda like running reduce and ~= on it. 4285 4286 --- 4287 document["p"].collect!"innerText"; 4288 --- 4289 +/ 4290 string collect(string method)(string separator = "") { 4291 string text; 4292 foreach(e; elements) { 4293 text ~= mixin("e." ~ method); 4294 text ~= separator; 4295 } 4296 return text; 4297 } 4298 4299 /// Forward method calls to each individual [Element|element] of the collection 4300 /// returns this so it can be chained. 4301 ElementCollection opDispatch(string name, T...)(T t) { 4302 foreach(e; elements) { 4303 mixin("e." ~ name)(t); 4304 } 4305 return this; 4306 } 4307 4308 /++ 4309 Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one. 4310 +/ 4311 ElementCollection wrapIn(Element what) { 4312 foreach(e; elements) { 4313 e.wrapIn(what.cloneNode(false)); 4314 } 4315 4316 return this; 4317 } 4318 4319 /// Concatenates two ElementCollection together. 4320 ElementCollection opBinary(string op : "~")(ElementCollection rhs) { 4321 return ElementCollection(this.elements ~ rhs.elements); 4322 } 4323 } 4324 4325 4326 /// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions. 4327 /// Group: implementations 4328 mixin template JavascriptStyleDispatch() { 4329 /// 4330 string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want. 
4331 if(v !is null) 4332 return set(name, v); 4333 return get(name); 4334 } 4335 4336 /// 4337 string opIndex(string key) const { 4338 return get(key); 4339 } 4340 4341 /// 4342 string opIndexAssign(string value, string field) { 4343 return set(field, value); 4344 } 4345 4346 // FIXME: doesn't seem to work 4347 string* opBinary(string op)(string key) if(op == "in") { 4348 return key in fields; 4349 } 4350 } 4351 4352 /// A proxy object to do the Element class' dataset property. See Element.dataset for more info. 4353 /// 4354 /// Do not create this object directly. 4355 /// Group: implementations 4356 struct DataSet { 4357 /// 4358 this(Element e) { 4359 this._element = e; 4360 } 4361 4362 private Element _element; 4363 /// 4364 string set(string name, string value) { 4365 _element.setAttribute("data-" ~ unCamelCase(name), value); 4366 return value; 4367 } 4368 4369 /// 4370 string get(string name) const { 4371 return _element.getAttribute("data-" ~ unCamelCase(name)); 4372 } 4373 4374 /// 4375 mixin JavascriptStyleDispatch!(); 4376 } 4377 4378 /// Proxy object for attributes which will replace the main opDispatch eventually 4379 /// Group: implementations 4380 struct AttributeSet { 4381 /// Generally, you shouldn't create this yourself, since you can use [Element.attrs] instead. 4382 this(Element e) { 4383 this._element = e; 4384 } 4385 4386 private Element _element; 4387 /++ 4388 Sets a `value` for attribute with `name`. If the attribute doesn't exist, this will create it, even if `value` is `null`. 4389 +/ 4390 string set(string name, string value) { 4391 _element.setAttribute(name, value); 4392 return value; 4393 } 4394 4395 /++ 4396 Provides support for testing presence of an attribute with the `in` operator. 
4397 4398 History: 4399 Added December 16, 2020 (dub v10.10) 4400 +/ 4401 auto opBinaryRight(string op : "in")(string name) const 4402 { 4403 return name in _element.attributes; 4404 } 4405 /// 4406 unittest 4407 { 4408 auto doc = new XmlDocument(`<test attr="test"/>`); 4409 assert("attr" in doc.root.attrs); 4410 assert("test" !in doc.root.attrs); 4411 } 4412 4413 /++ 4414 Returns the value of attribute `name`, or `null` if doesn't exist 4415 +/ 4416 string get(string name) const { 4417 return _element.getAttribute(name); 4418 } 4419 4420 /// 4421 mixin JavascriptStyleDispatch!(); 4422 } 4423 4424 4425 4426 /// for style, i want to be able to set it with a string like a plain attribute, 4427 /// but also be able to do properties Javascript style. 4428 4429 /// Group: implementations 4430 struct ElementStyle { 4431 this(Element parent) { 4432 _element = parent; 4433 } 4434 4435 Element _element; 4436 4437 @property ref inout(string) _attribute() inout { 4438 auto s = "style" in _element.attributes; 4439 if(s is null) { 4440 auto e = cast() _element; // const_cast 4441 e.attributes["style"] = ""; // we need something to reference 4442 s = cast(inout) ("style" in e.attributes); 4443 } 4444 4445 assert(s !is null); 4446 return *s; 4447 } 4448 4449 alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work. 
4450 4451 string set(string name, string value) { 4452 if(name.length == 0) 4453 return value; 4454 if(name == "cssFloat") 4455 name = "float"; 4456 else 4457 name = unCamelCase(name); 4458 auto r = rules(); 4459 r[name] = value; 4460 4461 _attribute = ""; 4462 foreach(k, v; r) { 4463 if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */ 4464 continue; 4465 if(_attribute.length) 4466 _attribute ~= " "; 4467 _attribute ~= k ~ ": " ~ v ~ ";"; 4468 } 4469 4470 _element.setAttribute("style", _attribute); // this is to trigger the observer call 4471 4472 return value; 4473 } 4474 string get(string name) const { 4475 if(name == "cssFloat") 4476 name = "float"; 4477 else 4478 name = unCamelCase(name); 4479 auto r = rules(); 4480 if(name in r) 4481 return r[name]; 4482 return null; 4483 } 4484 4485 string[string] rules() const { 4486 string[string] ret; 4487 foreach(rule; _attribute.split(";")) { 4488 rule = rule.strip(); 4489 if(rule.length == 0) 4490 continue; 4491 auto idx = rule.indexOf(":"); 4492 if(idx == -1) 4493 ret[rule] = ""; 4494 else { 4495 auto name = rule[0 .. idx].strip(); 4496 auto value = rule[idx + 1 .. 
$].strip(); 4497 4498 ret[name] = value; 4499 } 4500 } 4501 4502 return ret; 4503 } 4504 4505 mixin JavascriptStyleDispatch!(); 4506 } 4507 4508 /// Converts a camel cased propertyName to a css style dashed property-name 4509 string unCamelCase(string a) { 4510 string ret; 4511 foreach(c; a) 4512 if((c >= 'A' && c <= 'Z')) 4513 ret ~= "-" ~ toLower("" ~ c)[0]; 4514 else 4515 ret ~= c; 4516 return ret; 4517 } 4518 4519 /// Translates a css style property-name to a camel cased propertyName 4520 string camelCase(string a) { 4521 string ret; 4522 bool justSawDash = false; 4523 foreach(c; a) 4524 if(c == '-') { 4525 justSawDash = true; 4526 } else { 4527 if(justSawDash) { 4528 justSawDash = false; 4529 ret ~= toUpper("" ~ c); 4530 } else 4531 ret ~= c; 4532 } 4533 return ret; 4534 } 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 // domconvenience ends } 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 // @safe: 4557 4558 // NOTE: do *NOT* override toString on Element subclasses. It won't work. 4559 // Instead, override writeToAppender(); 4560 4561 // FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too. 4562 4563 // Stripping them is useful for reading php as html.... but adding them 4564 // is good for building php. 4565 4566 // I need to maintain compatibility with the way it is now too. 4567 4568 import std.string; 4569 import std.exception; 4570 import std.uri; 4571 import std.array; 4572 import std.range; 4573 4574 //import std.stdio; 4575 4576 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh 4577 // that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's 4578 // most likely a typo so I say kill kill kill. 


/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




/// Node type codes; only the text node code is defined here.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Throws ElementNotFoundException when `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	// a failed dynamic cast and a null input both give null here
	if(auto converted = cast(T) e)
		return converted;

	throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
}


/// An element with the tag name `#fragment` that writes only its children.
/// Group: core_functionality
class DocumentFragment : Element {
	/// Creates an empty fragment belonging to the given document.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// Writes just the children; the fragment itself has no tag of its own.
4643 override string writeToAppender(Appender!string where = appender!string()) const { 4644 return this.innerHTML(where); 4645 } 4646 4647 override string toPrettyStringImpl(bool insertComments, int indentationLevel, string indentWith) const { 4648 string s; 4649 foreach(child; children) 4650 s ~= child.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 4651 return s; 4652 } 4653 4654 /// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes 4655 /* 4656 override inout(Element) parentNode() inout { 4657 return children.length ? children[0].parentNode : null; 4658 } 4659 */ 4660 /+ 4661 override Element parentNode(Element p) { 4662 this.parentNode = p; 4663 foreach(child; children) 4664 child.parentNode = p; 4665 return p; 4666 } 4667 +/ 4668 } 4669 4670 /// Given text, encode all html entities on it - &, <, >, and ". This function also 4671 /// encodes all 8 bit characters as entities, thus ensuring the resultant text will work 4672 /// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false 4673 /// 4674 /// The output parameter can be given to append to an existing buffer. You don't have to 4675 /// pass one; regardless, the return value will be usable for you, with just the data encoded. 4676 /// Group: core_functionality 4677 string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) { 4678 // if there's no entities, we can save a lot of time by not bothering with the 4679 // decoding loop. This check cuts the net toString time by better than half in my test. 4680 // let me know if it made your tests worse though, since if you use an entity in just about 4681 // every location, the check will add time... but I suspect the average experience is like mine 4682 // since the check gives up as soon as it can anyway. 
4683 4684 bool shortcut = true; 4685 foreach(char c; data) { 4686 // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it. 4687 if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) { 4688 shortcut = false; // there's actual work to be done 4689 break; 4690 } 4691 } 4692 4693 if(shortcut) { 4694 output.put(data); 4695 return data; 4696 } 4697 4698 auto start = output.data.length; 4699 4700 output.reserve(data.length + 64); // grab some extra space for the encoded entities 4701 4702 foreach(dchar d; data) { 4703 if(d == '&') 4704 output.put("&"); 4705 else if (d == '<') 4706 output.put("<"); 4707 else if (d == '>') 4708 output.put(">"); 4709 else if (d == '\"') 4710 output.put("""); 4711 // else if (d == '\'') 4712 // output.put("'"); // if you are in an attribute, it might be important to encode for the same reason as double quotes 4713 // FIXME: should I encode apostrophes too? as '... I could also do space but if your html is so bad that it doesn't 4714 // quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh 4715 // idk about apostrophes though. Might be worth it, might not. 4716 else if (!encodeNonAscii || (d < 128 && d > 0)) 4717 output.put(d); 4718 else 4719 output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";"); 4720 } 4721 4722 //assert(output !is null); // this fails on empty attributes..... 4723 return output.data[start .. $]; 4724 4725 // data = data.replace("\u00a0", " "); 4726 } 4727 4728 /// An alias for htmlEntitiesEncode; it works for xml too 4729 /// Group: core_functionality 4730 string xmlEntitiesEncode(string data) { 4731 return htmlEntitiesEncode(data); 4732 } 4733 4734 /// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters. 
/// Group: core_functionality
dchar parseEntity(in dchar[] entity) {
	// `entity` is expected to include its leading '&' and its one-character terminator
	// (normally ';'); without both there is nothing between them to look up.
	if(entity.length < 2)
		return '\ufffd';

	char[128] buffer;
	int bpos;
	foreach(char c; entity[1 .. $-1]) {
		// anything that would overflow the buffer is treated as "not found" instead of writing out of bounds
		if(bpos == buffer.length)
			return '\ufffd';
		buffer[bpos++] = c;
	}
	char[] entityAsString = buffer[0 .. bpos];

	// binary search over the entity name table (availableEntities / availableEntitiesValues
	// are parallel arrays defined elsewhere in this module)
	int low = 0;
	int high = cast(int) availableEntities.length;

	while(low + 1 < high) {
		int spot = (high - low) / 2 + low;
		if(availableEntities[spot] == entityAsString)
			return availableEntitiesValues[spot];
		else if(entityAsString < availableEntities[spot])
			high = spot;
		else
			low = spot;
	}

	switch(entity[1..$-1]) {
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		case "amp":
			return '&';
		// the next are html rather than xml

		// and handling numeric entities
		default:
			// length check: "&#" alone has no character at index 2 to inspect
			if(entity.length > 2 && entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] >   '9'))
						decimal = decimal[1 .. $];

					while(decimal.length && (decimal[$-1] < '0' || decimal[$-1] >   '9'))
						decimal = decimal[0 .. $ - 1];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

unittest {
	// not in the binary search
	assert(parseEntity("&quot;"d) == '"');

	// numeric value
	assert(parseEntity("&#x0534;") == '\u0534');

	// not found at all
	assert(parseEntity("&asdasdasd;"d) == '\ufffd');

	// random values in the bin search
	assert(parseEntity("&Tab;"d) == '\t');
	assert(parseEntity("&raquo;"d) == '\&raquo;');

	// near the middle and edges of the bin search
	assert(parseEntity("&ascr;"d) == '\U0001d4b6');
	assert(parseEntity("&ast;"d) == '\u002a');
	assert(parseEntity("&AElig;"d) == '\u00c6');
	assert(parseEntity("&zwnj;"d) == '\u200c');
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///     data = text that may contain `&name;` / `&#N;` / `&#xH;` references
///     strict = when true, an unterminated or malformed reference throws `Exception`
///
/// Returns: the decoded text; when `data` contains no `&` the original slice is returned unchanged.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer; // scratch space for utf-8 encoding one code point

	bool tryingEntity = false;
	bool tryingNumericEntity = false;
	bool tryingHexEntity = false;
	dchar[16] entityBeingTried;
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0;

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			// "&#" starts a numeric reference, "&#x" a hexadecimal one
			if(entityBeingTriedLength == 2 && ch == '#') {
				tryingNumericEntity = true;
				continue;
			} else if(tryingNumericEntity && entityBeingTriedLength == 3 && ch == 'x') {
				tryingHexEntity = true;
				continue;
			}

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") {
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				} else {
					auto ch2 = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
					if(ch2 == '\ufffd') { // either someone put this in intentionally (lol) or we failed to get it
						// but either way, just abort and keep the plain text
						foreach(char c; entityBeingTried[0 .. entityBeingTriedLength - 1]) // cut off the & we're on now
							a ~= c;
					} else {
						a ~= buffer[0.. std.utf.encode(buffer, ch2)];
					}
				}

				// tryingEntity is still true
				goto new_entity;
			} else
			if(ch == ';') {
				// properly terminated reference
				tryingEntity = false;
				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]);
					a ~= buffer[0 .. std.utf.encode(buffer, ch)];
				}
			} else {
				if(tryingNumericEntity) {
					if(ch < '0' || ch > '9') {
						if(tryingHexEntity) {
							// in hex mode letters are allowed to continue the reference
							if(ch < 'A')
								goto trouble;
							if(ch > 'Z' && ch < 'a')
								goto trouble;
							if(ch > 'z')
								goto trouble;
						} else {
							trouble:
							// a numeric reference ended by something other than ';':
							// decode what we have (the stray character acts as the terminator), then keep that character
							if(strict)
								throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
							tryingEntity = false;
							a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
							a ~= ch;
							continue;
						}
					}
				}


				// too long to be an entity; give up and keep the text as it was
				if(entityAttemptIndex >= 9) {
					done:
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				new_entity:
				tryingEntity = true;
				tryingNumericEntity = false;
				tryingHexEntity = false;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

unittest {
	// error recovery
	assert(htmlEntitiesDecode("&lt;&foo") == "<&foo"); // unterminated turned back to thing
	assert(htmlEntitiesDecode("&lt&foo") == "<&foo"); // semi-terminated... parse and carry on (is this really sane?)
	assert(htmlEntitiesDecode("loc=en_us&tracknum=111") == "loc=en_us&tracknum=111"); // a bit of both, seen in a real life email
	assert(htmlEntitiesDecode("& test") == "& test"); // unterminated, just abort

	// in strict mode all of these should fail
	try { assert(htmlEntitiesDecode("&lt&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&lt;&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("loc=en_us&tracknum=111", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("& test", true) == "& test"); assert(0); } catch(Exception e) { }

	// correct cases that should pass the same in strict or loose mode
	foreach(strict; [false, true]) {
		assert(htmlEntitiesDecode("&amp;hello&raquo; win", strict) == "&hello\&raquo; win");
	}
}

/// Base class for nodes that cannot have children appended (raw source, comments, processing instructions...).
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes cannot hold children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Special nodes all report node type 100.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node (tag name `#raw`) whose `source` string is stored as given.
/// Group: implementations
class RawSource : SpecialElement {
	/// Creates a raw node holding `s`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// The node value is whatever `toString` produces for this node.
	override string nodeValue() const {
		return this.toString();
	}

	/// Writes `source` to the appender as-is.
5010 override string writeToAppender(Appender!string where = appender!string()) const { 5011 where.put(source); 5012 return source; 5013 } 5014 5015 override string toPrettyStringImpl(bool, int, string) const { 5016 return source; 5017 } 5018 5019 5020 override RawSource cloneNode(bool deep) { 5021 return new RawSource(parentDocument, source); 5022 } 5023 5024 ///. 5025 string source; 5026 } 5027 5028 /// Group: implementations 5029 abstract class ServerSideCode : SpecialElement { 5030 this(Document _parentDocument, string type) { 5031 super(_parentDocument); 5032 tagName = "#" ~ type; 5033 } 5034 5035 ///. 5036 override string nodeValue() const { 5037 return this.source; 5038 } 5039 5040 ///. 5041 override string writeToAppender(Appender!string where = appender!string()) const { 5042 auto start = where.data.length; 5043 where.put("<"); 5044 where.put(source); 5045 where.put(">"); 5046 return where.data[start .. $]; 5047 } 5048 5049 override string toPrettyStringImpl(bool, int, string) const { 5050 return "<" ~ source ~ ">"; 5051 } 5052 5053 ///. 5054 string source; 5055 } 5056 5057 ///. 5058 /// Group: implementations 5059 class PhpCode : ServerSideCode { 5060 ///. 5061 this(Document _parentDocument, string s) { 5062 super(_parentDocument, "php"); 5063 source = s; 5064 } 5065 5066 override PhpCode cloneNode(bool deep) { 5067 return new PhpCode(parentDocument, source); 5068 } 5069 } 5070 5071 ///. 5072 /// Group: implementations 5073 class AspCode : ServerSideCode { 5074 ///. 5075 this(Document _parentDocument, string s) { 5076 super(_parentDocument, "asp"); 5077 source = s; 5078 } 5079 5080 override AspCode cloneNode(bool deep) { 5081 return new AspCode(parentDocument, source); 5082 } 5083 } 5084 5085 ///. 5086 /// Group: implementations 5087 class BangInstruction : SpecialElement { 5088 ///. 5089 this(Document _parentDocument, string s) { 5090 super(_parentDocument); 5091 source = s; 5092 tagName = "#bpi"; 5093 } 5094 5095 ///. 
5096 override string nodeValue() const { 5097 return this.source; 5098 } 5099 5100 override BangInstruction cloneNode(bool deep) { 5101 return new BangInstruction(parentDocument, source); 5102 } 5103 5104 ///. 5105 override string writeToAppender(Appender!string where = appender!string()) const { 5106 auto start = where.data.length; 5107 where.put("<!"); 5108 where.put(source); 5109 where.put(">"); 5110 return where.data[start .. $]; 5111 } 5112 5113 override string toPrettyStringImpl(bool, int, string) const { 5114 string s; 5115 s ~= "<!"; 5116 s ~= source; 5117 s ~= ">"; 5118 return s; 5119 } 5120 5121 ///. 5122 string source; 5123 } 5124 5125 ///. 5126 /// Group: implementations 5127 class QuestionInstruction : SpecialElement { 5128 ///. 5129 this(Document _parentDocument, string s) { 5130 super(_parentDocument); 5131 source = s; 5132 tagName = "#qpi"; 5133 } 5134 5135 override QuestionInstruction cloneNode(bool deep) { 5136 return new QuestionInstruction(parentDocument, source); 5137 } 5138 5139 ///. 5140 override string nodeValue() const { 5141 return this.source; 5142 } 5143 5144 ///. 5145 override string writeToAppender(Appender!string where = appender!string()) const { 5146 auto start = where.data.length; 5147 where.put("<"); 5148 where.put(source); 5149 where.put(">"); 5150 return where.data[start .. $]; 5151 } 5152 5153 override string toPrettyStringImpl(bool, int, string) const { 5154 string s; 5155 s ~= "<"; 5156 s ~= source; 5157 s ~= ">"; 5158 return s; 5159 } 5160 5161 5162 ///. 5163 string source; 5164 } 5165 5166 ///. 5167 /// Group: implementations 5168 class HtmlComment : SpecialElement { 5169 ///. 5170 this(Document _parentDocument, string s) { 5171 super(_parentDocument); 5172 source = s; 5173 tagName = "#comment"; 5174 } 5175 5176 override HtmlComment cloneNode(bool deep) { 5177 return new HtmlComment(parentDocument, source); 5178 } 5179 5180 ///. 5181 override string nodeValue() const { 5182 return this.source; 5183 } 5184 5185 ///. 
5186 override string writeToAppender(Appender!string where = appender!string()) const { 5187 auto start = where.data.length; 5188 where.put("<!--"); 5189 where.put(source); 5190 where.put("-->"); 5191 return where.data[start .. $]; 5192 } 5193 5194 override string toPrettyStringImpl(bool, int, string) const { 5195 string s; 5196 s ~= "<!--"; 5197 s ~= source; 5198 s ~= "-->"; 5199 return s; 5200 } 5201 5202 5203 ///. 5204 string source; 5205 } 5206 5207 5208 5209 5210 ///. 5211 /// Group: implementations 5212 class TextNode : Element { 5213 public: 5214 ///. 5215 this(Document _parentDocument, string e) { 5216 super(_parentDocument); 5217 contents = e; 5218 tagName = "#text"; 5219 } 5220 5221 /// 5222 this(string e) { 5223 this(null, e); 5224 } 5225 5226 string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes 5227 5228 ///. 5229 static TextNode fromUndecodedString(Document _parentDocument, string html) { 5230 auto e = new TextNode(_parentDocument, ""); 5231 e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose); 5232 return e; 5233 } 5234 5235 ///. 5236 override @property TextNode cloneNode(bool deep) { 5237 auto n = new TextNode(parentDocument, contents); 5238 return n; 5239 } 5240 5241 ///. 5242 override string nodeValue() const { 5243 return this.contents; //toString(); 5244 } 5245 5246 ///. 5247 @property override int nodeType() const { 5248 return NodeType.Text; 5249 } 5250 5251 ///. 
5252 override string writeToAppender(Appender!string where = appender!string()) const { 5253 string s; 5254 if(contents.length) 5255 s = htmlEntitiesEncode(contents, where); 5256 else 5257 s = ""; 5258 5259 assert(s !is null); 5260 return s; 5261 } 5262 5263 override string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 5264 string s; 5265 5266 string contents = this.contents; 5267 // we will first collapse the whitespace per html 5268 // sort of. note this can break stuff yo!!!! 5269 if(this.parentNode is null || this.parentNode.tagName != "pre") { 5270 string n = ""; 5271 bool lastWasWhitespace = indentationLevel > 0; 5272 foreach(char c; contents) { 5273 if(c.isSimpleWhite) { 5274 if(!lastWasWhitespace) 5275 n ~= ' '; 5276 lastWasWhitespace = true; 5277 } else { 5278 n ~= c; 5279 lastWasWhitespace = false; 5280 } 5281 } 5282 5283 contents = n; 5284 } 5285 5286 if(this.parentNode !is null && this.parentNode.tagName != "p") { 5287 contents = contents.strip; 5288 } 5289 5290 auto e = htmlEntitiesEncode(contents); 5291 import std.algorithm.iteration : splitter; 5292 bool first = true; 5293 foreach(line; splitter(e, "\n")) { 5294 if(first) { 5295 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 5296 first = false; 5297 } else { 5298 s ~= "\n"; 5299 if(insertComments) 5300 s ~= "<!--"; 5301 foreach(i; 0 .. indentationLevel) 5302 s ~= "\t"; 5303 if(insertComments) 5304 s ~= "-->"; 5305 } 5306 s ~= line.stripRight; 5307 } 5308 return s; 5309 } 5310 5311 ///. 5312 override Element appendChild(Element e) { 5313 assert(0, "Cannot append to a text node"); 5314 } 5315 5316 ///. 5317 string contents; 5318 // alias contents content; // I just mistype this a lot, 5319 } 5320 5321 /** 5322 There are subclasses of Element offering improved helper 5323 functions for the element in HTML. 5324 */ 5325 5326 /++ 5327 Represents a HTML link. 
This provides some convenience methods for manipulating query strings, but otherwise is sthe same Element interface. 5328 5329 Please note this object may not be used for all `<a>` tags. 5330 +/ 5331 /// Group: implementations 5332 class Link : Element { 5333 5334 /++ 5335 Constructs `<a href="that href">that text</a>`. 5336 +/ 5337 this(string href, string text) { 5338 super("a"); 5339 setAttribute("href", href); 5340 innerText = text; 5341 } 5342 5343 /// ditto 5344 this(Document _parentDocument) { 5345 super(_parentDocument); 5346 this.tagName = "a"; 5347 } 5348 5349 /+ 5350 /// Returns everything in the href EXCEPT the query string 5351 @property string targetSansQuery() { 5352 5353 } 5354 5355 ///. 5356 @property string domainName() { 5357 5358 } 5359 5360 ///. 5361 @property string path 5362 +/ 5363 /// This gets a variable from the URL's query string. 5364 string getValue(string name) { 5365 auto vars = variablesHash(); 5366 if(name in vars) 5367 return vars[name]; 5368 return null; 5369 } 5370 5371 private string[string] variablesHash() { 5372 string href = getAttribute("href"); 5373 if(href is null) 5374 return null; 5375 5376 auto ques = href.indexOf("?"); 5377 string str = ""; 5378 if(ques != -1) { 5379 str = href[ques+1..$]; 5380 5381 auto fragment = str.indexOf("#"); 5382 if(fragment != -1) 5383 str = str[0..fragment]; 5384 } 5385 5386 string[] variables = str.split("&"); 5387 5388 string[string] hash; 5389 5390 foreach(var; variables) { 5391 auto index = var.indexOf("="); 5392 if(index == -1) 5393 hash[var] = ""; 5394 else { 5395 hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]); 5396 } 5397 } 5398 5399 return hash; 5400 } 5401 5402 /// Replaces all the stuff after a ? in the link at once with the given assoc array values. 
5403 /*private*/ void updateQueryString(string[string] vars) { 5404 string href = getAttribute("href"); 5405 5406 auto question = href.indexOf("?"); 5407 if(question != -1) 5408 href = href[0..question]; 5409 5410 string frag = ""; 5411 auto fragment = href.indexOf("#"); 5412 if(fragment != -1) { 5413 frag = href[fragment..$]; 5414 href = href[0..fragment]; 5415 } 5416 5417 string query = "?"; 5418 bool first = true; 5419 foreach(name, value; vars) { 5420 if(!first) 5421 query ~= "&"; 5422 else 5423 first = false; 5424 5425 query ~= encodeComponent(name); 5426 if(value.length) 5427 query ~= "=" ~ encodeComponent(value); 5428 } 5429 5430 if(query != "?") 5431 href ~= query; 5432 5433 href ~= frag; 5434 5435 setAttribute("href", href); 5436 } 5437 5438 /// Sets or adds the variable with the given name to the given value 5439 /// It automatically URI encodes the values and takes care of the ? and &. 5440 override void setValue(string name, string variable) { 5441 auto vars = variablesHash(); 5442 vars[name] = variable; 5443 5444 updateQueryString(vars); 5445 } 5446 5447 /// Removes the given variable from the query string 5448 void removeValue(string name) { 5449 auto vars = variablesHash(); 5450 vars.remove(name); 5451 5452 updateQueryString(vars); 5453 } 5454 5455 /* 5456 ///. 5457 override string toString() { 5458 5459 } 5460 5461 ///. 5462 override string getAttribute(string name) { 5463 if(name == "href") { 5464 5465 } else 5466 return super.getAttribute(name); 5467 } 5468 */ 5469 } 5470 5471 /++ 5472 Represents a HTML form. This slightly specializes Element to add a few more convenience methods for adding and extracting form data. 5473 5474 Please note this object may not be used for all `<form>` tags. 5475 +/ 5476 /// Group: implementations 5477 class Form : Element { 5478 5479 ///. 
5480 this(Document _parentDocument) { 5481 super(_parentDocument); 5482 tagName = "form"; 5483 } 5484 5485 /// Overrides of the base class implementations that more confirm to *my* conventions when writing form html. 5486 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 5487 auto t = this.querySelector("fieldset div"); 5488 if(t is null) 5489 return super.addField(label, name, type, fieldOptions); 5490 else 5491 return t.addField(label, name, type, fieldOptions); 5492 } 5493 5494 /// ditto 5495 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 5496 auto type = "text"; 5497 auto t = this.querySelector("fieldset div"); 5498 if(t is null) 5499 return super.addField(label, name, type, fieldOptions); 5500 else 5501 return t.addField(label, name, type, fieldOptions); 5502 } 5503 5504 /// ditto 5505 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 5506 auto t = this.querySelector("fieldset div"); 5507 if(t is null) 5508 return super.addField(label, name, options, fieldOptions); 5509 else 5510 return t.addField(label, name, options, fieldOptions); 5511 } 5512 5513 /// ditto 5514 override void setValue(string field, string value) { 5515 setValue(field, value, true); 5516 } 5517 5518 // FIXME: doesn't handle arrays; multiple fields can have the same name 5519 5520 /// Set's the form field's value. For input boxes, this sets the value attribute. For 5521 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 5522 /// the checked/selected attribute from all, and adds it to the one matching the value. 5523 /// For checkboxes, if the value is non-null and not empty, it checks the box. 5524 5525 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 
5526 /// Otherwise, it makes a new input with type=hidden to keep the value. 5527 void setValue(string field, string value, bool makeNew) { 5528 auto eles = getField(field); 5529 if(eles.length == 0) { 5530 if(makeNew) { 5531 addInput(field, value); 5532 return; 5533 } else 5534 throw new Exception("form field does not exist"); 5535 } 5536 5537 if(eles.length == 1) { 5538 auto e = eles[0]; 5539 switch(e.tagName) { 5540 default: assert(0); 5541 case "textarea": 5542 e.innerText = value; 5543 break; 5544 case "input": 5545 string type = e.getAttribute("type"); 5546 if(type is null) { 5547 e.value = value; 5548 return; 5549 } 5550 switch(type) { 5551 case "checkbox": 5552 case "radio": 5553 if(value.length && value != "false") 5554 e.setAttribute("checked", "checked"); 5555 else 5556 e.removeAttribute("checked"); 5557 break; 5558 default: 5559 e.value = value; 5560 return; 5561 } 5562 break; 5563 case "select": 5564 bool found = false; 5565 foreach(child; e.tree) { 5566 if(child.tagName != "option") 5567 continue; 5568 string val = child.getAttribute("value"); 5569 if(val is null) 5570 val = child.innerText; 5571 if(val == value) { 5572 child.setAttribute("selected", "selected"); 5573 found = true; 5574 } else 5575 child.removeAttribute("selected"); 5576 } 5577 5578 if(!found) { 5579 e.addChild("option", value) 5580 .setAttribute("selected", "selected"); 5581 } 5582 break; 5583 } 5584 } else { 5585 // assume radio boxes 5586 foreach(e; eles) { 5587 string val = e.getAttribute("value"); 5588 //if(val is null) 5589 // throw new Exception("don't know what to do with radio boxes with null value"); 5590 if(val == value) 5591 e.setAttribute("checked", "checked"); 5592 else 5593 e.removeAttribute("checked"); 5594 } 5595 } 5596 } 5597 5598 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 5599 /// it makes no attempt to find and modify existing elements in the form to the new values. 
5600 void addValueArray(string key, string[] arrayOfValues) { 5601 foreach(arr; arrayOfValues) 5602 addChild("input", key, arr); 5603 } 5604 5605 /// Gets the value of the field; what would be given if it submitted right now. (so 5606 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 5607 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 5608 string getValue(string field) { 5609 auto eles = getField(field); 5610 if(eles.length == 0) 5611 return ""; 5612 if(eles.length == 1) { 5613 auto e = eles[0]; 5614 switch(e.tagName) { 5615 default: assert(0); 5616 case "input": 5617 if(e.type == "checkbox") { 5618 if(e.checked) 5619 return e.value.length ? e.value : "checked"; 5620 return ""; 5621 } else 5622 return e.value; 5623 case "textarea": 5624 return e.innerText; 5625 case "select": 5626 foreach(child; e.tree) { 5627 if(child.tagName != "option") 5628 continue; 5629 if(child.selected) 5630 return child.value; 5631 } 5632 break; 5633 } 5634 } else { 5635 // assuming radio 5636 foreach(e; eles) { 5637 if(e.checked) 5638 return e.value; 5639 } 5640 } 5641 5642 return ""; 5643 } 5644 5645 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 5646 /++ 5647 Returns the form's contents in application/x-www-form-urlencoded format. 5648 5649 Bugs: 5650 Doesn't handle repeated elements of the same name nor files. 
5651 +/ 5652 string getPostableData() { 5653 bool[string] namesDone; 5654 5655 string ret; 5656 bool outputted = false; 5657 5658 foreach(e; getElementsBySelector("[name]")) { 5659 if(e.name in namesDone) 5660 continue; 5661 5662 if(outputted) 5663 ret ~= "&"; 5664 else 5665 outputted = true; 5666 5667 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 5668 5669 namesDone[e.name] = true; 5670 } 5671 5672 return ret; 5673 } 5674 5675 /// Gets the actual elements with the given name 5676 Element[] getField(string name) { 5677 Element[] ret; 5678 foreach(e; tree) { 5679 if(e.name == name) 5680 ret ~= e; 5681 } 5682 return ret; 5683 } 5684 5685 /// Grabs the <label> with the given for tag, if there is one. 5686 Element getLabel(string forId) { 5687 foreach(e; tree) 5688 if(e.tagName == "label" && e.getAttribute("for") == forId) 5689 return e; 5690 return null; 5691 } 5692 5693 /// Adds a new INPUT field to the end of the form with the given attributes. 5694 Element addInput(string name, string value, string type = "hidden") { 5695 auto e = new Element(parentDocument, "input", null, true); 5696 e.name = name; 5697 e.value = value; 5698 e.type = type; 5699 5700 appendChild(e); 5701 5702 return e; 5703 } 5704 5705 /// Removes the given field from the form. It finds the element and knocks it right out. 5706 void removeField(string name) { 5707 foreach(e; getField(name)) 5708 e.parentNode.removeChild(e); 5709 } 5710 5711 /+ 5712 /// Returns all form members. 5713 @property Element[] elements() { 5714 5715 } 5716 5717 ///. 
5718 string opDispatch(string name)(string v = null) 5719 // filter things that should actually be attributes on the form 5720 if( name != "method" && name != "action" && name != "enctype" 5721 && name != "style" && name != "name" && name != "id" && name != "class") 5722 { 5723 5724 } 5725 +/ 5726 /+ 5727 void submit() { 5728 // take its elements and submit them through http 5729 } 5730 +/ 5731 } 5732 5733 import std.conv; 5734 5735 /++ 5736 Represents a HTML table. Has some convenience methods for working with tabular data. 5737 +/ 5738 /// Group: implementations 5739 class Table : Element { 5740 5741 /// You can make this yourself but you'd generally get one of these object out of a html parse or [Element.make] call. 5742 this(Document _parentDocument) { 5743 super(_parentDocument); 5744 tagName = "table"; 5745 } 5746 5747 /++ 5748 Creates an element with the given type and content. The argument can be an Element, Html, or other data which is converted to text with `to!string` 5749 5750 The element is $(I not) appended to the table. 
5751 +/ 5752 Element th(T)(T t) { 5753 Element e; 5754 if(parentDocument !is null) 5755 e = parentDocument.createElement("th"); 5756 else 5757 e = Element.make("th"); 5758 static if(is(T == Html)) 5759 e.innerHTML = t; 5760 else static if(is(T : Element)) 5761 e.appendChild(t); 5762 else 5763 e.innerText = to!string(t); 5764 return e; 5765 } 5766 5767 /// ditto 5768 Element td(T)(T t) { 5769 Element e; 5770 if(parentDocument !is null) 5771 e = parentDocument.createElement("td"); 5772 else 5773 e = Element.make("td"); 5774 static if(is(T == Html)) 5775 e.innerHTML = t; 5776 else static if(is(T : Element)) 5777 e.appendChild(t); 5778 else 5779 e.innerText = to!string(t); 5780 return e; 5781 } 5782 5783 /++ 5784 Passes each argument to the [th] method for `appendHeaderRow` or [td] method for the others, appends them all to the `<tbody>` element for `appendRow`, `<thead>` element for `appendHeaderRow`, or a `<tfoot>` element for `appendFooterRow`, and ensures it is appended it to the table. 5785 +/ 5786 Element appendHeaderRow(T...)(T t) { 5787 return appendRowInternal("th", "thead", t); 5788 } 5789 5790 /// ditto 5791 Element appendFooterRow(T...)(T t) { 5792 return appendRowInternal("td", "tfoot", t); 5793 } 5794 5795 /// ditto 5796 Element appendRow(T...)(T t) { 5797 return appendRowInternal("td", "tbody", t); 5798 } 5799 5800 /++ 5801 Takes each argument as a class name and calls [Element.addClass] for each element in the column associated with that index. 5802 5803 Please note this does not use the html `<col>` element. 5804 +/ 5805 void addColumnClasses(string[] classes...) 
{ 5806 auto grid = getGrid(); 5807 foreach(row; grid) 5808 foreach(i, cl; classes) { 5809 if(cl.length) 5810 if(i < row.length) 5811 row[i].addClass(cl); 5812 } 5813 } 5814 5815 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 5816 Element row = Element.make("tr"); 5817 5818 foreach(e; t) { 5819 static if(is(typeof(e) : Element)) { 5820 if(e.tagName == "td" || e.tagName == "th") 5821 row.appendChild(e); 5822 else { 5823 Element a = Element.make(innerType); 5824 5825 a.appendChild(e); 5826 5827 row.appendChild(a); 5828 } 5829 } else static if(is(typeof(e) == Html)) { 5830 Element a = Element.make(innerType); 5831 a.innerHTML = e.source; 5832 row.appendChild(a); 5833 } else static if(is(typeof(e) == Element[])) { 5834 Element a = Element.make(innerType); 5835 foreach(ele; e) 5836 a.appendChild(ele); 5837 row.appendChild(a); 5838 } else static if(is(typeof(e) == string[])) { 5839 foreach(ele; e) { 5840 Element a = Element.make(innerType); 5841 a.innerText = to!string(ele); 5842 row.appendChild(a); 5843 } 5844 } else { 5845 Element a = Element.make(innerType); 5846 a.innerText = to!string(e); 5847 row.appendChild(a); 5848 } 5849 } 5850 5851 foreach(e; children) { 5852 if(e.tagName == findType) { 5853 e.appendChild(row); 5854 return row; 5855 } 5856 } 5857 5858 // the type was not found if we are here... let's add it so it is well-formed 5859 auto lol = this.addChild(findType); 5860 lol.appendChild(row); 5861 5862 return row; 5863 } 5864 5865 /// Returns the `<caption>` element of the table, creating one if it isn't there. 5866 Element captionElement() { 5867 Element cap; 5868 foreach(c; children) { 5869 if(c.tagName == "caption") { 5870 cap = c; 5871 break; 5872 } 5873 } 5874 5875 if(cap is null) { 5876 cap = Element.make("caption"); 5877 appendChild(cap); 5878 } 5879 5880 return cap; 5881 } 5882 5883 /// Returns or sets the text inside the `<caption>` element, creating that element if it isnt' there. 
5884 @property string caption() { 5885 return captionElement().innerText; 5886 } 5887 5888 /// ditto 5889 @property void caption(string text) { 5890 captionElement().innerText = text; 5891 } 5892 5893 /// Gets the logical layout of the table as a rectangular grid of 5894 /// cells. It considers rowspan and colspan. A cell with a large 5895 /// span is represented in the grid by being referenced several times. 5896 /// The tablePortition parameter can get just a <thead>, <tbody>, or 5897 /// <tfoot> portion if you pass one. 5898 /// 5899 /// Note: the rectangular grid might include null cells. 5900 /// 5901 /// This is kinda expensive so you should call once when you want the grid, 5902 /// then do lookups on the returned array. 5903 TableCell[][] getGrid(Element tablePortition = null) 5904 in { 5905 if(tablePortition is null) 5906 assert(tablePortition is null); 5907 else { 5908 assert(tablePortition !is null); 5909 assert(tablePortition.parentNode is this); 5910 assert( 5911 tablePortition.tagName == "tbody" 5912 || 5913 tablePortition.tagName == "tfoot" 5914 || 5915 tablePortition.tagName == "thead" 5916 ); 5917 } 5918 } 5919 do { 5920 if(tablePortition is null) 5921 tablePortition = this; 5922 5923 TableCell[][] ret; 5924 5925 // FIXME: will also return rows of sub tables! 5926 auto rows = tablePortition.getElementsByTagName("tr"); 5927 ret.length = rows.length; 5928 5929 int maxLength = 0; 5930 5931 int insertCell(int row, int position, TableCell cell) { 5932 if(row >= ret.length) 5933 return position; // not supposed to happen - a rowspan is prolly too big. 5934 5935 if(position == -1) { 5936 position++; 5937 foreach(item; ret[row]) { 5938 if(item is null) 5939 break; 5940 position++; 5941 } 5942 } 5943 5944 if(position < ret[row].length) 5945 ret[row][position] = cell; 5946 else 5947 foreach(i; ret[row].length .. 
position + 1) { 5948 if(i == position) 5949 ret[row] ~= cell; 5950 else 5951 ret[row] ~= null; 5952 } 5953 return position; 5954 } 5955 5956 foreach(i, rowElement; rows) { 5957 auto row = cast(TableRow) rowElement; 5958 assert(row !is null); 5959 assert(i < ret.length); 5960 5961 int position = 0; 5962 foreach(cellElement; rowElement.childNodes) { 5963 auto cell = cast(TableCell) cellElement; 5964 if(cell is null) 5965 continue; 5966 5967 // FIXME: colspan == 0 or rowspan == 0 5968 // is supposed to mean fill in the rest of 5969 // the table, not skip it 5970 foreach(int j; 0 .. cell.colspan) { 5971 foreach(int k; 0 .. cell.rowspan) 5972 // if the first row, always append. 5973 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 5974 position++; 5975 } 5976 } 5977 5978 if(ret[i].length > maxLength) 5979 maxLength = cast(int) ret[i].length; 5980 } 5981 5982 // want to ensure it's rectangular 5983 foreach(ref r; ret) { 5984 foreach(i; r.length .. maxLength) 5985 r ~= null; 5986 } 5987 5988 return ret; 5989 } 5990 } 5991 5992 /// Represents a table row element - a <tr> 5993 /// Group: implementations 5994 class TableRow : Element { 5995 ///. 5996 this(Document _parentDocument) { 5997 super(_parentDocument); 5998 tagName = "tr"; 5999 } 6000 6001 // FIXME: the standard says there should be a lot more in here, 6002 // but meh, I never use it and it's a pain to implement. 6003 } 6004 6005 /// Represents anything that can be a table cell - <td> or <th> html. 6006 /// Group: implementations 6007 class TableCell : Element { 6008 ///. 
6009 this(Document _parentDocument, string _tagName) { 6010 super(_parentDocument, _tagName); 6011 } 6012 6013 /// Gets and sets the row/colspan attributes as integers 6014 @property int rowspan() const { 6015 int ret = 1; 6016 auto it = getAttribute("rowspan"); 6017 if(it.length) 6018 ret = to!int(it); 6019 return ret; 6020 } 6021 6022 /// ditto 6023 @property int colspan() const { 6024 int ret = 1; 6025 auto it = getAttribute("colspan"); 6026 if(it.length) 6027 ret = to!int(it); 6028 return ret; 6029 } 6030 6031 /// ditto 6032 @property int rowspan(int i) { 6033 setAttribute("rowspan", to!string(i)); 6034 return i; 6035 } 6036 6037 /// ditto 6038 @property int colspan(int i) { 6039 setAttribute("colspan", to!string(i)); 6040 return i; 6041 } 6042 6043 } 6044 6045 6046 /// This is thrown on parse errors. 6047 /// Group: implementations 6048 class MarkupException : Exception { 6049 6050 ///. 6051 this(string message, string file = __FILE__, size_t line = __LINE__) { 6052 super(message, file, line); 6053 } 6054 } 6055 6056 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6057 /// Group: implementations 6058 class ElementNotFoundException : Exception { 6059 6060 /// type == kind of element you were looking for and search == a selector describing the search. 6061 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6062 this.searchContext = searchContext; 6063 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6064 } 6065 6066 Element searchContext; 6067 } 6068 6069 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6070 /// 6071 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6072 /// Group: core_functionality 6073 struct Html { 6074 /// This string holds the actual html. Use it to retrieve the contents. 
6075 string source; 6076 } 6077 6078 // for the observers 6079 enum DomMutationOperations { 6080 setAttribute, 6081 removeAttribute, 6082 appendChild, // tagname, attributes[], innerHTML 6083 insertBefore, 6084 truncateChildren, 6085 removeChild, 6086 appendHtml, 6087 replaceHtml, 6088 appendText, 6089 replaceText, 6090 replaceTextOnly 6091 } 6092 6093 // and for observers too 6094 struct DomMutationEvent { 6095 DomMutationOperations operation; 6096 Element target; 6097 Element related; // what this means differs with the operation 6098 Element related2; 6099 string relatedString; 6100 string relatedString2; 6101 } 6102 6103 6104 private immutable static string[] htmlSelfClosedElements = [ 6105 // html 4 6106 "area","base","br","col","hr","img","input","link","meta","param", 6107 6108 // html 5 6109 "embed","source","track","wbr" 6110 ]; 6111 6112 private immutable static string[] htmlRawSourceElements = [ 6113 "script", "style" 6114 ]; 6115 6116 private immutable static string[] htmlInlineElements = [ 6117 "span", "strong", "em", "b", "i", "a" 6118 ]; 6119 6120 6121 static import std.conv; 6122 6123 /// helper function for decoding html entities 6124 int intFromHex(string hex) { 6125 int place = 1; 6126 int value = 0; 6127 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6128 int v; 6129 char q = hex[a]; 6130 if( q >= '0' && q <= '9') 6131 v = q - '0'; 6132 else if (q >= 'a' && q <= 'f') 6133 v = q - 'a' + 10; 6134 else if (q >= 'A' && q <= 'F') 6135 v = q - 'A' + 10; 6136 else throw new Exception("Illegal hex character: " ~ q); 6137 6138 value += v * place; 6139 6140 place *= 16; 6141 } 6142 6143 return value; 6144 } 6145 6146 6147 // CSS selector handling 6148 6149 // EXTENSIONS 6150 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6151 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6152 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are 
// direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first



// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// The punctuation tokens recognized in selectors.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=",
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Returns the index into [selectorTokens] of the token found at `position` in `str`,
/// or -1 when no token starts there. The first entry that fits wins.
sizediff_t idToken(string str, sizediff_t position) {
	immutable char lead = str[position];
	foreach(index, candidate; selectorTokens) {
		if(lead != candidate[0])
			continue;
		// a two character token also needs its second character to be present and equal
		if(candidate.length > 1 && (position + 1 >= str.length || str[position+1] != candidate[1]))
			continue; // not this token
		return index;
	}
	return -1;
}

/// Parts of the CSS selector implementation
// look, ma, no phobos!
// new lexer by ketmar
/++
	Splits a selector string into tokens: the entries of `selectorTokens`, the
	contents of quoted strings, and names.

	Control characters and `/* */` comments between tokens are dropped. A plain space
	is kept as its own token because it has meaning in selectors. In names, a backslash
	makes the following character part of the name. In quoted strings, backslash-quote
	and backslash-backslash stand for the quote or backslash itself.

	An unterminated quoted string is treated as a programming error (`assert(0)`).
+/
string[] lexSelector (string selstr) {

	// index into selectorTokens of the token starting at stpos, or -1
	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	// Resolves the escapes of a quoted string in a single pass: a backslash followed by
	// a quote character or another backslash yields that character. Any other backslash
	// is kept as written. Returns the input slice untouched when it has no backslash.
	static string unescapeQuoted (string raw) {
		size_t firstEscape = 0;
		while (firstEscape < raw.length && raw[firstEscape] != '\\') ++firstEscape;
		if (firstEscape == raw.length) return raw;

		string result = raw[0..firstEscape];
		for (size_t i = firstEscape; i < raw.length; ++i) {
			if (raw[i] == '\\' && i+1 < raw.length) {
				immutable char next = raw[i+1];
				if (next == '"' || next == '\'' || next == '\\') ++i; // drop the backslash, keep what it escaped
			}
			result ~= raw[i];
		}
		return result;
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			immutable char end = selstr[0];
			size_t pos = 1;
			bool escaping = false;
			// Scan to the matching quote. A character that follows a backslash never
			// closes the string. (The loop used to stop as soon as a backslash had been
			// seen, which cut every string with an escape short.)
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				escaping = !escaping && selstr[pos] == '\\';
				pos++;
			}

			// running off the end means the closing quote is missing
			if(pos >= selstr.length)
				assert(0, selstr);

			tokens ~= unescapeQuoted(selstr[1 .. pos]);
			// the closing quote may be the very last character; the slice is then just empty
			selstr = selstr[pos + 1 .. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me  /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
	// quoted strings: escapes are honored and a closing quote may end the input
	assert(lexSelector(`[a="b\"c"]`) == ["[", "a", "=", `b"c`, "]"]);
	assert(lexSelector(`[a='it\'s']`) == ["[", "a", "=", `it's`, "]"]);
	assert(lexSelector(`"x"`) == ["x"]);
}

/// ditto
struct SelectorPart {
	string tagNameFilter; ///.
6328 string[] attributesPresent; /// [attr] 6329 string[2][] attributesEqual; /// [attr=value] 6330 string[2][] attributesStartsWith; /// [attr^=value] 6331 string[2][] attributesEndsWith; /// [attr$=value] 6332 // split it on space, then match to these 6333 string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value] 6334 // split it on dash, then match to these 6335 string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value] 6336 string[2][] attributesInclude; /// [attr*=value] 6337 string[2][] attributesNotEqual; /// [attr!=value] -- extension by me 6338 6339 string[] hasSelectors; /// :has(this) 6340 string[] notSelectors; /// :not(this) 6341 6342 string[] isSelectors; /// :is(this) 6343 string[] whereSelectors; /// :where(this) 6344 6345 ParsedNth[] nthOfType; /// . 6346 ParsedNth[] nthLastOfType; /// . 6347 ParsedNth[] nthChild; /// . 6348 6349 bool firstChild; ///. 6350 bool lastChild; ///. 6351 6352 bool firstOfType; /// . 6353 bool lastOfType; /// . 6354 6355 bool emptyElement; ///. 6356 bool whitespaceOnly; /// 6357 bool oddChild; ///. 6358 bool evenChild; ///. 6359 6360 bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED 6361 6362 bool rootElement; ///. 6363 6364 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 6365 6366 bool isCleanSlateExceptSeparation() { 6367 auto cp = this; 6368 cp.separation = -1; 6369 return cp is SelectorPart.init; 6370 } 6371 6372 ///. 
string toString() {
		string ret;

		// leading combinator
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= " > "; break;
			case 2: ret ~= " + "; break;
			case 3: ret ~= " ~ "; break;
			case 4: ret ~= " < "; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]";

		foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")";
		foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")";

		foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")";
		foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")";

		foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")";
		foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")";
		foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(firstOfType) ret ~= ":first-of-type";
		if(lastOfType) ret ~= ":last-of-type";
		if(emptyElement) ret ~= ":empty";
		if(whitespaceOnly) ret ~= ":whitespace-only";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/++
		Returns true if the given element matches this part.

		Only the filters stored in this part are tested (tag name, attributes,
		pseudo-classes); `separation` is not consulted here, the callers walk
		the tree according to it.

		Params:
			e = candidate; null and non-element nodes (nodeType != 1) never match
			scopeElementNow = the element `:scope` must be identical to
	+/
	bool matchElement(Element e, Element scopeElementNow = null) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					// the first sibling with our tag name has to be us.
					// On success keep going: the remaining filters
					// (attributes, :not, nth-*, ...) must still be checked.
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					// same as above, scanning from the end
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(scopeElement) {
			if(e !is scopeElementNow)
				return false;
		}
		if(emptyElement) {
			// NOTE(review): this rejects the element when isEmpty() is true, which
			// reads inverted for :empty. isEmpty is defined elsewhere, so it is
			// left as found - confirm its semantics before changing.
			if(e.isEmpty())
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// i is the zero-based position among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		// true if `value` is one of the `separator`-delimited pieces of `attr`
		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			// counted from the end, hence the reversed range
			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/++
	The parsed argument of `:nth-child()`, `:nth-of-type()` and
	`:nth-last-of-type()`: the `An+B` expression plus the optional
	`of <selector>` suffix.
+/
struct ParsedNth {
	int multiplier; /// the A in An+B
	int adder; /// the B in An+B

	string of; /// selector text following `of`, or empty

	/++
		Parses `odd`, `even`, a plain integer, or `An+B` (including `n`,
		`-n` and `+n`), optionally followed by `of selector`.

		Throws: Exception if something other than `+`, `-` or `of` follows the `n`.
	+/
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even is 2n+0; using 1 here made it identical to odd
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1; // bare "n" means 1n
			} else if(text.length >= 2 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// "-n" / "+n": parseNumber would see no digits and yield 0,
				// so the sign has to be turned into the coefficient here
				n = text[0] == '-' ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				// no 'n' at all: a fixed position
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Formats as `An+B`, followed by ` of selector` when one was given.
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/++
		Returns true if `e` sits at a position in `elements` that satisfies
		`multiplier * n + adder` for some integer `n >= 0`.

		Positions are one-based. When `of` is set, only the elements
		matching that selector are counted.
	+/
	bool solvesFor(R)(R elements, Element e) {
		// parse the "of" selector once instead of once per sibling
		Selector ofSelector;
		if(of.length)
			ofSelector = Selector(of);

		int idx = 1;
		bool found = false;
		foreach(ele; elements) {
			if(of.length && !ofSelector.matchesElement(ele))
				continue;
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		idx -= adder;
		if(multiplier) {
			// the quotient is n; a negative n is not a solution, otherwise
			// e.g. 2n+3 would match the first child and -n+3 every child
			return idx % multiplier == 0 && idx / multiplier >= 0;
		}
		return idx == 0;
	}

	// drops leading spaces from text
	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Reads an optionally signed run of digits off the front of text.
	// Returns 0 when there are no digits (a sign, if present, is still consumed).
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
/++
	Collects the elements reached by applying `parts` in order, beginning at
	`start`. Each part is located relative to the previous match according
	to its `separation`; the result may contain duplicates.

	Params:
		start = element the first part is evaluated against
		parts = remaining parts; an empty list yields just `start`
		scopeElementNow = forwarded to [SelectorPart.matchElement] for `:scope`
+/
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts, Element scopeElementNow = null) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 2: // next-sibling
			auto e = start.nextSibling("*");
			if(part.matchElement(e, scopeElementNow))
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e, scopeElementNow))
						ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e, scopeElementNow)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
			// not implemented: contributes nothing
		break;
	}

	return ret;
}

/++
	Represents a parsed CSS selector.
You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	/// One entry per comma-separated alternative in the selector string.
	SelectorComponent[] components;
	/// The text this selector was constructed from.
	string original;
	/++
		Parses the selector string and constructs the usable structure.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Returns true if the given element matches this selector,
		considered relative to an arbitrary element. It is enough for
		any one of the comma-separated alternatives to match.

		You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this
		with [std.algorithm.iteration.filter]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = document.root.tree.filter!(e => sel.matchesElement(e));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		foreach(component; components)
			if(component.matchElement(e, relativeTo))
				return true;

		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]

		Runs every alternative starting from `start` and returns the
		combined matches with duplicates removed.
	+/
	Element[] getMatchingElements(Element start, Element relativeTo = null) {
		Element[] ret;
		foreach(component; components)
			ret ~= getElementsBySelectorParts(start, component.parts, relativeTo);
		return removeDuplicates(ret);
	}

	/++
		Like [getMatchingElements], but returns a lazy range. Be careful
		about mutating the dom as you iterate through this.
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		import std.algorithm.iteration;
		return start.tree.filter!(a => this.matchesElement(a, relativeTo));
	}


	/// Returns the string this was built from
	string toString() {
		return original;
	}

	/++
		Returns a string from the parsed result, joining the alternatives with ", "


		(may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing)
	+/
	string parsedToString() {
		string ret;

		foreach(idx, component; components) {
			if(idx) ret ~= ", ";
			ret ~= component.toString();
		}

		return ret;
	}
}

/// One comma-free alternative of a selector: a sequence of parts, each carrying the combinator that precedes it.
struct SelectorComponent {
	/// The parts in source order.
	SelectorPart[] parts;

	/// Concatenates the string form of every part.
	string toString() {
		string ret;
		foreach(part; parts)
			ret ~= part.toString();
		return ret;
	}

	// USEFUL
	/// Returns the elements reached from `start` through these parts, duplicates removed.
	Element[] getElements(Element start, Element relativeTo = null) {
		return removeDuplicates(getElementsBySelectorParts(start, parts, relativeTo));
	}

	// USEFUL (but not implemented)
	/++
		Tests whether `e` is matched by this component, working backwards:
		the last part is checked against `e` itself, then each earlier part
		against the ancestors or preceding siblings selected by the
		combinator that followed it.

		`relativeTo` is passed on as the `:scope` element and also stops the
		upward/backward searches once it has been reached without a match.

		Throws: Exception("invalid selector") if the first part has a
		separation greater than 0 (a leading `>`, `+`, `~` or `<`).
	+/
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null) return false;
		Element where = e;
		// separation of the part handled in the previous iteration, i.e.
		// the relation between that part and the one examined now
		int lastSeparation = -1;

		auto lparts = parts;

		if(parts.length && parts[0].separation > 0) {
			throw new Exception("invalid selector");
		/+
			// if it starts with a non-trivial separator, inject
			// a "*" matcher to act as a root. for cases like document.querySelector("> body")
			// which implies html

			// however, if it is a child-matching selector and there are no children,
			// bail out early as it obviously cannot match.
			bool hasNonTextChildren = false;
			foreach(c; e.children)
				if(c.nodeType != 3) {
					hasNonTextChildren = true;
					break;
				}
			if(!hasNonTextChildren)
				return false;

			// there is probably a MUCH better way to do this.
			auto dummy = SelectorPart.init;
			dummy.tagNameFilter = "*";
			dummy.separation = 0;
			lparts = dummy ~ lparts;
		+/
		}

		foreach(part; retro(lparts)) {

			 // writeln("matching ", where, " with ", part, " via ", lastSeparation);
			 // writeln(parts);

			if(lastSeparation == -1) {
				// first iteration: the final part must match e itself
				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 0) { // generic parent
				// need to go up the whole chain
				where = where.parentNode;

				while(where !is null) {
					if(part.matchElement(where, relativeTo))
						break;

					if(where is relativeTo)
						return false;

					where = where.parentNode;
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 1) { // the > operator
				where = where.parentNode;

				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 2) { // the + operator
			//writeln("WHERE", where, " ", part);
				where = where.previousSibling("*");

				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 3) { // the ~ operator
				where = where.previousSibling("*");
				while(where !is null) {
					if(part.matchElement(where, relativeTo))
						break;

					if(where is relativeTo)
						return false;

					where = where.previousSibling("*");
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore
				// FIXME
				// nothing is checked here, so the part before a `<` is
				// currently ignored when matching backwards
			}

			lastSeparation = part.separation;

			/*
				/+
				I commented this to magically make unittest pass and I think the reason it works
				when commented is that I inject a :scope iff there's a selector at top level now
				and if not, it follows the (frankly stupid) w3c standard behavior at arbitrary id
				asduiwh . but me injecting the :scope also acts as a terminating condition.

				tbh this prolly needs like a trillion more tests.
				+/
			if(where is relativeTo)
				return false; // at end of line, if we aren't done by now, the match fails
			*/
		}
		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	/// Lexes and parses a single comma-free selector into a component.
	static SelectorComponent fromString(string selector) {
		return parseSelector(lexSelector(selector));
	}
}

/++
	Lexes `selector` and parses each top-level comma-separated alternative
	into its own [SelectorComponent]. Commas inside parentheses (such as in
	`:not(a, b)`) do not split. Empty alternatives are skipped.
+/
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		// advance to the next comma that is not nested inside parentheses
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		// fewer than two tokens left means at most a trailing comma remains
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/// Parses one comma-free token list, as produced by lexSelector, into a SelectorComponent.
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	// Small state machine over the token list. `current` accumulates one
	// SelectorPart; commit() appends it to the result whenever a combinator
	// (or the end of input) closes it.
	//
	// Params:
	//   tokens = lexer output for a single alternative (no top-level commas)
	//   caseSensitiveTags = when false, tag names are lowercased before storing
	//
	// Throws: Exception on a functional pseudo-class that is not followed by
	//   "(", and on operator tokens that are not valid at that position.
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// Returns the concatenated tokens between the parentheses that follow
		// the current token. The main loop then skips over those same tokens
		// through State.SkippingFunctionalSelector.
		string readFunctionalSelector() {
			// without the bounds check a trailing ":not" / ":nth-child"
			// indexed past the end of tokens
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			string text;
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				text ~= t;
			}

			return text;
		}

		// index of the token within selectorTokens, or -1 for an identifier
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							// an operator token that makes no sense here (stray ")",
							// "]", "=" ...) is bad input, not a program bug, so
							// report it the same way as the other parse errors
							throw new Exception("parse error: unexpected token " ~ token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// requires a placeholder attribute name, presumably so these
						// never match - TODO confirm that is the intent
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are silently ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// swallow everything up to the ")" balancing the opening "("
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						// NOTE(review): this lands in attributeValue, which is
						// overwritten or unused on every later path, so such
						// tokens are effectively dropped - confirm intended.
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default:
						// some other token containing "="; bad input rather
						// than an internal error
						throw new Exception("parse error: unknown attribute comparison " ~ attributeComparison);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

/// Returns `input` with repeated elements dropped; the first occurrence of each is kept, in order.
Element[] removeDuplicates(Element[] input) {
	// set of elements already emitted
	bool[Element] seen;
	Element[] unique;

	foreach(candidate; input) {
		if(candidate !in seen) {
			seen[candidate] = true;
			unique ~= candidate;
		}
	}

	return unique;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Builds the style from one rule.

		Params:
			rule = the selector text the declarations belong to (empty for an inline style)
			content = the `name: value;` declarations, without the surrounding braces

		Nothing at all is recorded when `content` is blank. Declarations
		lacking a colon are skipped, and `!important` is removed from values.
	+/
	this(string rule, string content) {
		content = content.strip();
		if(content.length == 0)
			return;

		originatingRule = rule.strip();
		originatingSpecificity = getSpecificityOfRule(originatingRule); // FIXME: if there's commas, this won't actually work!

		foreach(declaration; content.split(";")) {
			declaration = declaration.strip();
			auto colon = declaration.indexOf(":");
			if(declaration.length == 0 || colon == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			auto rawValue = declaration[colon + 1 .. $];
			rawValue = rawValue.replace("! important", "!important");
			rawValue = rawValue.replace("!important", ""); // FIXME don't drop important

			Property entry;
			entry.specificity = originatingSpecificity;
			entry.givenExplicitly = true;
			entry.value = rawValue.strip();
			entry.name = declaration[0 .. colon].strip();

			properties ~= entry;
		}

		// shorthands appended while expanding are not visited again:
		// foreach walks the slice as it was when the loop began
		foreach(declared; properties)
			expandShortForm(declared, originatingSpecificity);
	}

	/// Specificity for `rule`. Not implemented yet: every rule gets the default (zero) value.
	Specificity getSpecificityOfRule(string rule) {
		Specificity result;

		// An empty rule means an inline style (which would be
		// result.important = 2), anything else is a FIXME; neither
		// case is handled so far.

		return result;
	}

	string originatingRule; /// the stripped rule text given to the constructor
	Specificity originatingSpecificity; /// what getSpecificityOfRule returned for originatingRule

	///.
union Specificity {
		uint score; /// all four components viewed as one comparable number
		// version(little_endian)
		/// The individual components; `important` is the most significant byte of `score` on little-endian targets.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	/// A single declaration held by the style.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; /// dash-style property name
		string value; /// value text with any `!important` removed
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	/// Every known declaration, including the longhands inferred from shorthands.
	Property[] properties;

	/++
		Property-style access using the camelCased name: with no argument the
		current value is returned, with an argument the value is set at
		inline-style specificity.
	+/
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);

		// setValue takes a Specificity and an integer literal does not
		// convert to the union, so build it explicitly
		Specificity inlineSpecificity;
		inlineSpecificity.score = 0x02000000; /* inline specificity */
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name; returns null when the property is not present
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, takes dash style name.

		An existing property is only overwritten when `newSpecificity` is at
		least as high as the stored one. Shorthands are expanded into their
		longhands afterwards.

		Params:
			name = dash-style property name
			value = new value; `!important` is stripped and raises `newSpecificity.important` to 1
			newSpecificity = specificity of whoever is setting the value
			explicit = false when the value was inferred from a shorthand

		Returns: the cleaned-up value, whether or not it was stored.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties) {
			if(property.name != name)
				continue;

			if(newSpecificity.score < property.specificity.score) {
				//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
				return value; // do nothing - the specificity is too low
			}

			// Store first, expand second. The expansion must see the new
			// value, and it may append to `properties`, after which this
			// ref could point into a stale copy of the array.
			property.givenExplicitly = explicit;
			property.value = value;
			expandShortForm(property, newSpecificity); // passed by value
			return value;
		}

		// it's not here...

		Property p;
		// honour the flag so that inferred longhands stay out of toString
		p.givenExplicitly = explicit;
		p.name = name;
		p.value = value;
		// NOTE(review): newSpecificity is not recorded (neither here nor when
		// overwriting), so the comparison above only ever sees the rule's own
		// specificity - confirm whether that is intended.
		p.specificity = originatingSpecificity;

		properties ~= p;
		expandShortForm(p, originatingSpecificity);

		return value;
	}

	// Expands a one- to four-value box shorthand ("margin", "padding") into
	// its -top/-right/-bottom/-left longhands, following the CSS ordering.
	// Values with any other number of space-separated parts are left unexpanded.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// ignore the empty pieces that repeated spaces would produce
		string[] parts;
		foreach(piece; value.split(" "))
			if(piece.length)
				parts ~= piece;

		switch(parts.length) {
			case 1: // all four sides
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2: // vertical | horizontal
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3: // top | horizontal | bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				// left mirrors right; it used to be taken from the bottom value
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4: // top | right | bottom | left
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// empty or over-long value coming from the stylesheet: this is
				// bad input rather than a program error, so don't expand it
			break;
		}
	}

	/// Expands a shorthand property (margin, padding, border, outline) into its per-side longhands.
void expandShortForm(Property p, Specificity specificity) {
	// Fans a shorthand declaration out into its per-side properties.
	switch(p.name) {
		case "margin":
		case "padding":
			// one to four values, distributed by expandQuadShort
			expandQuadShort(p.name, p.value, specificity);
		break;
		case "border":
		case "outline":
			// the whole value is copied to each side unchanged, flagged as not explicit
			setValue(p.name ~ "-left", p.value, specificity, false);
			setValue(p.name ~ "-right", p.value, specificity, false);
			setValue(p.name ~ "-top", p.value, specificity, false);
			setValue(p.name ~ "-bottom", p.value, specificity, false);
		break;

		// the per-side forms are listed but not expanded any further
		case "border-top":
		case "border-bottom":
		case "border-left":
		case "border-right":
		case "outline-top":
		case "outline-bottom":
		case "outline-left":
		case "outline-right":

		default: {}
	}
}

/// Serializes the explicitly given properties; inferred ones are left out.
/// With a non-empty originatingRule the result is a "rule { ... }" block with
/// one declaration per line, otherwise just the " name: value;" declarations.
override string toString() {
	string ret;
	if(originatingRule.length)
		ret = originatingRule ~ " {";

	foreach(property; properties) {
		if(!property.givenExplicitly)
			continue; // skip the inferred ones

		if(originatingRule.length)
			ret ~= "\n\t";
		else
			ret ~= " ";

		ret ~= property.name ~ ": " ~ property.value ~ ";";
	}

	if(originatingRule.length)
		ret ~= "\n}\n";

	return ret;
}
}

/// Wraps `url` in a CSS `url("...")` token. Backslashes and double quotes in
/// the argument are backslash-escaped so they cannot end the quoted string
/// early.
string cssUrl(string url) {
	// escape the backslash first, otherwise the escapes added for quotes would be doubled
	auto escaped = url.replace("\\", "\\\\").replace("\"", "\\\"");
	return "url(\"" ~ escaped ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// The rules read from the source text, in source order.
	CssStyle[] rules;

	/// Splits `source` into "selector { declarations }" pairs and stores one
	/// CssStyle per pair in `rules`. Comments are dropped. At-rules are
	/// skipped: up to the next ';' when no '{' follows, otherwise up to the
	/// first '}' after the '{' (nested blocks are not tracked).
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two buffers is currently being filled
		string* currentThing = &currentRule;

		// Handles one character while outside of comments and at-rules.
		void handleNormal(char c) {
			switch(c) {
				case '@':
					state = 4;
				break;
				case '/':
					state = 1;
				break;
				case '{':
					currentThing = &currentValue;
				break;
				case '}':
					if(currentThing is &currentValue) {
						rules ~= new CssStyle(currentRule, currentValue);

						currentRule = "";
						currentValue = "";

						currentThing = &currentRule;
					} else {
						// idk what is going on here.
						// check sveit.com to reproduce
						// NOTE(review): a stray '}' is also what remains after an
						// at-rule with nested blocks was left at its first '}'.
						currentRule = "";
						currentValue = "";
					}
				break;
				default:
					(*currentThing) ~= c;
			}
		}

		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					handleNormal(c);
				break;
				case 1: // saw '/', expecting '*'
					if(c == '*')
						state = 2;
					else {
						// Not a comment after all: keep the slash, then give the
						// current character its normal treatment so that a brace
						// or '@' right after a slash is not swallowed as text.
						state = 0;
						(*currentThing) ~= '/';
						handleNormal(c);
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // saw '*' inside a comment, expecting / to end it
					if(c == '/')
						state = 0;
					else if(c != '*')
						state = 2; // it's just a comment so no need to append
					// another '*' keeps us in this state, so "**/" still closes the comment
				break;
				case 4: // inside an at-rule prelude
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside an at-rule block
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` on top. Once the current buffer is full a larger one is
	/// allocated: double the size below 4096 slots, 4096 more slots after that.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when nothing is on the stack.
	@property bool empty() {
		return internalLength == 0;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently positioned on.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1; // -1: none of start's children has been visited yet
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the first unvisited child of the current
	/// one, or, once the children are used up, the next unvisited child of the
	/// nearest ancestor that still has one. The range becomes empty when the
	/// stack of ancestors runs out.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			if(stack.empty())
				isEmpty = true;
			else {
				// done with this subtree; resume in the parent
				current = stack.pop();
				goto more;
			}
		} else {
			// descend into the next child, remembering where we were
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	/// True once the walk has run out of elements.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element together with the index of the child last descended into
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	// the ancestors of `current`, innermost on top
	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the offset of the first occurrence of `needle` in `haystack`, or
/// -1 when the search comes back with nothing.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	// find hands back the part of haystack that starts at the match
	auto remainder = std.algorithm.find(haystack, needle);
	if(remainder.length != 0)
		return haystack.length - remainder.length;
	return -1;
}

/// Returns a new array holding `arr` with the items of `what` spliced in
/// right behind index `position`. Neither input is modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);

	auto head = arr[0 .. position + 1];
	auto tail = arr[position + 1 .. $];

	T[] combined;
	combined.length = arr.length + what.length;

	size_t cursor = 0;
	foreach(item; head) {
		combined[cursor] = item;
		cursor++;
	}
	foreach(item; what) {
		combined[cursor] = item;
		cursor++;
	}
	foreach(item; tail) {
		combined[cursor] = item;
		cursor++;
	}

	return combined;
}

/// Linear search: true when some element of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	for(size_t idx = 0; idx < arr.length; idx++) {
		if(item == arr[idx])
			return true;
	}
	return false;
}

/// Copies a string-to-string associative array into a fresh one.
private string[string] aadup(in string[string] arr) {
	string[string] copy;
	foreach(key, val; arr) {
		copy[key] = val;
	}
	return copy;
}















// These MUST be sorted. 
See generatedomcases.d for a program to generate it if you need to add more than a few (otherwise maybe you can work it in yourself but yikes) 7854 7855 immutable string[] availableEntities = 7856 ["AElig", "AElig", "AMP", "AMP", "Aacute", "Aacute", "Abreve", "Abreve", "Acirc", "Acirc", "Acy", "Acy", "Afr", "Afr", "Agrave", "Agrave", "Alpha", "Alpha", "Amacr", "Amacr", "And", "And", "Aogon", "Aogon", "Aopf", "Aopf", "ApplyFunction", "ApplyFunction", "Aring", "Aring", "Ascr", "Ascr", "Assign", "Assign", "Atilde", 7857 "Atilde", "Auml", "Auml", "Backslash", "Backslash", "Barv", "Barv", "Barwed", "Barwed", "Bcy", "Bcy", "Because", "Because", "Bernoullis", "Bernoullis", "Beta", "Beta", "Bfr", "Bfr", "Bopf", "Bopf", "Breve", "Breve", "Bscr", "Bscr", "Bumpeq", "Bumpeq", "CHcy", "CHcy", "COPY", "COPY", "Cacute", "Cacute", "Cap", "Cap", "CapitalDifferentialD", 7858 "CapitalDifferentialD", "Cayleys", "Cayleys", "Ccaron", "Ccaron", "Ccedil", "Ccedil", "Ccirc", "Ccirc", "Cconint", "Cconint", "Cdot", "Cdot", "Cedilla", "Cedilla", "CenterDot", "CenterDot", "Cfr", "Cfr", "Chi", "Chi", "CircleDot", "CircleDot", "CircleMinus", "CircleMinus", "CirclePlus", "CirclePlus", "CircleTimes", "CircleTimes", 7859 "ClockwiseContourIntegral", "ClockwiseContourIntegral", "CloseCurlyDoubleQuote", "CloseCurlyDoubleQuote", "CloseCurlyQuote", "CloseCurlyQuote", "Colon", "Colon", "Colone", "Colone", "Congruent", "Congruent", "Conint", "Conint", "ContourIntegral", "ContourIntegral", "Copf", "Copf", "Coproduct", "Coproduct", "CounterClockwiseContourIntegral", 7860 "CounterClockwiseContourIntegral", "Cross", "Cross", "Cscr", "Cscr", "Cup", "Cup", "CupCap", "CupCap", "DD", "DD", "DDotrahd", "DDotrahd", "DJcy", "DJcy", "DScy", "DScy", "DZcy", "DZcy", "Dagger", "Dagger", "Darr", "Darr", "Dashv", "Dashv", "Dcaron", "Dcaron", "Dcy", "Dcy", "Del", "Del", "Delta", "Delta", "Dfr", "Dfr", 7861 "DiacriticalAcute", "DiacriticalAcute", "DiacriticalDot", "DiacriticalDot", "DiacriticalDoubleAcute", 
"DiacriticalDoubleAcute", "DiacriticalGrave", "DiacriticalGrave", "DiacriticalTilde", "DiacriticalTilde", "Diamond", "Diamond", "DifferentialD", "DifferentialD", "Dopf", "Dopf", "Dot", "Dot", "DotDot", "DotDot", "DotEqual", 7862 "DotEqual", "DoubleContourIntegral", "DoubleContourIntegral", "DoubleDot", "DoubleDot", "DoubleDownArrow", "DoubleDownArrow", "DoubleLeftArrow", "DoubleLeftArrow", "DoubleLeftRightArrow", "DoubleLeftRightArrow", "DoubleLeftTee", "DoubleLeftTee", "DoubleLongLeftArrow", "DoubleLongLeftArrow", "DoubleLongLeftRightArrow", 7863 "DoubleLongLeftRightArrow", "DoubleLongRightArrow", "DoubleLongRightArrow", "DoubleRightArrow", "DoubleRightArrow", "DoubleRightTee", "DoubleRightTee", "DoubleUpArrow", "DoubleUpArrow", "DoubleUpDownArrow", "DoubleUpDownArrow", "DoubleVerticalBar", "DoubleVerticalBar", "DownArrow", "DownArrow", "DownArrowBar", "DownArrowBar", 7864 "DownArrowUpArrow", "DownArrowUpArrow", "DownBreve", "DownBreve", "DownLeftRightVector", "DownLeftRightVector", "DownLeftTeeVector", "DownLeftTeeVector", "DownLeftVector", "DownLeftVector", "DownLeftVectorBar", "DownLeftVectorBar", "DownRightTeeVector", "DownRightTeeVector", "DownRightVector", "DownRightVector", "DownRightVectorBar", 7865 "DownRightVectorBar", "DownTee", "DownTee", "DownTeeArrow", "DownTeeArrow", "Downarrow", "Downarrow", "Dscr", "Dscr", "Dstrok", "Dstrok", "ENG", "ENG", "ETH", "ETH", "Eacute", "Eacute", "Ecaron", "Ecaron", "Ecirc", "Ecirc", "Ecy", "Ecy", "Edot", "Edot", "Efr", "Efr", "Egrave", "Egrave", "Element", "Element", "Emacr", "Emacr", 7866 "EmptySmallSquare", "EmptySmallSquare", "EmptyVerySmallSquare", "EmptyVerySmallSquare", "Eogon", "Eogon", "Eopf", "Eopf", "Epsilon", "Epsilon", "Equal", "Equal", "EqualTilde", "EqualTilde", "Equilibrium", "Equilibrium", "Escr", "Escr", "Esim", "Esim", "Eta", "Eta", "Euml", "Euml", "Exists", "Exists", "ExponentialE", "ExponentialE", 7867 "Fcy", "Fcy", "Ffr", "Ffr", "FilledSmallSquare", "FilledSmallSquare", "FilledVerySmallSquare", 
"FilledVerySmallSquare", "Fopf", "Fopf", "ForAll", "ForAll", "Fouriertrf", "Fouriertrf", "Fscr", "Fscr", "GJcy", "GJcy", "GT", "GT", "Gamma", "Gamma", "Gammad", "Gammad", "Gbreve", "Gbreve", "Gcedil", "Gcedil", "Gcirc", "Gcirc", 7868 "Gcy", "Gcy", "Gdot", "Gdot", "Gfr", "Gfr", "Gg", "Gg", "Gopf", "Gopf", "GreaterEqual", "GreaterEqual", "GreaterEqualLess", "GreaterEqualLess", "GreaterFullEqual", "GreaterFullEqual", "GreaterGreater", "GreaterGreater", "GreaterLess", "GreaterLess", "GreaterSlantEqual", "GreaterSlantEqual", "GreaterTilde", "GreaterTilde", 7869 "Gscr", "Gscr", "Gt", "Gt", "HARDcy", "HARDcy", "Hacek", "Hacek", "Hat", "Hat", "Hcirc", "Hcirc", "Hfr", "Hfr", "HilbertSpace", "HilbertSpace", "Hopf", "Hopf", "HorizontalLine", "HorizontalLine", "Hscr", "Hscr", "Hstrok", "Hstrok", "HumpDownHump", "HumpDownHump", "HumpEqual", "HumpEqual", "IEcy", "IEcy", "IJlig", "IJlig", 7870 "IOcy", "IOcy", "Iacute", "Iacute", "Icirc", "Icirc", "Icy", "Icy", "Idot", "Idot", "Ifr", "Ifr", "Igrave", "Igrave", "Im", "Im", "Imacr", "Imacr", "ImaginaryI", "ImaginaryI", "Implies", "Implies", "Int", "Int", "Integral", "Integral", "Intersection", "Intersection", "InvisibleComma", "InvisibleComma", "InvisibleTimes", 7871 "InvisibleTimes", "Iogon", "Iogon", "Iopf", "Iopf", "Iota", "Iota", "Iscr", "Iscr", "Itilde", "Itilde", "Iukcy", "Iukcy", "Iuml", "Iuml", "Jcirc", "Jcirc", "Jcy", "Jcy", "Jfr", "Jfr", "Jopf", "Jopf", "Jscr", "Jscr", "Jsercy", "Jsercy", "Jukcy", "Jukcy", "KHcy", "KHcy", "KJcy", "KJcy", "Kappa", "Kappa", "Kcedil", "Kcedil", 7872 "Kcy", "Kcy", "Kfr", "Kfr", "Kopf", "Kopf", "Kscr", "Kscr", "LJcy", "LJcy", "LT", "LT", "Lacute", "Lacute", "Lambda", "Lambda", "Lang", "Lang", "Laplacetrf", "Laplacetrf", "Larr", "Larr", "Lcaron", "Lcaron", "Lcedil", "Lcedil", "Lcy", "Lcy", "LeftAngleBracket", "LeftAngleBracket", "LeftArrow", "LeftArrow", "LeftArrowBar", 7873 "LeftArrowBar", "LeftArrowRightArrow", "LeftArrowRightArrow", "LeftCeiling", "LeftCeiling", "LeftDoubleBracket", 
"LeftDoubleBracket", "LeftDownTeeVector", "LeftDownTeeVector", "LeftDownVector", "LeftDownVector", "LeftDownVectorBar", "LeftDownVectorBar", "LeftFloor", "LeftFloor", "LeftRightArrow", "LeftRightArrow", "LeftRightVector", 7874 "LeftRightVector", "LeftTee", "LeftTee", "LeftTeeArrow", "LeftTeeArrow", "LeftTeeVector", "LeftTeeVector", "LeftTriangle", "LeftTriangle", "LeftTriangleBar", "LeftTriangleBar", "LeftTriangleEqual", "LeftTriangleEqual", "LeftUpDownVector", "LeftUpDownVector", "LeftUpTeeVector", "LeftUpTeeVector", "LeftUpVector", "LeftUpVector", 7875 "LeftUpVectorBar", "LeftUpVectorBar", "LeftVector", "LeftVector", "LeftVectorBar", "LeftVectorBar", "Leftarrow", "Leftarrow", "Leftrightarrow", "Leftrightarrow", "LessEqualGreater", "LessEqualGreater", "LessFullEqual", "LessFullEqual", "LessGreater", "LessGreater", "LessLess", "LessLess", "LessSlantEqual", "LessSlantEqual", 7876 "LessTilde", "LessTilde", "Lfr", "Lfr", "Ll", "Ll", "Lleftarrow", "Lleftarrow", "Lmidot", "Lmidot", "LongLeftArrow", "LongLeftArrow", "LongLeftRightArrow", "LongLeftRightArrow", "LongRightArrow", "LongRightArrow", "Longleftarrow", "Longleftarrow", "Longleftrightarrow", "Longleftrightarrow", "Longrightarrow", "Longrightarrow", 7877 "Lopf", "Lopf", "LowerLeftArrow", "LowerLeftArrow", "LowerRightArrow", "LowerRightArrow", "Lscr", "Lscr", "Lsh", "Lsh", "Lstrok", "Lstrok", "Lt", "Lt", "Map", "Map", "Mcy", "Mcy", "MediumSpace", "MediumSpace", "Mellintrf", "Mellintrf", "Mfr", "Mfr", "MinusPlus", "MinusPlus", "Mopf", "Mopf", "Mscr", "Mscr", "Mu", "Mu", 7878 "NJcy", "NJcy", "Nacute", "Nacute", "Ncaron", "Ncaron", "Ncedil", "Ncedil", "Ncy", "Ncy", "NegativeMediumSpace", "NegativeMediumSpace", "NegativeThickSpace", "NegativeThickSpace", "NegativeThinSpace", "NegativeThinSpace", "NegativeVeryThinSpace", "NegativeVeryThinSpace", "NestedGreaterGreater", "NestedGreaterGreater", 7879 "NestedLessLess", "NestedLessLess", "NewLine", "NewLine", "Nfr", "Nfr", "NoBreak", "NoBreak", "NonBreakingSpace", 
"NonBreakingSpace", "Nopf", "Nopf", "Not", "Not", "NotCongruent", "NotCongruent", "NotCupCap", "NotCupCap", "NotDoubleVerticalBar", "NotDoubleVerticalBar", "NotElement", "NotElement", "NotEqual", "NotEqual", 7880 "NotExists", "NotExists", "NotGreater", "NotGreater", "NotGreaterEqual", "NotGreaterEqual", "NotGreaterLess", "NotGreaterLess", "NotGreaterTilde", "NotGreaterTilde", "NotLeftTriangle", "NotLeftTriangle", "NotLeftTriangleEqual", "NotLeftTriangleEqual", "NotLess", "NotLess", "NotLessEqual", "NotLessEqual", "NotLessGreater", 7881 "NotLessGreater", "NotLessTilde", "NotLessTilde", "NotPrecedes", "NotPrecedes", "NotPrecedesSlantEqual", "NotPrecedesSlantEqual", "NotReverseElement", "NotReverseElement", "NotRightTriangle", "NotRightTriangle", "NotRightTriangleEqual", "NotRightTriangleEqual", "NotSquareSubsetEqual", "NotSquareSubsetEqual", "NotSquareSupersetEqual", 7882 "NotSquareSupersetEqual", "NotSubsetEqual", "NotSubsetEqual", "NotSucceeds", "NotSucceeds", "NotSucceedsSlantEqual", "NotSucceedsSlantEqual", "NotSupersetEqual", "NotSupersetEqual", "NotTilde", "NotTilde", "NotTildeEqual", "NotTildeEqual", "NotTildeFullEqual", "NotTildeFullEqual", "NotTildeTilde", "NotTildeTilde", "NotVerticalBar", 7883 "NotVerticalBar", "Nscr", "Nscr", "Ntilde", "Ntilde", "Nu", "Nu", "OElig", "OElig", "Oacute", "Oacute", "Ocirc", "Ocirc", "Ocy", "Ocy", "Odblac", "Odblac", "Ofr", "Ofr", "Ograve", "Ograve", "Omacr", "Omacr", "Omega", "Omega", "Omicron", "Omicron", "Oopf", "Oopf", "OpenCurlyDoubleQuote", "OpenCurlyDoubleQuote", "OpenCurlyQuote", 7884 "OpenCurlyQuote", "Or", "Or", "Oscr", "Oscr", "Oslash", "Oslash", "Otilde", "Otilde", "Otimes", "Otimes", "Ouml", "Ouml", "OverBar", "OverBar", "OverBrace", "OverBrace", "OverBracket", "OverBracket", "OverParenthesis", "OverParenthesis", "PartialD", "PartialD", "Pcy", "Pcy", "Pfr", "Pfr", "Phi", "Phi", "Pi", "Pi", "PlusMinus", 7885 "PlusMinus", "Poincareplane", "Poincareplane", "Popf", "Popf", "Pr", "Pr", "Precedes", "Precedes", 
"PrecedesEqual", "PrecedesEqual", "PrecedesSlantEqual", "PrecedesSlantEqual", "PrecedesTilde", "PrecedesTilde", "Prime", "Prime", "Product", "Product", "Proportion", "Proportion", "Proportional", "Proportional", "Pscr", "Pscr", 7886 "Psi", "Psi", "QUOT", "QUOT", "Qfr", "Qfr", "Qopf", "Qopf", "Qscr", "Qscr", "RBarr", "RBarr", "REG", "REG", "Racute", "Racute", "Rang", "Rang", "Rarr", "Rarr", "Rarrtl", "Rarrtl", "Rcaron", "Rcaron", "Rcedil", "Rcedil", "Rcy", "Rcy", "Re", "Re", "ReverseElement", "ReverseElement", "ReverseEquilibrium", "ReverseEquilibrium", 7887 "ReverseUpEquilibrium", "ReverseUpEquilibrium", "Rfr", "Rfr", "Rho", "Rho", "RightAngleBracket", "RightAngleBracket", "RightArrow", "RightArrow", "RightArrowBar", "RightArrowBar", "RightArrowLeftArrow", "RightArrowLeftArrow", "RightCeiling", "RightCeiling", "RightDoubleBracket", "RightDoubleBracket", "RightDownTeeVector", 7888 "RightDownTeeVector", "RightDownVector", "RightDownVector", "RightDownVectorBar", "RightDownVectorBar", "RightFloor", "RightFloor", "RightTee", "RightTee", "RightTeeArrow", "RightTeeArrow", "RightTeeVector", "RightTeeVector", "RightTriangle", "RightTriangle", "RightTriangleBar", "RightTriangleBar", "RightTriangleEqual", 7889 "RightTriangleEqual", "RightUpDownVector", "RightUpDownVector", "RightUpTeeVector", "RightUpTeeVector", "RightUpVector", "RightUpVector", "RightUpVectorBar", "RightUpVectorBar", "RightVector", "RightVector", "RightVectorBar", "RightVectorBar", "Rightarrow", "Rightarrow", "Ropf", "Ropf", "RoundImplies", "RoundImplies", 7890 "Rrightarrow", "Rrightarrow", "Rscr", "Rscr", "Rsh", "Rsh", "RuleDelayed", "RuleDelayed", "SHCHcy", "SHCHcy", "SHcy", "SHcy", "SOFTcy", "SOFTcy", "Sacute", "Sacute", "Sc", "Sc", "Scaron", "Scaron", "Scedil", "Scedil", "Scirc", "Scirc", "Scy", "Scy", "Sfr", "Sfr", "ShortDownArrow", "ShortDownArrow", "ShortLeftArrow", "ShortLeftArrow", 7891 "ShortRightArrow", "ShortRightArrow", "ShortUpArrow", "ShortUpArrow", "Sigma", "Sigma", "SmallCircle", 
"SmallCircle", "Sopf", "Sopf", "Sqrt", "Sqrt", "Square", "Square", "SquareIntersection", "SquareIntersection", "SquareSubset", "SquareSubset", "SquareSubsetEqual", "SquareSubsetEqual", "SquareSuperset", "SquareSuperset", 7892 "SquareSupersetEqual", "SquareSupersetEqual", "SquareUnion", "SquareUnion", "Sscr", "Sscr", "Star", "Star", "Sub", "Sub", "Subset", "Subset", "SubsetEqual", "SubsetEqual", "Succeeds", "Succeeds", "SucceedsEqual", "SucceedsEqual", "SucceedsSlantEqual", "SucceedsSlantEqual", "SucceedsTilde", "SucceedsTilde", "SuchThat", 7893 "SuchThat", "Sum", "Sum", "Sup", "Sup", "Superset", "Superset", "SupersetEqual", "SupersetEqual", "Supset", "Supset", "THORN", "THORN", "TRADE", "TRADE", "TSHcy", "TSHcy", "TScy", "TScy", "Tab", "Tab", "Tau", "Tau", "Tcaron", "Tcaron", "Tcedil", "Tcedil", "Tcy", "Tcy", "Tfr", "Tfr", "Therefore", "Therefore", "Theta", "Theta", 7894 "ThinSpace", "ThinSpace", "Tilde", "Tilde", "TildeEqual", "TildeEqual", "TildeFullEqual", "TildeFullEqual", "TildeTilde", "TildeTilde", "Topf", "Topf", "TripleDot", "TripleDot", "Tscr", "Tscr", "Tstrok", "Tstrok", "Uacute", "Uacute", "Uarr", "Uarr", "Uarrocir", "Uarrocir", "Ubrcy", "Ubrcy", "Ubreve", "Ubreve", "Ucirc", 7895 "Ucirc", "Ucy", "Ucy", "Udblac", "Udblac", "Ufr", "Ufr", "Ugrave", "Ugrave", "Umacr", "Umacr", "UnderBar", "UnderBar", "UnderBrace", "UnderBrace", "UnderBracket", "UnderBracket", "UnderParenthesis", "UnderParenthesis", "Union", "Union", "UnionPlus", "UnionPlus", "Uogon", "Uogon", "Uopf", "Uopf", "UpArrow", "UpArrow", "UpArrowBar", 7896 "UpArrowBar", "UpArrowDownArrow", "UpArrowDownArrow", "UpDownArrow", "UpDownArrow", "UpEquilibrium", "UpEquilibrium", "UpTee", "UpTee", "UpTeeArrow", "UpTeeArrow", "Uparrow", "Uparrow", "Updownarrow", "Updownarrow", "UpperLeftArrow", "UpperLeftArrow", "UpperRightArrow", "UpperRightArrow", "Upsi", "Upsi", "Upsilon", "Upsilon", 7897 "Uring", "Uring", "Uscr", "Uscr", "Utilde", "Utilde", "Uuml", "Uuml", "VDash", "VDash", "Vbar", "Vbar", "Vcy", "Vcy", 
"Vdash", "Vdash", "Vdashl", "Vdashl", "Vee", "Vee", "Verbar", "Verbar", "Vert", "Vert", "VerticalBar", "VerticalBar", "VerticalLine", "VerticalLine", "VerticalSeparator", "VerticalSeparator", "VerticalTilde", 7898 "VerticalTilde", "VeryThinSpace", "VeryThinSpace", "Vfr", "Vfr", "Vopf", "Vopf", "Vscr", "Vscr", "Vvdash", "Vvdash", "Wcirc", "Wcirc", "Wedge", "Wedge", "Wfr", "Wfr", "Wopf", "Wopf", "Wscr", "Wscr", "Xfr", "Xfr", "Xi", "Xi", "Xopf", "Xopf", "Xscr", "Xscr", "YAcy", "YAcy", "YIcy", "YIcy", "YUcy", "YUcy", "Yacute", "Yacute", 7899 "Ycirc", "Ycirc", "Ycy", "Ycy", "Yfr", "Yfr", "Yopf", "Yopf", "Yscr", "Yscr", "Yuml", "Yuml", "ZHcy", "ZHcy", "Zacute", "Zacute", "Zcaron", "Zcaron", "Zcy", "Zcy", "Zdot", "Zdot", "ZeroWidthSpace", "ZeroWidthSpace", "Zeta", "Zeta", "Zfr", "Zfr", "Zopf", "Zopf", "Zscr", "Zscr", "aacute", "aacute", "abreve", "abreve", "ac", 7900 "ac", "acd", "acd", "acirc", "acirc", "acute", "acute", "acy", "acy", "aelig", "aelig", "af", "af", "afr", "afr", "agrave", "agrave", "alefsym", "alefsym", "aleph", "aleph", "alpha", "alpha", "amacr", "amacr", "amalg", "amalg", "and", "and", "andand", "andand", "andd", "andd", "andslope", "andslope", "andv", "andv", "ang", 7901 "ang", "ange", "ange", "angle", "angle", "angmsd", "angmsd", "angmsdaa", "angmsdaa", "angmsdab", "angmsdab", "angmsdac", "angmsdac", "angmsdad", "angmsdad", "angmsdae", "angmsdae", "angmsdaf", "angmsdaf", "angmsdag", "angmsdag", "angmsdah", "angmsdah", "angrt", "angrt", "angrtvb", "angrtvb", "angrtvbd", "angrtvbd", "angsph", 7902 "angsph", "angst", "angst", "angzarr", "angzarr", "aogon", "aogon", "aopf", "aopf", "ap", "ap", "apE", "apE", "apacir", "apacir", "ape", "ape", "apid", "apid", "approx", "approx", "approxeq", "approxeq", "aring", "aring", "ascr", "ascr", "ast", "ast", "asymp", "asymp", "asympeq", "asympeq", "atilde", "atilde", "auml", 7903 "auml", "awconint", "awconint", "awint", "awint", "bNot", "bNot", "backcong", "backcong", "backepsilon", "backepsilon", "backprime", 
"backprime", "backsim", "backsim", "backsimeq", "backsimeq", "barvee", "barvee", "barwed", "barwed", "barwedge", "barwedge", "bbrk", "bbrk", "bbrktbrk", "bbrktbrk", "bcong", "bcong", "bcy", 7904 "bcy", "bdquo", "bdquo", "becaus", "becaus", "because", "because", "bemptyv", "bemptyv", "bepsi", "bepsi", "bernou", "bernou", "beta", "beta", "beth", "beth", "between", "between", "bfr", "bfr", "bigcap", "bigcap", "bigcirc", "bigcirc", "bigcup", "bigcup", "bigodot", "bigodot", "bigoplus", "bigoplus", "bigotimes", "bigotimes", 7905 "bigsqcup", "bigsqcup", "bigstar", "bigstar", "bigtriangledown", "bigtriangledown", "bigtriangleup", "bigtriangleup", "biguplus", "biguplus", "bigvee", "bigvee", "bigwedge", "bigwedge", "bkarow", "bkarow", "blacklozenge", "blacklozenge", "blacksquare", "blacksquare", "blacktriangle", "blacktriangle", "blacktriangledown", 7906 "blacktriangledown", "blacktriangleleft", "blacktriangleleft", "blacktriangleright", "blacktriangleright", "blank", "blank", "blk12", "blk12", "blk14", "blk14", "blk34", "blk34", "block", "block", "bnot", "bnot", "bopf", "bopf", "bot", "bot", "bottom", "bottom", "bowtie", "bowtie", "boxDL", "boxDL", "boxDR", "boxDR", "boxDl", 7907 "boxDl", "boxDr", "boxDr", "boxH", "boxH", "boxHD", "boxHD", "boxHU", "boxHU", "boxHd", "boxHd", "boxHu", "boxHu", "boxUL", "boxUL", "boxUR", "boxUR", "boxUl", "boxUl", "boxUr", "boxUr", "boxV", "boxV", "boxVH", "boxVH", "boxVL", "boxVL", "boxVR", "boxVR", "boxVh", "boxVh", "boxVl", "boxVl", "boxVr", "boxVr", "boxbox", 7908 "boxbox", "boxdL", "boxdL", "boxdR", "boxdR", "boxdl", "boxdl", "boxdr", "boxdr", "boxh", "boxh", "boxhD", "boxhD", "boxhU", "boxhU", "boxhd", "boxhd", "boxhu", "boxhu", "boxminus", "boxminus", "boxplus", "boxplus", "boxtimes", "boxtimes", "boxuL", "boxuL", "boxuR", "boxuR", "boxul", "boxul", "boxur", "boxur", "boxv", 7909 "boxv", "boxvH", "boxvH", "boxvL", "boxvL", "boxvR", "boxvR", "boxvh", "boxvh", "boxvl", "boxvl", "boxvr", "boxvr", "bprime", "bprime", "breve", "breve", 
"brvbar", "brvbar", "bscr", "bscr", "bsemi", "bsemi", "bsim", "bsim", "bsime", "bsime", "bsol", "bsol", "bsolb", "bsolb", "bsolhsub", "bsolhsub", "bull", "bull", "bullet", 7910 "bullet", "bump", "bump", "bumpE", "bumpE", "bumpe", "bumpe", "bumpeq", "bumpeq", "cacute", "cacute", "cap", "cap", "capand", "capand", "capbrcup", "capbrcup", "capcap", "capcap", "capcup", "capcup", "capdot", "capdot", "caret", "caret", "caron", "caron", "ccaps", "ccaps", "ccaron", "ccaron", "ccedil", "ccedil", "ccirc", 7911 "ccirc", "ccups", "ccups", "ccupssm", "ccupssm", "cdot", "cdot", "cedil", "cedil", "cemptyv", "cemptyv", "cent", "cent", "centerdot", "centerdot", "cfr", "cfr", "chcy", "chcy", "check", "check", "checkmark", "checkmark", "chi", "chi", "cir", "cir", "cirE", "cirE", "circ", "circ", "circeq", "circeq", "circlearrowleft", 7912 "circlearrowleft", "circlearrowright", "circlearrowright", "circledR", "circledR", "circledS", "circledS", "circledast", "circledast", "circledcirc", "circledcirc", "circleddash", "circleddash", "cire", "cire", "cirfnint", "cirfnint", "cirmid", "cirmid", "cirscir", "cirscir", "clubs", "clubs", "clubsuit", "clubsuit", "colon", 7913 "colon", "colone", "colone", "coloneq", "coloneq", "comma", "comma", "commat", "commat", "comp", "comp", "compfn", "compfn", "complement", "complement", "complexes", "complexes", "cong", "cong", "congdot", "congdot", "conint", "conint", "copf", "copf", "coprod", "coprod", "copy", "copy", "copysr", "copysr", "crarr", "crarr", 7914 "cross", "cross", "cscr", "cscr", "csub", "csub", "csube", "csube", "csup", "csup", "csupe", "csupe", "ctdot", "ctdot", "cudarrl", "cudarrl", "cudarrr", "cudarrr", "cuepr", "cuepr", "cuesc", "cuesc", "cularr", "cularr", "cularrp", "cularrp", "cup", "cup", "cupbrcap", "cupbrcap", "cupcap", "cupcap", "cupcup", "cupcup", 7915 "cupdot", "cupdot", "cupor", "cupor", "curarr", "curarr", "curarrm", "curarrm", "curlyeqprec", "curlyeqprec", "curlyeqsucc", "curlyeqsucc", "curlyvee", "curlyvee", "curlywedge", 
"curlywedge", "curren", "curren", "curvearrowleft", "curvearrowleft", "curvearrowright", "curvearrowright", "cuvee", "cuvee", "cuwed", "cuwed", 7916 "cwconint", "cwconint", "cwint", "cwint", "cylcty", "cylcty", "dArr", "dArr", "dHar", "dHar", "dagger", "dagger", "daleth", "daleth", "darr", "darr", "dash", "dash", "dashv", "dashv", "dbkarow", "dbkarow", "dblac", "dblac", "dcaron", "dcaron", "dcy", "dcy", "dd", "dd", "ddagger", "ddagger", "ddarr", "ddarr", "ddotseq", 7917 "ddotseq", "deg", "deg", "delta", "delta", "demptyv", "demptyv", "dfisht", "dfisht", "dfr", "dfr", "dharl", "dharl", "dharr", "dharr", "diam", "diam", "diamond", "diamond", "diamondsuit", "diamondsuit", "diams", "diams", "die", "die", "digamma", "digamma", "disin", "disin", "div", "div", "divide", "divide", "divideontimes", 7918 "divideontimes", "divonx", "divonx", "djcy", "djcy", "dlcorn", "dlcorn", "dlcrop", "dlcrop", "dollar", "dollar", "dopf", "dopf", "dot", "dot", "doteq", "doteq", "doteqdot", "doteqdot", "dotminus", "dotminus", "dotplus", "dotplus", "dotsquare", "dotsquare", "doublebarwedge", "doublebarwedge", "downarrow", "downarrow", "downdownarrows", 7919 "downdownarrows", "downharpoonleft", "downharpoonleft", "downharpoonright", "downharpoonright", "drbkarow", "drbkarow", "drcorn", "drcorn", "drcrop", "drcrop", "dscr", "dscr", "dscy", "dscy", "dsol", "dsol", "dstrok", "dstrok", "dtdot", "dtdot", "dtri", "dtri", "dtrif", "dtrif", "duarr", "duarr", "duhar", "duhar", "dwangle", 7920 "dwangle", "dzcy", "dzcy", "dzigrarr", "dzigrarr", "eDDot", "eDDot", "eDot", "eDot", "eacute", "eacute", "easter", "easter", "ecaron", "ecaron", "ecir", "ecir", "ecirc", "ecirc", "ecolon", "ecolon", "ecy", "ecy", "edot", "edot", "ee", "ee", "efDot", "efDot", "efr", "efr", "eg", "eg", "egrave", "egrave", "egs", "egs", "egsdot", 7921 "egsdot", "el", "el", "elinters", "elinters", "ell", "ell", "els", "els", "elsdot", "elsdot", "emacr", "emacr", "empty", "empty", "emptyset", "emptyset", "emptyv", "emptyv", "emsp", 
"emsp", "emsp13", "emsp13", "emsp14", "emsp14", "eng", "eng", "ensp", "ensp", "eogon", "eogon", "eopf", "eopf", "epar", "epar", "eparsl", 7922 "eparsl", "eplus", "eplus", "epsi", "epsi", "epsilon", "epsilon", "epsiv", "epsiv", "eqcirc", "eqcirc", "eqcolon", "eqcolon", "eqsim", "eqsim", "eqslantgtr", "eqslantgtr", "eqslantless", "eqslantless", "equals", "equals", "equest", "equest", "equiv", "equiv", "equivDD", "equivDD", "eqvparsl", "eqvparsl", "erDot", "erDot", 7923 "erarr", "erarr", "escr", "escr", "esdot", "esdot", "esim", "esim", "eta", "eta", "eth", "eth", "euml", "euml", "euro", "euro", "excl", "excl", "exist", "exist", "expectation", "expectation", "exponentiale", "exponentiale", "fallingdotseq", "fallingdotseq", "fcy", "fcy", "female", "female", "ffilig", "ffilig", "fflig", 7924 "fflig", "ffllig", "ffllig", "ffr", "ffr", "filig", "filig", "flat", "flat", "fllig", "fllig", "fltns", "fltns", "fnof", "fnof", "fopf", "fopf", "forall", "forall", "fork", "fork", "forkv", "forkv", "fpartint", "fpartint", "frac12", "frac12", "frac13", "frac13", "frac14", "frac14", "frac15", "frac15", "frac16", "frac16", 7925 "frac18", "frac18", "frac23", "frac23", "frac25", "frac25", "frac34", "frac34", "frac35", "frac35", "frac38", "frac38", "frac45", "frac45", "frac56", "frac56", "frac58", "frac58", "frac78", "frac78", "frasl", "frasl", "frown", "frown", "fscr", "fscr", "gE", "gE", "gEl", "gEl", "gacute", "gacute", "gamma", "gamma", "gammad", 7926 "gammad", "gap", "gap", "gbreve", "gbreve", "gcirc", "gcirc", "gcy", "gcy", "gdot", "gdot", "ge", "ge", "gel", "gel", "geq", "geq", "geqq", "geqq", "geqslant", "geqslant", "ges", "ges", "gescc", "gescc", "gesdot", "gesdot", "gesdoto", "gesdoto", "gesdotol", "gesdotol", "gesles", "gesles", "gfr", "gfr", "gg", "gg", "ggg", 7927 "ggg", "gimel", "gimel", "gjcy", "gjcy", "gl", "gl", "glE", "glE", "gla", "gla", "glj", "glj", "gnE", "gnE", "gnap", "gnap", "gnapprox", "gnapprox", "gne", "gne", "gneq", "gneq", "gneqq", "gneqq", "gnsim", "gnsim", 
"gopf", "gopf", "grave", "grave", "gscr", "gscr", "gsim", "gsim", "gsime", "gsime", "gsiml", "gsiml", "gtcc", 7928 "gtcc", "gtcir", "gtcir", "gtdot", "gtdot", "gtlPar", "gtlPar", "gtquest", "gtquest", "gtrapprox", "gtrapprox", "gtrarr", "gtrarr", "gtrdot", "gtrdot", "gtreqless", "gtreqless", "gtreqqless", "gtreqqless", "gtrless", "gtrless", "gtrsim", "gtrsim", "hArr", "hArr", "hairsp", "hairsp", "half", "half", "hamilt", "hamilt", 7929 "hardcy", "hardcy", "harr", "harr", "harrcir", "harrcir", "harrw", "harrw", "hbar", "hbar", "hcirc", "hcirc", "hearts", "hearts", "heartsuit", "heartsuit", "hellip", "hellip", "hercon", "hercon", "hfr", "hfr", "hksearow", "hksearow", "hkswarow", "hkswarow", "hoarr", "hoarr", "homtht", "homtht", "hookleftarrow", "hookleftarrow", 7930 "hookrightarrow", "hookrightarrow", "hopf", "hopf", "horbar", "horbar", "hscr", "hscr", "hslash", "hslash", "hstrok", "hstrok", "hybull", "hybull", "hyphen", "hyphen", "iacute", "iacute", "ic", "ic", "icirc", "icirc", "icy", "icy", "iecy", "iecy", "iexcl", "iexcl", "iff", "iff", "ifr", "ifr", "igrave", "igrave", "ii", 7931 "ii", "iiiint", "iiiint", "iiint", "iiint", "iinfin", "iinfin", "iiota", "iiota", "ijlig", "ijlig", "imacr", "imacr", "image", "image", "imagline", "imagline", "imagpart", "imagpart", "imath", "imath", "imof", "imof", "imped", "imped", "in", "in", "incare", "incare", "infin", "infin", "infintie", "infintie", "inodot", 7932 "inodot", "int", "int", "intcal", "intcal", "integers", "integers", "intercal", "intercal", "intlarhk", "intlarhk", "intprod", "intprod", "iocy", "iocy", "iogon", "iogon", "iopf", "iopf", "iota", "iota", "iprod", "iprod", "iquest", "iquest", "iscr", "iscr", "isin", "isin", "isinE", "isinE", "isindot", "isindot", "isins", 7933 "isins", "isinsv", "isinsv", "isinv", "isinv", "it", "it", "itilde", "itilde", "iukcy", "iukcy", "iuml", "iuml", "jcirc", "jcirc", "jcy", "jcy", "jfr", "jfr", "jmath", "jmath", "jopf", "jopf", "jscr", "jscr", "jsercy", "jsercy", "jukcy", "jukcy", 
"kappa", "kappa", "kappav", "kappav", "kcedil", "kcedil", "kcy", "kcy", "kfr", 7934 "kfr", "kgreen", "kgreen", "khcy", "khcy", "kjcy", "kjcy", "kopf", "kopf", "kscr", "kscr", "lAarr", "lAarr", "lArr", "lArr", "lAtail", "lAtail", "lBarr", "lBarr", "lE", "lE", "lEg", "lEg", "lHar", "lHar", "lacute", "lacute", "laemptyv", "laemptyv", "lagran", "lagran", "lambda", "lambda", "lang", "lang", "langd", "langd", 7935 "langle", "langle", "lap", "lap", "laquo", "laquo", "larr", "larr", "larrb", "larrb", "larrbfs", "larrbfs", "larrfs", "larrfs", "larrhk", "larrhk", "larrlp", "larrlp", "larrpl", "larrpl", "larrsim", "larrsim", "larrtl", "larrtl", "lat", "lat", "latail", "latail", "late", "late", "lbarr", "lbarr", "lbbrk", "lbbrk", "lbrace", 7936 "lbrace", "lbrack", "lbrack", "lbrke", "lbrke", "lbrksld", "lbrksld", "lbrkslu", "lbrkslu", "lcaron", "lcaron", "lcedil", "lcedil", "lceil", "lceil", "lcub", "lcub", "lcy", "lcy", "ldca", "ldca", "ldquo", "ldquo", "ldquor", "ldquor", "ldrdhar", "ldrdhar", "ldrushar", "ldrushar", "ldsh", "ldsh", "le", "le", "leftarrow", 7937 "leftarrow", "leftarrowtail", "leftarrowtail", "leftharpoondown", "leftharpoondown", "leftharpoonup", "leftharpoonup", "leftleftarrows", "leftleftarrows", "leftrightarrow", "leftrightarrow", "leftrightarrows", "leftrightarrows", "leftrightharpoons", "leftrightharpoons", "leftrightsquigarrow", "leftrightsquigarrow", "leftthreetimes", 7938 "leftthreetimes", "leg", "leg", "leq", "leq", "leqq", "leqq", "leqslant", "leqslant", "les", "les", "lescc", "lescc", "lesdot", "lesdot", "lesdoto", "lesdoto", "lesdotor", "lesdotor", "lesges", "lesges", "lessapprox", "lessapprox", "lessdot", "lessdot", "lesseqgtr", "lesseqgtr", "lesseqqgtr", "lesseqqgtr", "lessgtr", "lessgtr", 7939 "lesssim", "lesssim", "lfisht", "lfisht", "lfloor", "lfloor", "lfr", "lfr", "lg", "lg", "lgE", "lgE", "lhard", "lhard", "lharu", "lharu", "lharul", "lharul", "lhblk", "lhblk", "ljcy", "ljcy", "ll", "ll", "llarr", "llarr", "llcorner", "llcorner", "llhard", 
"llhard", "lltri", "lltri", "lmidot", "lmidot", "lmoust", "lmoust", 7940 "lmoustache", "lmoustache", "lnE", "lnE", "lnap", "lnap", "lnapprox", "lnapprox", "lne", "lne", "lneq", "lneq", "lneqq", "lneqq", "lnsim", "lnsim", "loang", "loang", "loarr", "loarr", "lobrk", "lobrk", "longleftarrow", "longleftarrow", "longleftrightarrow", "longleftrightarrow", "longmapsto", "longmapsto", "longrightarrow", 7941 "longrightarrow", "looparrowleft", "looparrowleft", "looparrowright", "looparrowright", "lopar", "lopar", "lopf", "lopf", "loplus", "loplus", "lotimes", "lotimes", "lowast", "lowast", "lowbar", "lowbar", "loz", "loz", "lozenge", "lozenge", "lozf", "lozf", "lpar", "lpar", "lparlt", "lparlt", "lrarr", "lrarr", "lrcorner", 7942 "lrcorner", "lrhar", "lrhar", "lrhard", "lrhard", "lrm", "lrm", "lrtri", "lrtri", "lsaquo", "lsaquo", "lscr", "lscr", "lsh", "lsh", "lsim", "lsim", "lsime", "lsime", "lsimg", "lsimg", "lsqb", "lsqb", "lsquo", "lsquo", "lsquor", "lsquor", "lstrok", "lstrok", "ltcc", "ltcc", "ltcir", "ltcir", "ltdot", "ltdot", "lthree", 7943 "lthree", "ltimes", "ltimes", "ltlarr", "ltlarr", "ltquest", "ltquest", "ltrPar", "ltrPar", "ltri", "ltri", "ltrie", "ltrie", "ltrif", "ltrif", "lurdshar", "lurdshar", "luruhar", "luruhar", "mDDot", "mDDot", "macr", "macr", "male", "male", "malt", "malt", "maltese", "maltese", "map", "map", "mapsto", "mapsto", "mapstodown", 7944 "mapstodown", "mapstoleft", "mapstoleft", "mapstoup", "mapstoup", "marker", "marker", "mcomma", "mcomma", "mcy", "mcy", "mdash", "mdash", "measuredangle", "measuredangle", "mfr", "mfr", "mho", "mho", "micro", "micro", "mid", "mid", "midast", "midast", "midcir", "midcir", "middot", "middot", "minus", "minus", "minusb", 7945 "minusb", "minusd", "minusd", "minusdu", "minusdu", "mlcp", "mlcp", "mldr", "mldr", "mnplus", "mnplus", "models", "models", "mopf", "mopf", "mp", "mp", "mscr", "mscr", "mstpos", "mstpos", "mu", "mu", "multimap", "multimap", "mumap", "mumap", "nLeftarrow", "nLeftarrow", 
"nLeftrightarrow", "nLeftrightarrow", "nRightarrow", 7946 "nRightarrow", "nVDash", "nVDash", "nVdash", "nVdash", "nabla", "nabla", "nacute", "nacute", "nap", "nap", "napos", "napos", "napprox", "napprox", "natur", "natur", "natural", "natural", "naturals", "naturals", "nbsp", "nbsp", "ncap", "ncap", "ncaron", "ncaron", "ncedil", "ncedil", "ncong", "ncong", "ncup", "ncup", "ncy", 7947 "ncy", "ndash", "ndash", "ne", "ne", "neArr", "neArr", "nearhk", "nearhk", "nearr", "nearr", "nearrow", "nearrow", "nequiv", "nequiv", "nesear", "nesear", "nexist", "nexist", "nexists", "nexists", "nfr", "nfr", "nge", "nge", "ngeq", "ngeq", "ngsim", "ngsim", "ngt", "ngt", "ngtr", "ngtr", "nhArr", "nhArr", "nharr", "nharr", 7948 "nhpar", "nhpar", "ni", "ni", "nis", "nis", "nisd", "nisd", "niv", "niv", "njcy", "njcy", "nlArr", "nlArr", "nlarr", "nlarr", "nldr", "nldr", "nle", "nle", "nleftarrow", "nleftarrow", "nleftrightarrow", "nleftrightarrow", "nleq", "nleq", "nless", "nless", "nlsim", "nlsim", "nlt", "nlt", "nltri", "nltri", "nltrie", "nltrie", 7949 "nmid", "nmid", "nopf", "nopf", "not", "not", "notin", "notin", "notinva", "notinva", "notinvb", "notinvb", "notinvc", "notinvc", "notni", "notni", "notniva", "notniva", "notnivb", "notnivb", "notnivc", "notnivc", "npar", "npar", "nparallel", "nparallel", "npolint", "npolint", "npr", "npr", "nprcue", "nprcue", "nprec", 7950 "nprec", "nrArr", "nrArr", "nrarr", "nrarr", "nrightarrow", "nrightarrow", "nrtri", "nrtri", "nrtrie", "nrtrie", "nsc", "nsc", "nsccue", "nsccue", "nscr", "nscr", "nshortmid", "nshortmid", "nshortparallel", "nshortparallel", "nsim", "nsim", "nsime", "nsime", "nsimeq", "nsimeq", "nsmid", "nsmid", "nspar", "nspar", "nsqsube", 7951 "nsqsube", "nsqsupe", "nsqsupe", "nsub", "nsub", "nsube", "nsube", "nsubseteq", "nsubseteq", "nsucc", "nsucc", "nsup", "nsup", "nsupe", "nsupe", "nsupseteq", "nsupseteq", "ntgl", "ntgl", "ntilde", "ntilde", "ntlg", "ntlg", "ntriangleleft", "ntriangleleft", "ntrianglelefteq", "ntrianglelefteq", 
"ntriangleright", "ntriangleright", 7952 "ntrianglerighteq", "ntrianglerighteq", "nu", "nu", "num", "num", "numero", "numero", "numsp", "numsp", "nvDash", "nvDash", "nvHarr", "nvHarr", "nvdash", "nvdash", "nvinfin", "nvinfin", "nvlArr", "nvlArr", "nvrArr", "nvrArr", "nwArr", "nwArr", "nwarhk", "nwarhk", "nwarr", "nwarr", "nwarrow", "nwarrow", "nwnear", "nwnear", 7953 "oS", "oS", "oacute", "oacute", "oast", "oast", "ocir", "ocir", "ocirc", "ocirc", "ocy", "ocy", "odash", "odash", "odblac", "odblac", "odiv", "odiv", "odot", "odot", "odsold", "odsold", "oelig", "oelig", "ofcir", "ofcir", "ofr", "ofr", "ogon", "ogon", "ograve", "ograve", "ogt", "ogt", "ohbar", "ohbar", "ohm", "ohm", "oint", 7954 "oint", "olarr", "olarr", "olcir", "olcir", "olcross", "olcross", "oline", "oline", "olt", "olt", "omacr", "omacr", "omega", "omega", "omicron", "omicron", "omid", "omid", "ominus", "ominus", "oopf", "oopf", "opar", "opar", "operp", "operp", "oplus", "oplus", "or", "or", "orarr", "orarr", "ord", "ord", "order", "order", 7955 "orderof", "orderof", "ordf", "ordf", "ordm", "ordm", "origof", "origof", "oror", "oror", "orslope", "orslope", "orv", "orv", "oscr", "oscr", "oslash", "oslash", "osol", "osol", "otilde", "otilde", "otimes", "otimes", "otimesas", "otimesas", "ouml", "ouml", "ovbar", "ovbar", "par", "par", "para", "para", "parallel", "parallel", 7956 "parsim", "parsim", "parsl", "parsl", "part", "part", "pcy", "pcy", "percnt", "percnt", "period", "period", "permil", "permil", "perp", "perp", "pertenk", "pertenk", "pfr", "pfr", "phi", "phi", "phiv", "phiv", "phmmat", "phmmat", "phone", "phone", "pi", "pi", "pitchfork", "pitchfork", "piv", "piv", "planck", "planck", 7957 "planckh", "planckh", "plankv", "plankv", "plus", "plus", "plusacir", "plusacir", "plusb", "plusb", "pluscir", "pluscir", "plusdo", "plusdo", "plusdu", "plusdu", "pluse", "pluse", "plusmn", "plusmn", "plussim", "plussim", "plustwo", "plustwo", "pm", "pm", "pointint", "pointint", "popf", "popf", "pound", 
"pound", "pr", 7958 "pr", "prE", "prE", "prap", "prap", "prcue", "prcue", "pre", "pre", "prec", "prec", "precapprox", "precapprox", "preccurlyeq", "preccurlyeq", "preceq", "preceq", "precnapprox", "precnapprox", "precneqq", "precneqq", "precnsim", "precnsim", "precsim", "precsim", "prime", "prime", "primes", "primes", "prnE", "prnE", "prnap", 7959 "prnap", "prnsim", "prnsim", "prod", "prod", "profalar", "profalar", "profline", "profline", "profsurf", "profsurf", "prop", "prop", "propto", "propto", "prsim", "prsim", "prurel", "prurel", "pscr", "pscr", "psi", "psi", "puncsp", "puncsp", "qfr", "qfr", "qint", "qint", "qopf", "qopf", "qprime", "qprime", "qscr", "qscr", 7960 "quaternions", "quaternions", "quatint", "quatint", "quest", "quest", "questeq", "questeq", "rAarr", "rAarr", "rArr", "rArr", "rAtail", "rAtail", "rBarr", "rBarr", "rHar", "rHar", "racute", "racute", "radic", "radic", "raemptyv", "raemptyv", "rang", "rang", "rangd", "rangd", "range", "range", "rangle", "rangle", "raquo", 7961 "raquo", "rarr", "rarr", "rarrap", "rarrap", "rarrb", "rarrb", "rarrbfs", "rarrbfs", "rarrc", "rarrc", "rarrfs", "rarrfs", "rarrhk", "rarrhk", "rarrlp", "rarrlp", "rarrpl", "rarrpl", "rarrsim", "rarrsim", "rarrtl", "rarrtl", "rarrw", "rarrw", "ratail", "ratail", "ratio", "ratio", "rationals", "rationals", "rbarr", "rbarr", 7962 "rbbrk", "rbbrk", "rbrace", "rbrace", "rbrack", "rbrack", "rbrke", "rbrke", "rbrksld", "rbrksld", "rbrkslu", "rbrkslu", "rcaron", "rcaron", "rcedil", "rcedil", "rceil", "rceil", "rcub", "rcub", "rcy", "rcy", "rdca", "rdca", "rdldhar", "rdldhar", "rdquo", "rdquo", "rdquor", "rdquor", "rdsh", "rdsh", "real", "real", "realine", 7963 "realine", "realpart", "realpart", "reals", "reals", "rect", "rect", "reg", "reg", "rfisht", "rfisht", "rfloor", "rfloor", "rfr", "rfr", "rhard", "rhard", "rharu", "rharu", "rharul", "rharul", "rho", "rho", "rhov", "rhov", "rightarrow", "rightarrow", "rightarrowtail", "rightarrowtail", "rightharpoondown", "rightharpoondown", 7964 
"rightharpoonup", "rightharpoonup", "rightleftarrows", "rightleftarrows", "rightleftharpoons", "rightleftharpoons", "rightrightarrows", "rightrightarrows", "rightsquigarrow", "rightsquigarrow", "rightthreetimes", "rightthreetimes", "ring", "ring", "risingdotseq", "risingdotseq", "rlarr", "rlarr", "rlhar", "rlhar", "rlm", 7965 "rlm", "rmoust", "rmoust", "rmoustache", "rmoustache", "rnmid", "rnmid", "roang", "roang", "roarr", "roarr", "robrk", "robrk", "ropar", "ropar", "ropf", "ropf", "roplus", "roplus", "rotimes", "rotimes", "rpar", "rpar", "rpargt", "rpargt", "rppolint", "rppolint", "rrarr", "rrarr", "rsaquo", "rsaquo", "rscr", "rscr", "rsh", 7966 "rsh", "rsqb", "rsqb", "rsquo", "rsquo", "rsquor", "rsquor", "rthree", "rthree", "rtimes", "rtimes", "rtri", "rtri", "rtrie", "rtrie", "rtrif", "rtrif", "rtriltri", "rtriltri", "ruluhar", "ruluhar", "rx", "rx", "sacute", "sacute", "sbquo", "sbquo", "sc", "sc", "scE", "scE", "scap", "scap", "scaron", "scaron", "sccue", 7967 "sccue", "sce", "sce", "scedil", "scedil", "scirc", "scirc", "scnE", "scnE", "scnap", "scnap", "scnsim", "scnsim", "scpolint", "scpolint", "scsim", "scsim", "scy", "scy", "sdot", "sdot", "sdotb", "sdotb", "sdote", "sdote", "seArr", "seArr", "searhk", "searhk", "searr", "searr", "searrow", "searrow", "sect", "sect", "semi", 7968 "semi", "seswar", "seswar", "setminus", "setminus", "setmn", "setmn", "sext", "sext", "sfr", "sfr", "sfrown", "sfrown", "sharp", "sharp", "shchcy", "shchcy", "shcy", "shcy", "shortmid", "shortmid", "shortparallel", "shortparallel", "shy", "shy", "sigma", "sigma", "sigmaf", "sigmaf", "sigmav", "sigmav", "sim", "sim", "simdot", 7969 "simdot", "sime", "sime", "simeq", "simeq", "simg", "simg", "simgE", "simgE", "siml", "siml", "simlE", "simlE", "simne", "simne", "simplus", "simplus", "simrarr", "simrarr", "slarr", "slarr", "smallsetminus", "smallsetminus", "smashp", "smashp", "smeparsl", "smeparsl", "smid", "smid", "smile", "smile", "smt", "smt", "smte", 7970 "smte", "softcy", 
"softcy", "sol", "sol", "solb", "solb", "solbar", "solbar", "sopf", "sopf", "spades", "spades", "spadesuit", "spadesuit", "spar", "spar", "sqcap", "sqcap", "sqcup", "sqcup", "sqsub", "sqsub", "sqsube", "sqsube", "sqsubset", "sqsubset", "sqsubseteq", "sqsubseteq", "sqsup", "sqsup", "sqsupe", "sqsupe", 7971 "sqsupset", "sqsupset", "sqsupseteq", "sqsupseteq", "squ", "squ", "square", "square", "squarf", "squarf", "squf", "squf", "srarr", "srarr", "sscr", "sscr", "ssetmn", "ssetmn", "ssmile", "ssmile", "sstarf", "sstarf", "star", "star", "starf", "starf", "straightepsilon", "straightepsilon", "straightphi", "straightphi", "strns", 7972 "strns", "sub", "sub", "subE", "subE", "subdot", "subdot", "sube", "sube", "subedot", "subedot", "submult", "submult", "subnE", "subnE", "subne", "subne", "subplus", "subplus", "subrarr", "subrarr", "subset", "subset", "subseteq", "subseteq", "subseteqq", "subseteqq", "subsetneq", "subsetneq", "subsetneqq", "subsetneqq", 7973 "subsim", "subsim", "subsub", "subsub", "subsup", "subsup", "succ", "succ", "succapprox", "succapprox", "succcurlyeq", "succcurlyeq", "succeq", "succeq", "succnapprox", "succnapprox", "succneqq", "succneqq", "succnsim", "succnsim", "succsim", "succsim", "sum", "sum", "sung", "sung", "sup", "sup", "sup1", "sup1", "sup2", 7974 "sup2", "sup3", "sup3", "supE", "supE", "supdot", "supdot", "supdsub", "supdsub", "supe", "supe", "supedot", "supedot", "suphsol", "suphsol", "suphsub", "suphsub", "suplarr", "suplarr", "supmult", "supmult", "supnE", "supnE", "supne", "supne", "supplus", "supplus", "supset", "supset", "supseteq", "supseteq", "supseteqq", 7975 "supseteqq", "supsetneq", "supsetneq", "supsetneqq", "supsetneqq", "supsim", "supsim", "supsub", "supsub", "supsup", "supsup", "swArr", "swArr", "swarhk", "swarhk", "swarr", "swarr", "swarrow", "swarrow", "swnwar", "swnwar", "szlig", "szlig", "target", "target", "tau", "tau", "tbrk", "tbrk", "tcaron", "tcaron", "tcedil", 7976 "tcedil", "tcy", "tcy", "tdot", "tdot", "telrec", 
"telrec", "tfr", "tfr", "there4", "there4", "therefore", "therefore", "theta", "theta", "thetasym", "thetasym", "thetav", "thetav", "thickapprox", "thickapprox", "thicksim", "thicksim", "thinsp", "thinsp", "thkap", "thkap", "thksim", "thksim", "thorn", "thorn", "tilde", 7977 "tilde", "times", "times", "timesb", "timesb", "timesbar", "timesbar", "timesd", "timesd", "tint", "tint", "toea", "toea", "top", "top", "topbot", "topbot", "topcir", "topcir", "topf", "topf", "topfork", "topfork", "tosa", "tosa", "tprime", "tprime", "trade", "trade", "triangle", "triangle", "triangledown", "triangledown", 7978 "triangleleft", "triangleleft", "trianglelefteq", "trianglelefteq", "triangleq", "triangleq", "triangleright", "triangleright", "trianglerighteq", "trianglerighteq", "tridot", "tridot", "trie", "trie", "triminus", "triminus", "triplus", "triplus", "trisb", "trisb", "tritime", "tritime", "trpezium", "trpezium", "tscr", 7979 "tscr", "tscy", "tscy", "tshcy", "tshcy", "tstrok", "tstrok", "twixt", "twixt", "twoheadleftarrow", "twoheadleftarrow", "twoheadrightarrow", "twoheadrightarrow", "uArr", "uArr", "uHar", "uHar", "uacute", "uacute", "uarr", "uarr", "ubrcy", "ubrcy", "ubreve", "ubreve", "ucirc", "ucirc", "ucy", "ucy", "udarr", "udarr", "udblac", 7980 "udblac", "udhar", "udhar", "ufisht", "ufisht", "ufr", "ufr", "ugrave", "ugrave", "uharl", "uharl", "uharr", "uharr", "uhblk", "uhblk", "ulcorn", "ulcorn", "ulcorner", "ulcorner", "ulcrop", "ulcrop", "ultri", "ultri", "umacr", "umacr", "uml", "uml", "uogon", "uogon", "uopf", "uopf", "uparrow", "uparrow", "updownarrow", 7981 "updownarrow", "upharpoonleft", "upharpoonleft", "upharpoonright", "upharpoonright", "uplus", "uplus", "upsi", "upsi", "upsih", "upsih", "upsilon", "upsilon", "upuparrows", "upuparrows", "urcorn", "urcorn", "urcorner", "urcorner", "urcrop", "urcrop", "uring", "uring", "urtri", "urtri", "uscr", "uscr", "utdot", "utdot", 7982 "utilde", "utilde", "utri", "utri", "utrif", "utrif", "uuarr", "uuarr", "uuml", 
"uuml", "uwangle", "uwangle", "vArr", "vArr", "vBar", "vBar", "vBarv", "vBarv", "vDash", "vDash", "vangrt", "vangrt", "varepsilon", "varepsilon", "varkappa", "varkappa", "varnothing", "varnothing", "varphi", "varphi", "varpi", "varpi", "varpropto", 7983 "varpropto", "varr", "varr", "varrho", "varrho", "varsigma", "varsigma", "vartheta", "vartheta", "vartriangleleft", "vartriangleleft", "vartriangleright", "vartriangleright", "vcy", "vcy", "vdash", "vdash", "vee", "vee", "veebar", "veebar", "veeeq", "veeeq", "vellip", "vellip", "verbar", "verbar", "vert", "vert", "vfr", 7984 "vfr", "vltri", "vltri", "vopf", "vopf", "vprop", "vprop", "vrtri", "vrtri", "vscr", "vscr", "vzigzag", "vzigzag", "wcirc", "wcirc", "wedbar", "wedbar", "wedge", "wedge", "wedgeq", "wedgeq", "weierp", "weierp", "wfr", "wfr", "wopf", "wopf", "wp", "wp", "wr", "wr", "wreath", "wreath", "wscr", "wscr", "xcap", "xcap", "xcirc", 7985 "xcirc", "xcup", "xcup", "xdtri", "xdtri", "xfr", "xfr", "xhArr", "xhArr", "xharr", "xharr", "xi", "xi", "xlArr", "xlArr", "xlarr", "xlarr", "xmap", "xmap", "xnis", "xnis", "xodot", "xodot", "xopf", "xopf", "xoplus", "xoplus", "xotime", "xotime", "xrArr", "xrArr", "xrarr", "xrarr", "xscr", "xscr", "xsqcup", "xsqcup", "xuplus", 7986 "xuplus", "xutri", "xutri", "xvee", "xvee", "xwedge", "xwedge", "yacute", "yacute", "yacy", "yacy", "ycirc", "ycirc", "ycy", "ycy", "yen", "yen", "yfr", "yfr", "yicy", "yicy", "yopf", "yopf", "yscr", "yscr", "yucy", "yucy", "yuml", "yuml", "zacute", "zacute", "zcaron", "zcaron", "zcy", "zcy", "zdot", "zdot", "zeetrf", 7987 "zeetrf", "zeta", "zeta", "zfr", "zfr", "zhcy", "zhcy", "zigrarr", "zigrarr", "zopf", "zopf", "zscr", "zscr", "zwj", "zwj", "zwnj", "zwnj", ]; 7988 7989 immutable dchar[] availableEntitiesValues = 7990 ['\u00c6', '\u00c6', '\u0026', '\u0026', '\u00c1', '\u00c1', '\u0102', '\u0102', '\u00c2', '\u00c2', '\u0410', '\u0410', '\U0001d504', '\U0001d504', '\u00c0', '\u00c0', '\u0391', '\u0391', '\u0100', '\u0100', '\u2a53', 
'\u2a53', '\u0104', '\u0104', '\U0001d538', '\U0001d538', '\u2061', '\u2061', '\u00c5', '\u00c5', '\U0001d49c', '\U0001d49c', '\u2254', '\u2254', '\u00c3', 7991 '\u00c3', '\u00c4', '\u00c4', '\u2216', '\u2216', '\u2ae7', '\u2ae7', '\u2306', '\u2306', '\u0411', '\u0411', '\u2235', '\u2235', '\u212c', '\u212c', '\u0392', '\u0392', '\U0001d505', '\U0001d505', '\U0001d539', '\U0001d539', '\u02d8', '\u02d8', '\u212c', '\u212c', '\u224e', '\u224e', '\u0427', '\u0427', '\u00a9', '\u00a9', '\u0106', '\u0106', '\u22d2', '\u22d2', '\u2145', 7992 '\u2145', '\u212d', '\u212d', '\u010c', '\u010c', '\u00c7', '\u00c7', '\u0108', '\u0108', '\u2230', '\u2230', '\u010a', '\u010a', '\u00b8', '\u00b8', '\u00b7', '\u00b7', '\u212d', '\u212d', '\u03a7', '\u03a7', '\u2299', '\u2299', '\u2296', '\u2296', '\u2295', '\u2295', '\u2297', '\u2297', 7993 '\u2232', '\u2232', '\u201d', '\u201d', '\u2019', '\u2019', '\u2237', '\u2237', '\u2a74', '\u2a74', '\u2261', '\u2261', '\u222f', '\u222f', '\u222e', '\u222e', '\u2102', '\u2102', '\u2210', '\u2210', '\u2233', 7994 '\u2233', '\u2a2f', '\u2a2f', '\U0001d49e', '\U0001d49e', '\u22d3', '\u22d3', '\u224d', '\u224d', '\u2145', '\u2145', '\u2911', '\u2911', '\u0402', '\u0402', '\u0405', '\u0405', '\u040f', '\u040f', '\u2021', '\u2021', '\u21a1', '\u21a1', '\u2ae4', '\u2ae4', '\u010e', '\u010e', '\u0414', '\u0414', '\u2207', '\u2207', '\u0394', '\u0394', '\U0001d507', '\U0001d507', 7995 '\u00b4', '\u00b4', '\u02d9', '\u02d9', '\u02dd', '\u02dd', '\u0060', '\u0060', '\u02dc', '\u02dc', '\u22c4', '\u22c4', '\u2146', '\u2146', '\U0001d53b', '\U0001d53b', '\u00a8', '\u00a8', '\u20dc', '\u20dc', '\u2250', 7996 '\u2250', '\u222f', '\u222f', '\u00a8', '\u00a8', '\u21d3', '\u21d3', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u2ae4', '\u2ae4', '\u27f8', '\u27f8', '\u27fa', 7997 '\u27fa', '\u27f9', '\u27f9', '\u21d2', '\u21d2', '\u22a8', '\u22a8', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2225', '\u2225', '\u2193', '\u2193', '\u2913', '\u2913', 7998 '\u21f5', 
'\u21f5', '\u0311', '\u0311', '\u2950', '\u2950', '\u295e', '\u295e', '\u21bd', '\u21bd', '\u2956', '\u2956', '\u295f', '\u295f', '\u21c1', '\u21c1', '\u2957', 7999 '\u2957', '\u22a4', '\u22a4', '\u21a7', '\u21a7', '\u21d3', '\u21d3', '\U0001d49f', '\U0001d49f', '\u0110', '\u0110', '\u014a', '\u014a', '\u00d0', '\u00d0', '\u00c9', '\u00c9', '\u011a', '\u011a', '\u00ca', '\u00ca', '\u042d', '\u042d', '\u0116', '\u0116', '\U0001d508', '\U0001d508', '\u00c8', '\u00c8', '\u2208', '\u2208', '\u0112', '\u0112', 8000 '\u25fb', '\u25fb', '\u25ab', '\u25ab', '\u0118', '\u0118', '\U0001d53c', '\U0001d53c', '\u0395', '\u0395', '\u2a75', '\u2a75', '\u2242', '\u2242', '\u21cc', '\u21cc', '\u2130', '\u2130', '\u2a73', '\u2a73', '\u0397', '\u0397', '\u00cb', '\u00cb', '\u2203', '\u2203', '\u2147', '\u2147', 8001 '\u0424', '\u0424', '\U0001d509', '\U0001d509', '\u25fc', '\u25fc', '\u25aa', '\u25aa', '\U0001d53d', '\U0001d53d', '\u2200', '\u2200', '\u2131', '\u2131', '\u2131', '\u2131', '\u0403', '\u0403', '\u003e', '\u003e', '\u0393', '\u0393', '\u03dc', '\u03dc', '\u011e', '\u011e', '\u0122', '\u0122', '\u011c', '\u011c', 8002 '\u0413', '\u0413', '\u0120', '\u0120', '\U0001d50a', '\U0001d50a', '\u22d9', '\u22d9', '\U0001d53e', '\U0001d53e', '\u2265', '\u2265', '\u22db', '\u22db', '\u2267', '\u2267', '\u2aa2', '\u2aa2', '\u2277', '\u2277', '\u2a7e', '\u2a7e', '\u2273', '\u2273', 8003 '\U0001d4a2', '\U0001d4a2', '\u226b', '\u226b', '\u042a', '\u042a', '\u02c7', '\u02c7', '\u005e', '\u005e', '\u0124', '\u0124', '\u210c', '\u210c', '\u210b', '\u210b', '\u210d', '\u210d', '\u2500', '\u2500', '\u210b', '\u210b', '\u0126', '\u0126', '\u224e', '\u224e', '\u224f', '\u224f', '\u0415', '\u0415', '\u0132', '\u0132', 8004 '\u0401', '\u0401', '\u00cd', '\u00cd', '\u00ce', '\u00ce', '\u0418', '\u0418', '\u0130', '\u0130', '\u2111', '\u2111', '\u00cc', '\u00cc', '\u2111', '\u2111', '\u012a', '\u012a', '\u2148', '\u2148', '\u21d2', '\u21d2', '\u222c', '\u222c', '\u222b', '\u222b', '\u22c2', 
'\u22c2', '\u2063', '\u2063', '\u2062', 8005 '\u2062', '\u012e', '\u012e', '\U0001d540', '\U0001d540', '\u0399', '\u0399', '\u2110', '\u2110', '\u0128', '\u0128', '\u0406', '\u0406', '\u00cf', '\u00cf', '\u0134', '\u0134', '\u0419', '\u0419', '\U0001d50d', '\U0001d50d', '\U0001d541', '\U0001d541', '\U0001d4a5', '\U0001d4a5', '\u0408', '\u0408', '\u0404', '\u0404', '\u0425', '\u0425', '\u040c', '\u040c', '\u039a', '\u039a', '\u0136', '\u0136', 8006 '\u041a', '\u041a', '\U0001d50e', '\U0001d50e', '\U0001d542', '\U0001d542', '\U0001d4a6', '\U0001d4a6', '\u0409', '\u0409', '\u003c', '\u003c', '\u0139', '\u0139', '\u039b', '\u039b', '\u27ea', '\u27ea', '\u2112', '\u2112', '\u219e', '\u219e', '\u013d', '\u013d', '\u013b', '\u013b', '\u041b', '\u041b', '\u27e8', '\u27e8', '\u2190', '\u2190', '\u21e4', 8007 '\u21e4', '\u21c6', '\u21c6', '\u2308', '\u2308', '\u27e6', '\u27e6', '\u2961', '\u2961', '\u21c3', '\u21c3', '\u2959', '\u2959', '\u230a', '\u230a', '\u2194', '\u2194', '\u294e', 8008 '\u294e', '\u22a3', '\u22a3', '\u21a4', '\u21a4', '\u295a', '\u295a', '\u22b2', '\u22b2', '\u29cf', '\u29cf', '\u22b4', '\u22b4', '\u2951', '\u2951', '\u2960', '\u2960', '\u21bf', '\u21bf', 8009 '\u2958', '\u2958', '\u21bc', '\u21bc', '\u2952', '\u2952', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u22da', '\u22da', '\u2266', '\u2266', '\u2276', '\u2276', '\u2aa1', '\u2aa1', '\u2a7d', '\u2a7d', 8010 '\u2272', '\u2272', '\U0001d50f', '\U0001d50f', '\u22d8', '\u22d8', '\u21da', '\u21da', '\u013f', '\u013f', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27f6', '\u27f6', '\u27f8', '\u27f8', '\u27fa', '\u27fa', '\u27f9', '\u27f9', 8011 '\U0001d543', '\U0001d543', '\u2199', '\u2199', '\u2198', '\u2198', '\u2112', '\u2112', '\u21b0', '\u21b0', '\u0141', '\u0141', '\u226a', '\u226a', '\u2905', '\u2905', '\u041c', '\u041c', '\u205f', '\u205f', '\u2133', '\u2133', '\U0001d510', '\U0001d510', '\u2213', '\u2213', '\U0001d544', '\U0001d544', '\u2133', '\u2133', '\u039c', '\u039c', 8012 '\u040a', '\u040a', 
'\u0143', '\u0143', '\u0147', '\u0147', '\u0145', '\u0145', '\u041d', '\u041d', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u226b', '\u226b', 8013 '\u226a', '\u226a', '\u000a', '\u000a', '\U0001d511', '\U0001d511', '\u2060', '\u2060', '\u00a0', '\u00a0', '\u2115', '\u2115', '\u2aec', '\u2aec', '\u2262', '\u2262', '\u226d', '\u226d', '\u2226', '\u2226', '\u2209', '\u2209', '\u2260', '\u2260', 8014 '\u2204', '\u2204', '\u226f', '\u226f', '\u2271', '\u2271', '\u2279', '\u2279', '\u2275', '\u2275', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u226e', '\u226e', '\u2270', '\u2270', '\u2278', 8015 '\u2278', '\u2274', '\u2274', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u220c', '\u220c', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u22e2', '\u22e2', '\u22e3', 8016 '\u22e3', '\u2288', '\u2288', '\u2281', '\u2281', '\u22e1', '\u22e1', '\u2289', '\u2289', '\u2241', '\u2241', '\u2244', '\u2244', '\u2247', '\u2247', '\u2249', '\u2249', '\u2224', 8017 '\u2224', '\U0001d4a9', '\U0001d4a9', '\u00d1', '\u00d1', '\u039d', '\u039d', '\u0152', '\u0152', '\u00d3', '\u00d3', '\u00d4', '\u00d4', '\u041e', '\u041e', '\u0150', '\u0150', '\U0001d512', '\U0001d512', '\u00d2', '\u00d2', '\u014c', '\u014c', '\u03a9', '\u03a9', '\u039f', '\u039f', '\U0001d546', '\U0001d546', '\u201c', '\u201c', '\u2018', 8018 '\u2018', '\u2a54', '\u2a54', '\U0001d4aa', '\U0001d4aa', '\u00d8', '\u00d8', '\u00d5', '\u00d5', '\u2a37', '\u2a37', '\u00d6', '\u00d6', '\u203e', '\u203e', '\u23de', '\u23de', '\u23b4', '\u23b4', '\u23dc', '\u23dc', '\u2202', '\u2202', '\u041f', '\u041f', '\U0001d513', '\U0001d513', '\u03a6', '\u03a6', '\u03a0', '\u03a0', '\u00b1', 8019 '\u00b1', '\u210c', '\u210c', '\u2119', '\u2119', '\u2abb', '\u2abb', '\u227a', '\u227a', '\u2aaf', '\u2aaf', '\u227c', '\u227c', '\u227e', '\u227e', '\u2033', '\u2033', '\u220f', '\u220f', '\u2237', '\u2237', '\u221d', '\u221d', '\U0001d4ab', '\U0001d4ab', 8020 '\u03a8', '\u03a8', '\u0022', '\u0022', '\U0001d514', 
'\U0001d514', '\u211a', '\u211a', '\U0001d4ac', '\U0001d4ac', '\u2910', '\u2910', '\u00ae', '\u00ae', '\u0154', '\u0154', '\u27eb', '\u27eb', '\u21a0', '\u21a0', '\u2916', '\u2916', '\u0158', '\u0158', '\u0156', '\u0156', '\u0420', '\u0420', '\u211c', '\u211c', '\u220b', '\u220b', '\u21cb', '\u21cb', 8021 '\u296f', '\u296f', '\u211c', '\u211c', '\u03a1', '\u03a1', '\u27e9', '\u27e9', '\u2192', '\u2192', '\u21e5', '\u21e5', '\u21c4', '\u21c4', '\u2309', '\u2309', '\u27e7', '\u27e7', '\u295d', 8022 '\u295d', '\u21c2', '\u21c2', '\u2955', '\u2955', '\u230b', '\u230b', '\u22a2', '\u22a2', '\u21a6', '\u21a6', '\u295b', '\u295b', '\u22b3', '\u22b3', '\u29d0', '\u29d0', '\u22b5', 8023 '\u22b5', '\u294f', '\u294f', '\u295c', '\u295c', '\u21be', '\u21be', '\u2954', '\u2954', '\u21c0', '\u21c0', '\u2953', '\u2953', '\u21d2', '\u21d2', '\u211d', '\u211d', '\u2970', '\u2970', 8024 '\u21db', '\u21db', '\u211b', '\u211b', '\u21b1', '\u21b1', '\u29f4', '\u29f4', '\u0429', '\u0429', '\u0428', '\u0428', '\u042c', '\u042c', '\u015a', '\u015a', '\u2abc', '\u2abc', '\u0160', '\u0160', '\u015e', '\u015e', '\u015c', '\u015c', '\u0421', '\u0421', '\U0001d516', '\U0001d516', '\u2193', '\u2193', '\u2190', '\u2190', 8025 '\u2192', '\u2192', '\u2191', '\u2191', '\u03a3', '\u03a3', '\u2218', '\u2218', '\U0001d54a', '\U0001d54a', '\u221a', '\u221a', '\u25a1', '\u25a1', '\u2293', '\u2293', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', 8026 '\u2292', '\u2292', '\u2294', '\u2294', '\U0001d4ae', '\U0001d4ae', '\u22c6', '\u22c6', '\u22d0', '\u22d0', '\u22d0', '\u22d0', '\u2286', '\u2286', '\u227b', '\u227b', '\u2ab0', '\u2ab0', '\u227d', '\u227d', '\u227f', '\u227f', '\u220b', 8027 '\u220b', '\u2211', '\u2211', '\u22d1', '\u22d1', '\u2283', '\u2283', '\u2287', '\u2287', '\u22d1', '\u22d1', '\u00de', '\u00de', '\u2122', '\u2122', '\u040b', '\u040b', '\u0426', '\u0426', '\u0009', '\u0009', '\u03a4', '\u03a4', '\u0164', '\u0164', '\u0162', '\u0162', '\u0422', '\u0422', '\U0001d517', 
'\U0001d517', '\u2234', '\u2234', '\u0398', '\u0398', 8028 '\u2009', '\u2009', '\u223c', '\u223c', '\u2243', '\u2243', '\u2245', '\u2245', '\u2248', '\u2248', '\U0001d54b', '\U0001d54b', '\u20db', '\u20db', '\U0001d4af', '\U0001d4af', '\u0166', '\u0166', '\u00da', '\u00da', '\u219f', '\u219f', '\u2949', '\u2949', '\u040e', '\u040e', '\u016c', '\u016c', '\u00db', 8029 '\u00db', '\u0423', '\u0423', '\u0170', '\u0170', '\U0001d518', '\U0001d518', '\u00d9', '\u00d9', '\u016a', '\u016a', '\u005f', '\u005f', '\u23df', '\u23df', '\u23b5', '\u23b5', '\u23dd', '\u23dd', '\u22c3', '\u22c3', '\u228e', '\u228e', '\u0172', '\u0172', '\U0001d54c', '\U0001d54c', '\u2191', '\u2191', '\u2912', 8030 '\u2912', '\u21c5', '\u21c5', '\u2195', '\u2195', '\u296e', '\u296e', '\u22a5', '\u22a5', '\u21a5', '\u21a5', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2196', '\u2196', '\u2197', '\u2197', '\u03d2', '\u03d2', '\u03a5', '\u03a5', 8031 '\u016e', '\u016e', '\U0001d4b0', '\U0001d4b0', '\u0168', '\u0168', '\u00dc', '\u00dc', '\u22ab', '\u22ab', '\u2aeb', '\u2aeb', '\u0412', '\u0412', '\u22a9', '\u22a9', '\u2ae6', '\u2ae6', '\u22c1', '\u22c1', '\u2016', '\u2016', '\u2016', '\u2016', '\u2223', '\u2223', '\u007c', '\u007c', '\u2758', '\u2758', '\u2240', 8032 '\u2240', '\u200a', '\u200a', '\U0001d519', '\U0001d519', '\U0001d54d', '\U0001d54d', '\U0001d4b1', '\U0001d4b1', '\u22aa', '\u22aa', '\u0174', '\u0174', '\u22c0', '\u22c0', '\U0001d51a', '\U0001d51a', '\U0001d54e', '\U0001d54e', '\U0001d4b2', '\U0001d4b2', '\U0001d51b', '\U0001d51b', '\u039e', '\u039e', '\U0001d54f', '\U0001d54f', '\U0001d4b3', '\U0001d4b3', '\u042f', '\u042f', '\u0407', '\u0407', '\u042e', '\u042e', '\u00dd', '\u00dd', 8033 '\u0176', '\u0176', '\u042b', '\u042b', '\U0001d51c', '\U0001d51c', '\U0001d550', '\U0001d550', '\U0001d4b4', '\U0001d4b4', '\u0178', '\u0178', '\u0416', '\u0416', '\u0179', '\u0179', '\u017d', '\u017d', '\u0417', '\u0417', '\u017b', '\u017b', '\u200b', '\u200b', '\u0396', '\u0396', '\u2128', '\u2128', 
'\u2124', '\u2124', '\U0001d4b5', '\U0001d4b5', '\u00e1', '\u00e1', '\u0103', '\u0103', '\u223e', 8034 '\u223e', '\u223f', '\u223f', '\u00e2', '\u00e2', '\u00b4', '\u00b4', '\u0430', '\u0430', '\u00e6', '\u00e6', '\u2061', '\u2061', '\U0001d51e', '\U0001d51e', '\u00e0', '\u00e0', '\u2135', '\u2135', '\u2135', '\u2135', '\u03b1', '\u03b1', '\u0101', '\u0101', '\u2a3f', '\u2a3f', '\u2227', '\u2227', '\u2a55', '\u2a55', '\u2a5c', '\u2a5c', '\u2a58', '\u2a58', '\u2a5a', '\u2a5a', '\u2220', 8035 '\u2220', '\u29a4', '\u29a4', '\u2220', '\u2220', '\u2221', '\u2221', '\u29a8', '\u29a8', '\u29a9', '\u29a9', '\u29aa', '\u29aa', '\u29ab', '\u29ab', '\u29ac', '\u29ac', '\u29ad', '\u29ad', '\u29ae', '\u29ae', '\u29af', '\u29af', '\u221f', '\u221f', '\u22be', '\u22be', '\u299d', '\u299d', '\u2222', 8036 '\u2222', '\u00c5', '\u00c5', '\u237c', '\u237c', '\u0105', '\u0105', '\U0001d552', '\U0001d552', '\u2248', '\u2248', '\u2a70', '\u2a70', '\u2a6f', '\u2a6f', '\u224a', '\u224a', '\u224b', '\u224b', '\u2248', '\u2248', '\u224a', '\u224a', '\u00e5', '\u00e5', '\U0001d4b6', '\U0001d4b6', '\u002a', '\u002a', '\u2248', '\u2248', '\u224d', '\u224d', '\u00e3', '\u00e3', '\u00e4', 8037 '\u00e4', '\u2233', '\u2233', '\u2a11', '\u2a11', '\u2aed', '\u2aed', '\u224c', '\u224c', '\u03f6', '\u03f6', '\u2035', '\u2035', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u22bd', '\u22bd', '\u2305', '\u2305', '\u2305', '\u2305', '\u23b5', '\u23b5', '\u23b6', '\u23b6', '\u224c', '\u224c', '\u0431', 8038 '\u0431', '\u201e', '\u201e', '\u2235', '\u2235', '\u2235', '\u2235', '\u29b0', '\u29b0', '\u03f6', '\u03f6', '\u212c', '\u212c', '\u03b2', '\u03b2', '\u2136', '\u2136', '\u226c', '\u226c', '\U0001d51f', '\U0001d51f', '\u22c2', '\u22c2', '\u25ef', '\u25ef', '\u22c3', '\u22c3', '\u2a00', '\u2a00', '\u2a01', '\u2a01', '\u2a02', '\u2a02', 8039 '\u2a06', '\u2a06', '\u2605', '\u2605', '\u25bd', '\u25bd', '\u25b3', '\u25b3', '\u2a04', '\u2a04', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u290d', '\u290d', '\u29eb', 
'\u29eb', '\u25aa', '\u25aa', '\u25b4', '\u25b4', '\u25be', 8040 '\u25be', '\u25c2', '\u25c2', '\u25b8', '\u25b8', '\u2423', '\u2423', '\u2592', '\u2592', '\u2591', '\u2591', '\u2593', '\u2593', '\u2588', '\u2588', '\u2310', '\u2310', '\U0001d553', '\U0001d553', '\u22a5', '\u22a5', '\u22a5', '\u22a5', '\u22c8', '\u22c8', '\u2557', '\u2557', '\u2554', '\u2554', '\u2556', 8041 '\u2556', '\u2553', '\u2553', '\u2550', '\u2550', '\u2566', '\u2566', '\u2569', '\u2569', '\u2564', '\u2564', '\u2567', '\u2567', '\u255d', '\u255d', '\u255a', '\u255a', '\u255c', '\u255c', '\u2559', '\u2559', '\u2551', '\u2551', '\u256c', '\u256c', '\u2563', '\u2563', '\u2560', '\u2560', '\u256b', '\u256b', '\u2562', '\u2562', '\u255f', '\u255f', '\u29c9', 8042 '\u29c9', '\u2555', '\u2555', '\u2552', '\u2552', '\u2510', '\u2510', '\u250c', '\u250c', '\u2500', '\u2500', '\u2565', '\u2565', '\u2568', '\u2568', '\u252c', '\u252c', '\u2534', '\u2534', '\u229f', '\u229f', '\u229e', '\u229e', '\u22a0', '\u22a0', '\u255b', '\u255b', '\u2558', '\u2558', '\u2518', '\u2518', '\u2514', '\u2514', '\u2502', 8043 '\u2502', '\u256a', '\u256a', '\u2561', '\u2561', '\u255e', '\u255e', '\u253c', '\u253c', '\u2524', '\u2524', '\u251c', '\u251c', '\u2035', '\u2035', '\u02d8', '\u02d8', '\u00a6', '\u00a6', '\U0001d4b7', '\U0001d4b7', '\u204f', '\u204f', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u005c', '\u005c', '\u29c5', '\u29c5', '\u27c8', '\u27c8', '\u2022', '\u2022', '\u2022', 8044 '\u2022', '\u224e', '\u224e', '\u2aae', '\u2aae', '\u224f', '\u224f', '\u224f', '\u224f', '\u0107', '\u0107', '\u2229', '\u2229', '\u2a44', '\u2a44', '\u2a49', '\u2a49', '\u2a4b', '\u2a4b', '\u2a47', '\u2a47', '\u2a40', '\u2a40', '\u2041', '\u2041', '\u02c7', '\u02c7', '\u2a4d', '\u2a4d', '\u010d', '\u010d', '\u00e7', '\u00e7', '\u0109', 8045 '\u0109', '\u2a4c', '\u2a4c', '\u2a50', '\u2a50', '\u010b', '\u010b', '\u00b8', '\u00b8', '\u29b2', '\u29b2', '\u00a2', '\u00a2', '\u00b7', '\u00b7', '\U0001d520', '\U0001d520', '\u0447', 
'\u0447', '\u2713', '\u2713', '\u2713', '\u2713', '\u03c7', '\u03c7', '\u25cb', '\u25cb', '\u29c3', '\u29c3', '\u02c6', '\u02c6', '\u2257', '\u2257', '\u21ba', 8046 '\u21ba', '\u21bb', '\u21bb', '\u00ae', '\u00ae', '\u24c8', '\u24c8', '\u229b', '\u229b', '\u229a', '\u229a', '\u229d', '\u229d', '\u2257', '\u2257', '\u2a10', '\u2a10', '\u2aef', '\u2aef', '\u29c2', '\u29c2', '\u2663', '\u2663', '\u2663', '\u2663', '\u003a', 8047 '\u003a', '\u2254', '\u2254', '\u2254', '\u2254', '\u002c', '\u002c', '\u0040', '\u0040', '\u2201', '\u2201', '\u2218', '\u2218', '\u2201', '\u2201', '\u2102', '\u2102', '\u2245', '\u2245', '\u2a6d', '\u2a6d', '\u222e', '\u222e', '\U0001d554', '\U0001d554', '\u2210', '\u2210', '\u00a9', '\u00a9', '\u2117', '\u2117', '\u21b5', '\u21b5', 8048 '\u2717', '\u2717', '\U0001d4b8', '\U0001d4b8', '\u2acf', '\u2acf', '\u2ad1', '\u2ad1', '\u2ad0', '\u2ad0', '\u2ad2', '\u2ad2', '\u22ef', '\u22ef', '\u2938', '\u2938', '\u2935', '\u2935', '\u22de', '\u22de', '\u22df', '\u22df', '\u21b6', '\u21b6', '\u293d', '\u293d', '\u222a', '\u222a', '\u2a48', '\u2a48', '\u2a46', '\u2a46', '\u2a4a', '\u2a4a', 8049 '\u228d', '\u228d', '\u2a45', '\u2a45', '\u21b7', '\u21b7', '\u293c', '\u293c', '\u22de', '\u22de', '\u22df', '\u22df', '\u22ce', '\u22ce', '\u22cf', '\u22cf', '\u00a4', '\u00a4', '\u21b6', '\u21b6', '\u21b7', '\u21b7', '\u22ce', '\u22ce', '\u22cf', '\u22cf', 8050 '\u2232', '\u2232', '\u2231', '\u2231', '\u232d', '\u232d', '\u21d3', '\u21d3', '\u2965', '\u2965', '\u2020', '\u2020', '\u2138', '\u2138', '\u2193', '\u2193', '\u2010', '\u2010', '\u22a3', '\u22a3', '\u290f', '\u290f', '\u02dd', '\u02dd', '\u010f', '\u010f', '\u0434', '\u0434', '\u2146', '\u2146', '\u2021', '\u2021', '\u21ca', '\u21ca', '\u2a77', 8051 '\u2a77', '\u00b0', '\u00b0', '\u03b4', '\u03b4', '\u29b1', '\u29b1', '\u297f', '\u297f', '\U0001d521', '\U0001d521', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u22c4', '\u22c4', '\u22c4', '\u22c4', '\u2666', '\u2666', '\u2666', '\u2666', '\u00a8', 
'\u00a8', '\u03dd', '\u03dd', '\u22f2', '\u22f2', '\u00f7', '\u00f7', '\u00f7', '\u00f7', '\u22c7', 8052 '\u22c7', '\u22c7', '\u22c7', '\u0452', '\u0452', '\u231e', '\u231e', '\u230d', '\u230d', '\u0024', '\u0024', '\U0001d555', '\U0001d555', '\u02d9', '\u02d9', '\u2250', '\u2250', '\u2251', '\u2251', '\u2238', '\u2238', '\u2214', '\u2214', '\u22a1', '\u22a1', '\u2306', '\u2306', '\u2193', '\u2193', '\u21ca', 8053 '\u21ca', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u2910', '\u2910', '\u231f', '\u231f', '\u230c', '\u230c', '\U0001d4b9', '\U0001d4b9', '\u0455', '\u0455', '\u29f6', '\u29f6', '\u0111', '\u0111', '\u22f1', '\u22f1', '\u25bf', '\u25bf', '\u25be', '\u25be', '\u21f5', '\u21f5', '\u296f', '\u296f', '\u29a6', 8054 '\u29a6', '\u045f', '\u045f', '\u27ff', '\u27ff', '\u2a77', '\u2a77', '\u2251', '\u2251', '\u00e9', '\u00e9', '\u2a6e', '\u2a6e', '\u011b', '\u011b', '\u2256', '\u2256', '\u00ea', '\u00ea', '\u2255', '\u2255', '\u044d', '\u044d', '\u0117', '\u0117', '\u2147', '\u2147', '\u2252', '\u2252', '\U0001d522', '\U0001d522', '\u2a9a', '\u2a9a', '\u00e8', '\u00e8', '\u2a96', '\u2a96', '\u2a98', 8055 '\u2a98', '\u2a99', '\u2a99', '\u23e7', '\u23e7', '\u2113', '\u2113', '\u2a95', '\u2a95', '\u2a97', '\u2a97', '\u0113', '\u0113', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2003', '\u2003', '\u2004', '\u2004', '\u2005', '\u2005', '\u014b', '\u014b', '\u2002', '\u2002', '\u0119', '\u0119', '\U0001d556', '\U0001d556', '\u22d5', '\u22d5', '\u29e3', 8056 '\u29e3', '\u2a71', '\u2a71', '\u03b5', '\u03b5', '\u03b5', '\u03b5', '\u03f5', '\u03f5', '\u2256', '\u2256', '\u2255', '\u2255', '\u2242', '\u2242', '\u2a96', '\u2a96', '\u2a95', '\u2a95', '\u003d', '\u003d', '\u225f', '\u225f', '\u2261', '\u2261', '\u2a78', '\u2a78', '\u29e5', '\u29e5', '\u2253', '\u2253', 8057 '\u2971', '\u2971', '\u212f', '\u212f', '\u2250', '\u2250', '\u2242', '\u2242', '\u03b7', '\u03b7', '\u00f0', '\u00f0', '\u00eb', '\u00eb', '\u20ac', '\u20ac', '\u0021', '\u0021', 
'\u2203', '\u2203', '\u2130', '\u2130', '\u2147', '\u2147', '\u2252', '\u2252', '\u0444', '\u0444', '\u2640', '\u2640', '\ufb03', '\ufb03', '\ufb00', 8058 '\ufb00', '\ufb04', '\ufb04', '\U0001d523', '\U0001d523', '\ufb01', '\ufb01', '\u266d', '\u266d', '\ufb02', '\ufb02', '\u25b1', '\u25b1', '\u0192', '\u0192', '\U0001d557', '\U0001d557', '\u2200', '\u2200', '\u22d4', '\u22d4', '\u2ad9', '\u2ad9', '\u2a0d', '\u2a0d', '\u00bd', '\u00bd', '\u2153', '\u2153', '\u00bc', '\u00bc', '\u2155', '\u2155', '\u2159', '\u2159', 8059 '\u215b', '\u215b', '\u2154', '\u2154', '\u2156', '\u2156', '\u00be', '\u00be', '\u2157', '\u2157', '\u215c', '\u215c', '\u2158', '\u2158', '\u215a', '\u215a', '\u215d', '\u215d', '\u215e', '\u215e', '\u2044', '\u2044', '\u2322', '\u2322', '\U0001d4bb', '\U0001d4bb', '\u2267', '\u2267', '\u2a8c', '\u2a8c', '\u01f5', '\u01f5', '\u03b3', '\u03b3', '\u03dd', 8060 '\u03dd', '\u2a86', '\u2a86', '\u011f', '\u011f', '\u011d', '\u011d', '\u0433', '\u0433', '\u0121', '\u0121', '\u2265', '\u2265', '\u22db', '\u22db', '\u2265', '\u2265', '\u2267', '\u2267', '\u2a7e', '\u2a7e', '\u2a7e', '\u2a7e', '\u2aa9', '\u2aa9', '\u2a80', '\u2a80', '\u2a82', '\u2a82', '\u2a84', '\u2a84', '\u2a94', '\u2a94', '\U0001d524', '\U0001d524', '\u226b', '\u226b', '\u22d9', 8061 '\u22d9', '\u2137', '\u2137', '\u0453', '\u0453', '\u2277', '\u2277', '\u2a92', '\u2a92', '\u2aa5', '\u2aa5', '\u2aa4', '\u2aa4', '\u2269', '\u2269', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a88', '\u2a88', '\u2a88', '\u2a88', '\u2269', '\u2269', '\u22e7', '\u22e7', '\U0001d558', '\U0001d558', '\u0060', '\u0060', '\u210a', '\u210a', '\u2273', '\u2273', '\u2a8e', '\u2a8e', '\u2a90', '\u2a90', '\u2aa7', 8062 '\u2aa7', '\u2a7a', '\u2a7a', '\u22d7', '\u22d7', '\u2995', '\u2995', '\u2a7c', '\u2a7c', '\u2a86', '\u2a86', '\u2978', '\u2978', '\u22d7', '\u22d7', '\u22db', '\u22db', '\u2a8c', '\u2a8c', '\u2277', '\u2277', '\u2273', '\u2273', '\u21d4', '\u21d4', '\u200a', '\u200a', '\u00bd', '\u00bd', '\u210b', 
'\u210b', 8063 '\u044a', '\u044a', '\u2194', '\u2194', '\u2948', '\u2948', '\u21ad', '\u21ad', '\u210f', '\u210f', '\u0125', '\u0125', '\u2665', '\u2665', '\u2665', '\u2665', '\u2026', '\u2026', '\u22b9', '\u22b9', '\U0001d525', '\U0001d525', '\u2925', '\u2925', '\u2926', '\u2926', '\u21ff', '\u21ff', '\u223b', '\u223b', '\u21a9', '\u21a9', 8064 '\u21aa', '\u21aa', '\U0001d559', '\U0001d559', '\u2015', '\u2015', '\U0001d4bd', '\U0001d4bd', '\u210f', '\u210f', '\u0127', '\u0127', '\u2043', '\u2043', '\u2010', '\u2010', '\u00ed', '\u00ed', '\u2063', '\u2063', '\u00ee', '\u00ee', '\u0438', '\u0438', '\u0435', '\u0435', '\u00a1', '\u00a1', '\u21d4', '\u21d4', '\U0001d526', '\U0001d526', '\u00ec', '\u00ec', '\u2148', 8065 '\u2148', '\u2a0c', '\u2a0c', '\u222d', '\u222d', '\u29dc', '\u29dc', '\u2129', '\u2129', '\u0133', '\u0133', '\u012b', '\u012b', '\u2111', '\u2111', '\u2110', '\u2110', '\u2111', '\u2111', '\u0131', '\u0131', '\u22b7', '\u22b7', '\u01b5', '\u01b5', '\u2208', '\u2208', '\u2105', '\u2105', '\u221e', '\u221e', '\u29dd', '\u29dd', '\u0131', 8066 '\u0131', '\u222b', '\u222b', '\u22ba', '\u22ba', '\u2124', '\u2124', '\u22ba', '\u22ba', '\u2a17', '\u2a17', '\u2a3c', '\u2a3c', '\u0451', '\u0451', '\u012f', '\u012f', '\U0001d55a', '\U0001d55a', '\u03b9', '\u03b9', '\u2a3c', '\u2a3c', '\u00bf', '\u00bf', '\U0001d4be', '\U0001d4be', '\u2208', '\u2208', '\u22f9', '\u22f9', '\u22f5', '\u22f5', '\u22f4', 8067 '\u22f4', '\u22f3', '\u22f3', '\u2208', '\u2208', '\u2062', '\u2062', '\u0129', '\u0129', '\u0456', '\u0456', '\u00ef', '\u00ef', '\u0135', '\u0135', '\u0439', '\u0439', '\U0001d527', '\U0001d527', '\u0237', '\u0237', '\U0001d55b', '\U0001d55b', '\U0001d4bf', '\U0001d4bf', '\u0458', '\u0458', '\u0454', '\u0454', '\u03ba', '\u03ba', '\u03f0', '\u03f0', '\u0137', '\u0137', '\u043a', '\u043a', '\U0001d528', 8068 '\U0001d528', '\u0138', '\u0138', '\u0445', '\u0445', '\u045c', '\u045c', '\U0001d55c', '\U0001d55c', '\U0001d4c0', '\U0001d4c0', '\u21da', '\u21da', 
'\u21d0', '\u21d0', '\u291b', '\u291b', '\u290e', '\u290e', '\u2266', '\u2266', '\u2a8b', '\u2a8b', '\u2962', '\u2962', '\u013a', '\u013a', '\u29b4', '\u29b4', '\u2112', '\u2112', '\u03bb', '\u03bb', '\u27e8', '\u27e8', '\u2991', '\u2991', 8069 '\u27e8', '\u27e8', '\u2a85', '\u2a85', '\u00ab', '\u00ab', '\u2190', '\u2190', '\u21e4', '\u21e4', '\u291f', '\u291f', '\u291d', '\u291d', '\u21a9', '\u21a9', '\u21ab', '\u21ab', '\u2939', '\u2939', '\u2973', '\u2973', '\u21a2', '\u21a2', '\u2aab', '\u2aab', '\u2919', '\u2919', '\u2aad', '\u2aad', '\u290c', '\u290c', '\u2772', '\u2772', '\u007b', 8070 '\u007b', '\u005b', '\u005b', '\u298b', '\u298b', '\u298f', '\u298f', '\u298d', '\u298d', '\u013e', '\u013e', '\u013c', '\u013c', '\u2308', '\u2308', '\u007b', '\u007b', '\u043b', '\u043b', '\u2936', '\u2936', '\u201c', '\u201c', '\u201e', '\u201e', '\u2967', '\u2967', '\u294b', '\u294b', '\u21b2', '\u21b2', '\u2264', '\u2264', '\u2190', 8071 '\u2190', '\u21a2', '\u21a2', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u21c7', '\u21c7', '\u2194', '\u2194', '\u21c6', '\u21c6', '\u21cb', '\u21cb', '\u21ad', '\u21ad', '\u22cb', 8072 '\u22cb', '\u22da', '\u22da', '\u2264', '\u2264', '\u2266', '\u2266', '\u2a7d', '\u2a7d', '\u2a7d', '\u2a7d', '\u2aa8', '\u2aa8', '\u2a7f', '\u2a7f', '\u2a81', '\u2a81', '\u2a83', '\u2a83', '\u2a93', '\u2a93', '\u2a85', '\u2a85', '\u22d6', '\u22d6', '\u22da', '\u22da', '\u2a8b', '\u2a8b', '\u2276', '\u2276', 8073 '\u2272', '\u2272', '\u297c', '\u297c', '\u230a', '\u230a', '\U0001d529', '\U0001d529', '\u2276', '\u2276', '\u2a91', '\u2a91', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u296a', '\u296a', '\u2584', '\u2584', '\u0459', '\u0459', '\u226a', '\u226a', '\u21c7', '\u21c7', '\u231e', '\u231e', '\u296b', '\u296b', '\u25fa', '\u25fa', '\u0140', '\u0140', '\u23b0', '\u23b0', 8074 '\u23b0', '\u23b0', '\u2268', '\u2268', '\u2a89', '\u2a89', '\u2a89', '\u2a89', '\u2a87', '\u2a87', '\u2a87', '\u2a87', '\u2268', '\u2268', '\u22e6', '\u22e6', '\u27ec', '\u27ec', 
'\u21fd', '\u21fd', '\u27e6', '\u27e6', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27fc', '\u27fc', '\u27f6', 8075 '\u27f6', '\u21ab', '\u21ab', '\u21ac', '\u21ac', '\u2985', '\u2985', '\U0001d55d', '\U0001d55d', '\u2a2d', '\u2a2d', '\u2a34', '\u2a34', '\u2217', '\u2217', '\u005f', '\u005f', '\u25ca', '\u25ca', '\u25ca', '\u25ca', '\u29eb', '\u29eb', '\u0028', '\u0028', '\u2993', '\u2993', '\u21c6', '\u21c6', '\u231f', 8076 '\u231f', '\u21cb', '\u21cb', '\u296d', '\u296d', '\u200e', '\u200e', '\u22bf', '\u22bf', '\u2039', '\u2039', '\U0001d4c1', '\U0001d4c1', '\u21b0', '\u21b0', '\u2272', '\u2272', '\u2a8d', '\u2a8d', '\u2a8f', '\u2a8f', '\u005b', '\u005b', '\u2018', '\u2018', '\u201a', '\u201a', '\u0142', '\u0142', '\u2aa6', '\u2aa6', '\u2a79', '\u2a79', '\u22d6', '\u22d6', '\u22cb', 8077 '\u22cb', '\u22c9', '\u22c9', '\u2976', '\u2976', '\u2a7b', '\u2a7b', '\u2996', '\u2996', '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u25c2', '\u25c2', '\u294a', '\u294a', '\u2966', '\u2966', '\u223a', '\u223a', '\u00af', '\u00af', '\u2642', '\u2642', '\u2720', '\u2720', '\u2720', '\u2720', '\u21a6', '\u21a6', '\u21a6', '\u21a6', '\u21a7', 8078 '\u21a7', '\u21a4', '\u21a4', '\u21a5', '\u21a5', '\u25ae', '\u25ae', '\u2a29', '\u2a29', '\u043c', '\u043c', '\u2014', '\u2014', '\u2221', '\u2221', '\U0001d52a', '\U0001d52a', '\u2127', '\u2127', '\u00b5', '\u00b5', '\u2223', '\u2223', '\u002a', '\u002a', '\u2af0', '\u2af0', '\u00b7', '\u00b7', '\u2212', '\u2212', '\u229f', 8079 '\u229f', '\u2238', '\u2238', '\u2a2a', '\u2a2a', '\u2adb', '\u2adb', '\u2026', '\u2026', '\u2213', '\u2213', '\u22a7', '\u22a7', '\U0001d55e', '\U0001d55e', '\u2213', '\u2213', '\U0001d4c2', '\U0001d4c2', '\u223e', '\u223e', '\u03bc', '\u03bc', '\u22b8', '\u22b8', '\u22b8', '\u22b8', '\u21cd', '\u21cd', '\u21ce', '\u21ce', '\u21cf', 8080 '\u21cf', '\u22af', '\u22af', '\u22ae', '\u22ae', '\u2207', '\u2207', '\u0144', '\u0144', '\u2249', '\u2249', '\u0149', '\u0149', '\u2249', '\u2249', '\u266e', '\u266e', '\u266e', 
'\u266e', '\u2115', '\u2115', '\u00a0', '\u00a0', '\u2a43', '\u2a43', '\u0148', '\u0148', '\u0146', '\u0146', '\u2247', '\u2247', '\u2a42', '\u2a42', '\u043d', 8081 '\u043d', '\u2013', '\u2013', '\u2260', '\u2260', '\u21d7', '\u21d7', '\u2924', '\u2924', '\u2197', '\u2197', '\u2197', '\u2197', '\u2262', '\u2262', '\u2928', '\u2928', '\u2204', '\u2204', '\u2204', '\u2204', '\U0001d52b', '\U0001d52b', '\u2271', '\u2271', '\u2271', '\u2271', '\u2275', '\u2275', '\u226f', '\u226f', '\u226f', '\u226f', '\u21ce', '\u21ce', '\u21ae', '\u21ae', 8082 '\u2af2', '\u2af2', '\u220b', '\u220b', '\u22fc', '\u22fc', '\u22fa', '\u22fa', '\u220b', '\u220b', '\u045a', '\u045a', '\u21cd', '\u21cd', '\u219a', '\u219a', '\u2025', '\u2025', '\u2270', '\u2270', '\u219a', '\u219a', '\u21ae', '\u21ae', '\u2270', '\u2270', '\u226e', '\u226e', '\u2274', '\u2274', '\u226e', '\u226e', '\u22ea', '\u22ea', '\u22ec', '\u22ec', 8083 '\u2224', '\u2224', '\U0001d55f', '\U0001d55f', '\u00ac', '\u00ac', '\u2209', '\u2209', '\u2209', '\u2209', '\u22f7', '\u22f7', '\u22f6', '\u22f6', '\u220c', '\u220c', '\u220c', '\u220c', '\u22fe', '\u22fe', '\u22fd', '\u22fd', '\u2226', '\u2226', '\u2226', '\u2226', '\u2a14', '\u2a14', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u2280', 8084 '\u2280', '\u21cf', '\u21cf', '\u219b', '\u219b', '\u219b', '\u219b', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u2281', '\u2281', '\u22e1', '\u22e1', '\U0001d4c3', '\U0001d4c3', '\u2224', '\u2224', '\u2226', '\u2226', '\u2241', '\u2241', '\u2244', '\u2244', '\u2244', '\u2244', '\u2224', '\u2224', '\u2226', '\u2226', '\u22e2', 8085 '\u22e2', '\u22e3', '\u22e3', '\u2284', '\u2284', '\u2288', '\u2288', '\u2288', '\u2288', '\u2281', '\u2281', '\u2285', '\u2285', '\u2289', '\u2289', '\u2289', '\u2289', '\u2279', '\u2279', '\u00f1', '\u00f1', '\u2278', '\u2278', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u22eb', '\u22eb', 8086 '\u22ed', '\u22ed', '\u03bd', '\u03bd', '\u0023', '\u0023', '\u2116', '\u2116', '\u2007', '\u2007', '\u22ad', 
'\u22ad', '\u2904', '\u2904', '\u22ac', '\u22ac', '\u29de', '\u29de', '\u2902', '\u2902', '\u2903', '\u2903', '\u21d6', '\u21d6', '\u2923', '\u2923', '\u2196', '\u2196', '\u2196', '\u2196', '\u2927', '\u2927', 8087 '\u24c8', '\u24c8', '\u00f3', '\u00f3', '\u229b', '\u229b', '\u229a', '\u229a', '\u00f4', '\u00f4', '\u043e', '\u043e', '\u229d', '\u229d', '\u0151', '\u0151', '\u2a38', '\u2a38', '\u2299', '\u2299', '\u29bc', '\u29bc', '\u0153', '\u0153', '\u29bf', '\u29bf', '\U0001d52c', '\U0001d52c', '\u02db', '\u02db', '\u00f2', '\u00f2', '\u29c1', '\u29c1', '\u29b5', '\u29b5', '\u03a9', '\u03a9', '\u222e', 8088 '\u222e', '\u21ba', '\u21ba', '\u29be', '\u29be', '\u29bb', '\u29bb', '\u203e', '\u203e', '\u29c0', '\u29c0', '\u014d', '\u014d', '\u03c9', '\u03c9', '\u03bf', '\u03bf', '\u29b6', '\u29b6', '\u2296', '\u2296', '\U0001d560', '\U0001d560', '\u29b7', '\u29b7', '\u29b9', '\u29b9', '\u2295', '\u2295', '\u2228', '\u2228', '\u21bb', '\u21bb', '\u2a5d', '\u2a5d', '\u2134', '\u2134', 8089 '\u2134', '\u2134', '\u00aa', '\u00aa', '\u00ba', '\u00ba', '\u22b6', '\u22b6', '\u2a56', '\u2a56', '\u2a57', '\u2a57', '\u2a5b', '\u2a5b', '\u2134', '\u2134', '\u00f8', '\u00f8', '\u2298', '\u2298', '\u00f5', '\u00f5', '\u2297', '\u2297', '\u2a36', '\u2a36', '\u00f6', '\u00f6', '\u233d', '\u233d', '\u2225', '\u2225', '\u00b6', '\u00b6', '\u2225', '\u2225', 8090 '\u2af3', '\u2af3', '\u2afd', '\u2afd', '\u2202', '\u2202', '\u043f', '\u043f', '\u0025', '\u0025', '\u002e', '\u002e', '\u2030', '\u2030', '\u22a5', '\u22a5', '\u2031', '\u2031', '\U0001d52d', '\U0001d52d', '\u03c6', '\u03c6', '\u03d5', '\u03d5', '\u2133', '\u2133', '\u260e', '\u260e', '\u03c0', '\u03c0', '\u22d4', '\u22d4', '\u03d6', '\u03d6', '\u210f', '\u210f', 8091 '\u210e', '\u210e', '\u210f', '\u210f', '\u002b', '\u002b', '\u2a23', '\u2a23', '\u229e', '\u229e', '\u2a22', '\u2a22', '\u2214', '\u2214', '\u2a25', '\u2a25', '\u2a72', '\u2a72', '\u00b1', '\u00b1', '\u2a26', '\u2a26', '\u2a27', '\u2a27', '\u00b1', '\u00b1', 
'\u2a15', '\u2a15', '\U0001d561', '\U0001d561', '\u00a3', '\u00a3', '\u227a', 8092 '\u227a', '\u2ab3', '\u2ab3', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u227a', '\u227a', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u2ab9', '\u2ab9', '\u2ab5', '\u2ab5', '\u22e8', '\u22e8', '\u227e', '\u227e', '\u2032', '\u2032', '\u2119', '\u2119', '\u2ab5', '\u2ab5', '\u2ab9', 8093 '\u2ab9', '\u22e8', '\u22e8', '\u220f', '\u220f', '\u232e', '\u232e', '\u2312', '\u2312', '\u2313', '\u2313', '\u221d', '\u221d', '\u221d', '\u221d', '\u227e', '\u227e', '\u22b0', '\u22b0', '\U0001d4c5', '\U0001d4c5', '\u03c8', '\u03c8', '\u2008', '\u2008', '\U0001d52e', '\U0001d52e', '\u2a0c', '\u2a0c', '\U0001d562', '\U0001d562', '\u2057', '\u2057', '\U0001d4c6', '\U0001d4c6', 8094 '\u210d', '\u210d', '\u2a16', '\u2a16', '\u003f', '\u003f', '\u225f', '\u225f', '\u21db', '\u21db', '\u21d2', '\u21d2', '\u291c', '\u291c', '\u290f', '\u290f', '\u2964', '\u2964', '\u0155', '\u0155', '\u221a', '\u221a', '\u29b3', '\u29b3', '\u27e9', '\u27e9', '\u2992', '\u2992', '\u29a5', '\u29a5', '\u27e9', '\u27e9', '\u00bb', 8095 '\u00bb', '\u2192', '\u2192', '\u2975', '\u2975', '\u21e5', '\u21e5', '\u2920', '\u2920', '\u2933', '\u2933', '\u291e', '\u291e', '\u21aa', '\u21aa', '\u21ac', '\u21ac', '\u2945', '\u2945', '\u2974', '\u2974', '\u21a3', '\u21a3', '\u219d', '\u219d', '\u291a', '\u291a', '\u2236', '\u2236', '\u211a', '\u211a', '\u290d', '\u290d', 8096 '\u2773', '\u2773', '\u007d', '\u007d', '\u005d', '\u005d', '\u298c', '\u298c', '\u298e', '\u298e', '\u2990', '\u2990', '\u0159', '\u0159', '\u0157', '\u0157', '\u2309', '\u2309', '\u007d', '\u007d', '\u0440', '\u0440', '\u2937', '\u2937', '\u2969', '\u2969', '\u201d', '\u201d', '\u201d', '\u201d', '\u21b3', '\u21b3', '\u211c', '\u211c', '\u211b', 8097 '\u211b', '\u211c', '\u211c', '\u211d', '\u211d', '\u25ad', '\u25ad', '\u00ae', '\u00ae', '\u297d', '\u297d', '\u230b', '\u230b', '\U0001d52f', '\U0001d52f', '\u21c1', '\u21c1', 
'\u21c0', '\u21c0', '\u296c', '\u296c', '\u03c1', '\u03c1', '\u03f1', '\u03f1', '\u2192', '\u2192', '\u21a3', '\u21a3', '\u21c1', '\u21c1', 8098 '\u21c0', '\u21c0', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u21c9', '\u21c9', '\u219d', '\u219d', '\u22cc', '\u22cc', '\u02da', '\u02da', '\u2253', '\u2253', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u200f', 8099 '\u200f', '\u23b1', '\u23b1', '\u23b1', '\u23b1', '\u2aee', '\u2aee', '\u27ed', '\u27ed', '\u21fe', '\u21fe', '\u27e7', '\u27e7', '\u2986', '\u2986', '\U0001d563', '\U0001d563', '\u2a2e', '\u2a2e', '\u2a35', '\u2a35', '\u0029', '\u0029', '\u2994', '\u2994', '\u2a12', '\u2a12', '\u21c9', '\u21c9', '\u203a', '\u203a', '\U0001d4c7', '\U0001d4c7', '\u21b1', 8100 '\u21b1', '\u005d', '\u005d', '\u2019', '\u2019', '\u2019', '\u2019', '\u22cc', '\u22cc', '\u22ca', '\u22ca', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25b8', '\u25b8', '\u29ce', '\u29ce', '\u2968', '\u2968', '\u211e', '\u211e', '\u015b', '\u015b', '\u201a', '\u201a', '\u227b', '\u227b', '\u2ab4', '\u2ab4', '\u2ab8', '\u2ab8', '\u0161', '\u0161', '\u227d', 8101 '\u227d', '\u2ab0', '\u2ab0', '\u015f', '\u015f', '\u015d', '\u015d', '\u2ab6', '\u2ab6', '\u2aba', '\u2aba', '\u22e9', '\u22e9', '\u2a13', '\u2a13', '\u227f', '\u227f', '\u0441', '\u0441', '\u22c5', '\u22c5', '\u22a1', '\u22a1', '\u2a66', '\u2a66', '\u21d8', '\u21d8', '\u2925', '\u2925', '\u2198', '\u2198', '\u2198', '\u2198', '\u00a7', '\u00a7', '\u003b', 8102 '\u003b', '\u2929', '\u2929', '\u2216', '\u2216', '\u2216', '\u2216', '\u2736', '\u2736', '\U0001d530', '\U0001d530', '\u2322', '\u2322', '\u266f', '\u266f', '\u0449', '\u0449', '\u0448', '\u0448', '\u2223', '\u2223', '\u2225', '\u2225', '\u00ad', '\u00ad', '\u03c3', '\u03c3', '\u03c2', '\u03c2', '\u03c2', '\u03c2', '\u223c', '\u223c', '\u2a6a', 8103 '\u2a6a', '\u2243', '\u2243', '\u2243', '\u2243', '\u2a9e', '\u2a9e', '\u2aa0', '\u2aa0', '\u2a9d', '\u2a9d', '\u2a9f', '\u2a9f', '\u2246', '\u2246', '\u2a24', '\u2a24', '\u2972', '\u2972', 
'\u2190', '\u2190', '\u2216', '\u2216', '\u2a33', '\u2a33', '\u29e4', '\u29e4', '\u2223', '\u2223', '\u2323', '\u2323', '\u2aaa', '\u2aaa', '\u2aac', 8104 '\u2aac', '\u044c', '\u044c', '\u002f', '\u002f', '\u29c4', '\u29c4', '\u233f', '\u233f', '\U0001d564', '\U0001d564', '\u2660', '\u2660', '\u2660', '\u2660', '\u2225', '\u2225', '\u2293', '\u2293', '\u2294', '\u2294', '\u228f', '\u228f', '\u2291', '\u2291', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', '\u2292', '\u2292', 8105 '\u2290', '\u2290', '\u2292', '\u2292', '\u25a1', '\u25a1', '\u25a1', '\u25a1', '\u25aa', '\u25aa', '\u25aa', '\u25aa', '\u2192', '\u2192', '\U0001d4c8', '\U0001d4c8', '\u2216', '\u2216', '\u2323', '\u2323', '\u22c6', '\u22c6', '\u2606', '\u2606', '\u2605', '\u2605', '\u03f5', '\u03f5', '\u03d5', '\u03d5', '\u00af', 8106 '\u00af', '\u2282', '\u2282', '\u2ac5', '\u2ac5', '\u2abd', '\u2abd', '\u2286', '\u2286', '\u2ac3', '\u2ac3', '\u2ac1', '\u2ac1', '\u2acb', '\u2acb', '\u228a', '\u228a', '\u2abf', '\u2abf', '\u2979', '\u2979', '\u2282', '\u2282', '\u2286', '\u2286', '\u2ac5', '\u2ac5', '\u228a', '\u228a', '\u2acb', '\u2acb', 8107 '\u2ac7', '\u2ac7', '\u2ad5', '\u2ad5', '\u2ad3', '\u2ad3', '\u227b', '\u227b', '\u2ab8', '\u2ab8', '\u227d', '\u227d', '\u2ab0', '\u2ab0', '\u2aba', '\u2aba', '\u2ab6', '\u2ab6', '\u22e9', '\u22e9', '\u227f', '\u227f', '\u2211', '\u2211', '\u266a', '\u266a', '\u2283', '\u2283', '\u00b9', '\u00b9', '\u00b2', 8108 '\u00b2', '\u00b3', '\u00b3', '\u2ac6', '\u2ac6', '\u2abe', '\u2abe', '\u2ad8', '\u2ad8', '\u2287', '\u2287', '\u2ac4', '\u2ac4', '\u27c9', '\u27c9', '\u2ad7', '\u2ad7', '\u297b', '\u297b', '\u2ac2', '\u2ac2', '\u2acc', '\u2acc', '\u228b', '\u228b', '\u2ac0', '\u2ac0', '\u2283', '\u2283', '\u2287', '\u2287', '\u2ac6', 8109 '\u2ac6', '\u228b', '\u228b', '\u2acc', '\u2acc', '\u2ac8', '\u2ac8', '\u2ad4', '\u2ad4', '\u2ad6', '\u2ad6', '\u21d9', '\u21d9', '\u2926', '\u2926', '\u2199', '\u2199', '\u2199', '\u2199', '\u292a', '\u292a', '\u00df', 
'\u00df', '\u2316', '\u2316', '\u03c4', '\u03c4', '\u23b4', '\u23b4', '\u0165', '\u0165', '\u0163', 8110 '\u0163', '\u0442', '\u0442', '\u20db', '\u20db', '\u2315', '\u2315', '\U0001d531', '\U0001d531', '\u2234', '\u2234', '\u2234', '\u2234', '\u03b8', '\u03b8', '\u03d1', '\u03d1', '\u03d1', '\u03d1', '\u2248', '\u2248', '\u223c', '\u223c', '\u2009', '\u2009', '\u2248', '\u2248', '\u223c', '\u223c', '\u00fe', '\u00fe', '\u02dc', 8111 '\u02dc', '\u00d7', '\u00d7', '\u22a0', '\u22a0', '\u2a31', '\u2a31', '\u2a30', '\u2a30', '\u222d', '\u222d', '\u2928', '\u2928', '\u22a4', '\u22a4', '\u2336', '\u2336', '\u2af1', '\u2af1', '\U0001d565', '\U0001d565', '\u2ada', '\u2ada', '\u2929', '\u2929', '\u2034', '\u2034', '\u2122', '\u2122', '\u25b5', '\u25b5', '\u25bf', '\u25bf', 8112 '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u225c', '\u225c', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25ec', '\u25ec', '\u225c', '\u225c', '\u2a3a', '\u2a3a', '\u2a39', '\u2a39', '\u29cd', '\u29cd', '\u2a3b', '\u2a3b', '\u23e2', '\u23e2', '\U0001d4c9', 8113 '\U0001d4c9', '\u0446', '\u0446', '\u045b', '\u045b', '\u0167', '\u0167', '\u226c', '\u226c', '\u219e', '\u219e', '\u21a0', '\u21a0', '\u21d1', '\u21d1', '\u2963', '\u2963', '\u00fa', '\u00fa', '\u2191', '\u2191', '\u045e', '\u045e', '\u016d', '\u016d', '\u00fb', '\u00fb', '\u0443', '\u0443', '\u21c5', '\u21c5', '\u0171', 8114 '\u0171', '\u296e', '\u296e', '\u297e', '\u297e', '\U0001d532', '\U0001d532', '\u00f9', '\u00f9', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u2580', '\u2580', '\u231c', '\u231c', '\u231c', '\u231c', '\u230f', '\u230f', '\u25f8', '\u25f8', '\u016b', '\u016b', '\u00a8', '\u00a8', '\u0173', '\u0173', '\U0001d566', '\U0001d566', '\u2191', '\u2191', '\u2195', 8115 '\u2195', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u228e', '\u228e', '\u03c5', '\u03c5', '\u03d2', '\u03d2', '\u03c5', '\u03c5', '\u21c8', '\u21c8', '\u231d', '\u231d', '\u231d', '\u231d', '\u230e', '\u230e', '\u016f', '\u016f', '\u25f9', '\u25f9', '\U0001d4ca', 
'\U0001d4ca', '\u22f0', '\u22f0', 8116 '\u0169', '\u0169', '\u25b5', '\u25b5', '\u25b4', '\u25b4', '\u21c8', '\u21c8', '\u00fc', '\u00fc', '\u29a7', '\u29a7', '\u21d5', '\u21d5', '\u2ae8', '\u2ae8', '\u2ae9', '\u2ae9', '\u22a8', '\u22a8', '\u299c', '\u299c', '\u03f5', '\u03f5', '\u03f0', '\u03f0', '\u2205', '\u2205', '\u03d5', '\u03d5', '\u03d6', '\u03d6', '\u221d', 8117 '\u221d', '\u2195', '\u2195', '\u03f1', '\u03f1', '\u03c2', '\u03c2', '\u03d1', '\u03d1', '\u22b2', '\u22b2', '\u22b3', '\u22b3', '\u0432', '\u0432', '\u22a2', '\u22a2', '\u2228', '\u2228', '\u22bb', '\u22bb', '\u225a', '\u225a', '\u22ee', '\u22ee', '\u007c', '\u007c', '\u007c', '\u007c', '\U0001d533', 8118 '\U0001d533', '\u22b2', '\u22b2', '\U0001d567', '\U0001d567', '\u221d', '\u221d', '\u22b3', '\u22b3', '\U0001d4cb', '\U0001d4cb', '\u299a', '\u299a', '\u0175', '\u0175', '\u2a5f', '\u2a5f', '\u2227', '\u2227', '\u2259', '\u2259', '\u2118', '\u2118', '\U0001d534', '\U0001d534', '\U0001d568', '\U0001d568', '\u2118', '\u2118', '\u2240', '\u2240', '\u2240', '\u2240', '\U0001d4cc', '\U0001d4cc', '\u22c2', '\u22c2', '\u25ef', 8119 '\u25ef', '\u22c3', '\u22c3', '\u25bd', '\u25bd', '\U0001d535', '\U0001d535', '\u27fa', '\u27fa', '\u27f7', '\u27f7', '\u03be', '\u03be', '\u27f8', '\u27f8', '\u27f5', '\u27f5', '\u27fc', '\u27fc', '\u22fb', '\u22fb', '\u2a00', '\u2a00', '\U0001d569', '\U0001d569', '\u2a01', '\u2a01', '\u2a02', '\u2a02', '\u27f9', '\u27f9', '\u27f6', '\u27f6', '\U0001d4cd', '\U0001d4cd', '\u2a06', '\u2a06', '\u2a04', 8120 '\u2a04', '\u25b3', '\u25b3', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u00fd', '\u00fd', '\u044f', '\u044f', '\u0177', '\u0177', '\u044b', '\u044b', '\u00a5', '\u00a5', '\U0001d536', '\U0001d536', '\u0457', '\u0457', '\U0001d56a', '\U0001d56a', '\U0001d4ce', '\U0001d4ce', '\u044e', '\u044e', '\u00ff', '\u00ff', '\u017a', '\u017a', '\u017e', '\u017e', '\u0437', '\u0437', '\u017c', '\u017c', '\u2128', 8121 '\u2128', '\u03b6', '\u03b6', '\U0001d537', '\U0001d537', '\u0436', 
'\u0436', '\u21dd', '\u21dd', '\U0001d56b', '\U0001d56b', '\U0001d4cf', '\U0001d4cf', '\u200d', '\u200d', '\u200c', '\u200c', ];



// dom event support, if you want to use it

/// Signature of a DOM event handler: it receives the element the handler is attached to and the event being delivered.
version(dom_with_events)
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/++
	A DOM event, like in javascript. Note that this library never fires
	events - the class is only here for you to use if you want it.
+/
version(dom_with_events)
class Event {
	/// Creates an event named `eventName` whose source element is `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Marks the event so that `send` and `dispatch` skip the default event handlers.
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Marks the event so that `dispatch` stops walking the element chain.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;
	bool propagationStopped;
	string eventName;

	Element srcElement;
	alias target = srcElement;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling;

	/// Delivers the event to the source element only. If you want propagation, use `dispatch` instead.
	void send() {
		auto recipient = srcElement;
		if(recipient is null)
			return;

		if(eventName in recipient.bubblingEventHandlers)
			foreach(handler; recipient.bubblingEventHandlers[eventName])
				handler(recipient, this);

		if(defaultPrevented)
			return;

		if(eventName in recipient.defaultEventHandlers)
			recipient.defaultEventHandlers[eventName](recipient, this);
	}

	/++
		Dispatches the event using the capture -> target -> bubble process.

		Capturing handlers run from the topmost ancestor down to the source
		element, then bubbling handlers run from the source element back up.
		Finally, unless the default was prevented, the default handler of every
		element in the chain is called.
	+/
	void dispatch() {
		if(srcElement is null)
			return;

		// the source element first, followed by each of its ancestors
		Element[] chain;
		for(Element node = srcElement; node; node = node.parentNode)
			chain ~= node;

		// first capture, then bubble
		isBubbling = false;

		foreach_reverse(e; chain) {
			if(eventName in e.capturingEventHandlers)
				foreach(handler; e.capturingEventHandlers[eventName])
					handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		isBubbling = true;

		if(!propagationStopped) {
			foreach(e; chain) {
				if(eventName in e.bubblingEventHandlers)
					foreach(handler; e.bubblingEventHandlers[eventName])
						handler(e, this);

				if(propagationStopped)
					break;
			}
		}

		if(defaultPrevented)
			return;

		foreach(e; chain) {
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
		}
	}
}

/// Validation and presentation options for a form field, mirroring the `pattern`, `required` and `placeholder` attributes.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// Options with nothing set.
	@property static FormFieldOptions none() {
		return FormFieldOptions.init;
	}

	/// Options that only mark the field as required.
	static FormFieldOptions required() {
		return FormFieldOptions(null, true);
	}

	/// Options carrying a validation `pattern`, optionally also marking the field as required.
	static FormFieldOptions regex(string pattern, bool required = false) {
		return FormFieldOptions(pattern, required);
	}

	/// Reads the options back from the `required`, `pattern` and `placeholder` attributes of `e`.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions opts;
		if(e.hasAttribute("required"))
			opts.isRequired = true;
		if(e.hasAttribute("pattern"))
			opts.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			opts.placeholder = e.placeholder;
		return opts;
	}

	/// Writes every option that is set onto `e` as an attribute and returns `e`.
	Element applyToElement(Element e) {
		if(isRequired)
			e.required = "required";
		if(pattern.length)
			e.pattern = pattern;
		if(placeholder.length)
			e.placeholder = placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias Utf8Stream = string;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		/// Returns the next piece of data, or null when no helper delegate was supplied.
		string getMore() {
			return getMoreHelper is null ? null : getMoreHelper();
		}

		/// Tells whether more data can be fetched; false when no helper delegate was supplied.
		bool hasMore() {
			return hasMoreHelper !is null && hasMoreHelper();
		}
		// the rest should be ok

	public:
		/// Wraps a string that is already fully available.
		this(string d) {
			this.data = d;
		}

		/// Wraps a pair of delegates that supply the data piece by piece; the first piece is fetched right away.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();

			// stdout.flush();
		}

		/// Length of the data buffered so far, after first trying to buffer more if needed.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			immutable nearEnd = lastIdx + 1 >= data.length;
			if(nearEnd && hasMore())
				data ~= getMore();
			return data.length;
		}

		/// Returns the byte at `idx`, remembering the highest index accessed so far.
		final char opIndex(size_t idx) {
			if(lastIdx < idx)
				lastIdx = idx;
			return data[idx];
		}

		/// Returns `data[start .. end]`, remembering the highest index accessed so far.
		final string opSlice(size_t start, size_t end) {
			if(lastIdx < end)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice expression is the current length.
		final size_t opDollar() {
			return this.length;
		}

		/// Appends `s` to the buffered data and returns this same stream object.
		final Utf8Stream opBinary(string op : "~")(string s) {
			data ~= s;
			return this;
		}

		/// Appends `s` to the buffered data and returns this same stream object.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			data ~= s;
			return this;
		}

		/// Replaces the buffered data with `rhs`.
		final Utf8Stream opAssign(string rhs) {
			data = rhs;
			return this;
		}
	private:
		string data;

		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

/// Fills `form` from `obj` by passing each key/value pair that `arsd.database.fillData` produces to `form.setValue`.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((key, value) => form.setValue(key, value), obj, name);
}

/++
	Normalizes the whitespace in the given text according to HTML rules.
8410 8411 History: 8412 Added March 25, 2022 (dub v10.8) 8413 +/ 8414 string normalizeWhitespace(string text) { 8415 string ret; 8416 ret.reserve(text.length); 8417 bool lastWasWhite = true; 8418 foreach(char ch; text) { 8419 if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { 8420 if(lastWasWhite) 8421 continue; 8422 lastWasWhite = true; 8423 ch = ' '; 8424 } else { 8425 lastWasWhite = false; 8426 } 8427 8428 ret ~= ch; 8429 } 8430 8431 return ret.stripRight; 8432 } 8433 8434 unittest { 8435 assert(normalizeWhitespace(" foo ") == "foo"); 8436 assert(normalizeWhitespace(" f\n \t oo ") == "f oo"); 8437 } 8438 8439 unittest { 8440 Document document; 8441 8442 document = new Document("<test> foo \r </test>"); 8443 assert(document.root.visibleText == "foo"); 8444 8445 document = new Document("<test> foo \r <br>hi</test>"); 8446 assert(document.root.visibleText == "foo\nhi"); 8447 8448 document = new Document("<test> foo \r <br>hi<pre>hi\nthere\n indent<br />line</pre></test>"); 8449 assert(document.root.visibleText == "foo\nhihi\nthere\n indent\nline", document.root.visibleText); 8450 } 8451 8452 /+ 8453 /+ 8454 Syntax: 8455 8456 Tag: tagname#id.class 8457 Tree: Tag(Children, comma, separated...) 8458 Children: Tee or Variable 8459 Variable: $varname with optional |funcname following. 
8460 8461 If a variable has a tree after it, it breaks the variable down: 8462 * if array, foreach it does the tree 8463 * if struct, it breaks down the member variables 8464 8465 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 8466 +/ 8467 struct Stringplate { 8468 /++ 8469 8470 +/ 8471 this(string s) { 8472 8473 } 8474 8475 /++ 8476 8477 +/ 8478 Element expand(T...)(T vars) { 8479 return null; 8480 } 8481 } 8482 /// 8483 unittest { 8484 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 8485 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 8486 } 8487 +/ 8488 8489 bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) { 8490 foreach(child; children) { 8491 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 8492 // cool 8493 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) { 8494 // cool, this is an inline element and none of its children contradict that 8495 } else { 8496 // prolly block 8497 return false; 8498 } 8499 } 8500 return true; 8501 } 8502 8503 private bool isSimpleWhite(dchar c) { 8504 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 8505 } 8506 8507 unittest { 8508 // Test for issue #120 8509 string s = `<html> 8510 <body> 8511 <P>AN 8512 <P>bubbles</P> 8513 <P>giggles</P> 8514 </body> 8515 </html>`; 8516 auto doc = new Document(); 8517 doc.parseUtf8(s, false, false); 8518 auto s2 = doc.toString(); 8519 assert( 8520 s2.indexOf("bubbles") < s2.indexOf("giggles"), 8521 "paragraph order incorrect:\n" ~ s2); 8522 } 8523 8524 unittest { 8525 // test for suncarpet email dec 24 2019 8526 // arbitrary id asduiwh 8527 auto document = new Document("<html> 8528 <head> 8529 <meta charset=\"utf-8\"></meta> 8530 <title>Element.querySelector Test</title> 8531 </head> 8532 <body> 8533 <div id=\"foo\"> 8534 <div>Foo</div> 8535 <div>Bar</div> 8536 </div> 8537 
<div id=\"empty\"></div> 8538 <div id=\"empty-but-text\">test</div> 8539 </body> 8540 </html>"); 8541 8542 auto doc = document; 8543 8544 { 8545 auto empty = doc.requireElementById("empty"); 8546 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 8547 } 8548 { 8549 auto empty = doc.requireElementById("empty-but-text"); 8550 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 8551 } 8552 8553 assert(doc.querySelectorAll("div div").length == 2); 8554 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 8555 assert(doc.querySelectorAll("> html").length == 0); 8556 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 8557 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 8558 8559 8560 assert(doc.root.matches("html")); 8561 assert(!doc.root.matches("nothtml")); 8562 assert(doc.querySelector("#foo > div").matches("div")); 8563 assert(doc.querySelector("body > #foo").matches("#foo")); 8564 8565 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 8566 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 8567 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 8568 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 8569 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 8570 8571 // also confirming the querySelector works via the mdn definition 8572 auto foo = doc.requireSelector("#foo"); 8573 assert(foo.querySelector("#foo > div") !is null); 8574 assert(foo.querySelector("body #foo > div") !is null); 8575 8576 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 8577 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 
8578 //assert(foo.querySelectorAll("#foo > div").length == 2); 8579 } 8580 8581 unittest { 8582 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 8583 auto document = new Document(`<article> 8584 <div id="div-01">Here is div-01 8585 <div id="div-02">Here is div-02 8586 <div id="div-03">Here is div-03</div> 8587 </div> 8588 </div> 8589 </article>`, true, true); 8590 8591 auto el = document.getElementById("div-03"); 8592 assert(el.closest("#div-02").id == "div-02"); 8593 assert(el.closest("div div").id == "div-03"); 8594 assert(el.closest("article > div").id == "div-01"); 8595 assert(el.closest(":not(div)").tagName == "article"); 8596 8597 assert(el.closest("p") is null); 8598 assert(el.closest("p, div") is el); 8599 } 8600 8601 unittest { 8602 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 8603 auto document = new Document(`<test> 8604 <div class="foo"><p>cool</p><span>bar</span></div> 8605 <main><p>two</p></main> 8606 </test>`); 8607 8608 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 8609 assert(document.querySelector("div:where(.foo)") !is null); 8610 } 8611 8612 unittest { 8613 immutable string html = q{ 8614 <root> 8615 <div class="roundedbox"> 8616 <table> 8617 <caption class="boxheader">Recent Reviews</caption> 8618 <tr> 8619 <th>Game</th> 8620 <th>User</th> 8621 <th>Rating</th> 8622 <th>Created</th> 8623 </tr> 8624 8625 <tr> 8626 <td>June 13, 2020 15:10</td> 8627 <td><a href="/reviews/8833">[Show]</a></td> 8628 </tr> 8629 8630 <tr> 8631 <td>June 13, 2020 15:02</td> 8632 <td><a href="/reviews/8832">[Show]</a></td> 8633 </tr> 8634 8635 <tr> 8636 <td>June 13, 2020 14:41</td> 8637 <td><a href="/reviews/8831">[Show]</a></td> 8638 </tr> 8639 </table> 8640 </div> 8641 </root> 8642 }; 8643 8644 auto doc = new Document(cast(string)html); 8645 // this should select the second table row, but... 
8646 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 8647 assert(rd !is null); 8648 assert(rd.href == "/reviews/8832"); 8649 8650 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 8651 assert(rd !is null); 8652 assert(rd.href == "/reviews/8832"); 8653 } 8654 8655 unittest { 8656 try { 8657 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 8658 assert(0); 8659 } catch(Exception e) { 8660 // good; it should throw an exception, not an error. 8661 } 8662 } 8663 8664 unittest { 8665 // toPrettyString is not stable, but these are some best-effort attempts 8666 // despite these being in a test, I might change these anyway! 8667 assert(Element.make("a").toPrettyString == "<a></a>"); 8668 assert(Element.make("a", "").toPrettyString(false, 0, " ") == "<a></a>"); 8669 assert(Element.make("a", " ").toPrettyString(false, 0, " ") == "<a> </a>");//, Element.make("a", " ").toPrettyString(false, 0, " ")); 8670 assert(Element.make("a", "b").toPrettyString == "<a>b</a>"); 8671 assert(Element.make("a", "b").toPrettyString(false, 0, "") == "<a>b</a>"); 8672 8673 { 8674 auto document = new Document("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 8675 auto pretty = document.toPrettyString(false, 0, " "); 8676 assert(pretty == 8677 `<!DOCTYPE html> 8678 <html> 8679 <body> 8680 <p>hello <a href="world">world</a></p> 8681 </body> 8682 </html>`, pretty); 8683 } 8684 8685 { 8686 auto document = new XmlDocument("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 8687 assert(document.toPrettyString(false, 0, " ") == 8688 `<?xml version="1.0" encoding="UTF-8"?> 8689 <html> 8690 <body> 8691 <p> 8692 hello 8693 <a href="world">world</a> 8694 </p> 8695 </body> 8696 </html>`); 8697 } 8698 8699 foreach(test; [ 8700 "<a att=\"http://ele\"><b><ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n 
<ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>", 8701 "<a att=\"http://ele\"><b><ele1>Hello</ele1><c><d><ele2>How are you?</ele2></d><e><ele3>Good & you?</ele3></e></c></b></a>", 8702 ] ) 8703 { 8704 auto document = new XmlDocument(test); 8705 assert(document.root.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 8706 assert(document.toPrettyString(false, 0, " ") == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 8707 auto omg = document.root; 8708 omg.parent_ = null; 8709 assert(omg.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 8710 } 8711 8712 { 8713 auto document = new XmlDocument(`<a><b>toto</b><c></c></a>`); 8714 assert(document.root.toPrettyString(false, 0, null) == `<a><b>toto</b><c></c></a>`); 8715 assert(document.root.toPrettyString(false, 0, " ") == `<a> 8716 <b>toto</b> 8717 <c></c> 8718 </a>`); 8719 } 8720 8721 { 8722 auto str = `<!DOCTYPE html> 8723 <html> 8724 <head> 8725 <title>Test</title> 8726 </head> 8727 <body> 8728 <p>Hello there</p> 8729 <p>I like <a href="">Links</a></p> 8730 <div> 8731 this is indented since there's a block inside 8732 <p>this is the block</p> 8733 and this gets its own line 8734 </div> 8735 </body> 8736 </html>`; 8737 auto doc = new Document(str, true, true); 8738 assert(doc.toPrettyString == str); 8739 } 8740 } 8741 8742 unittest { 8743 auto document = new Document("<foo><items><item><title>test</title><desc>desc</desc></item></items></foo>"); 8744 auto items = document.root.requireSelector("> items"); 8745 auto item = items.requireSelector("> item"); 8746 auto title = 
item.requireSelector("> title"); 8747 8748 // this not actually implemented at this point but i might want to later. it prolly should work as an extension of the standard behavior 8749 // assert(title.requireSelector("~ desc").innerText == "desc"); 8750 8751 assert(item.requireSelector("title ~ desc").innerText == "desc"); 8752 8753 assert(items.querySelector("item:has(title)") !is null); 8754 assert(items.querySelector("item:has(nothing)") is null); 8755 8756 assert(title.innerText == "test"); 8757 } 8758 8759 unittest { 8760 auto document = new Document("broken"); // just ensuring it doesn't crash 8761 } 8762 8763 8764 /* 8765 Copyright: Adam D. Ruppe, 2010 - 2023 8766 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 8767 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 8768 */