// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary


// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

static import arsd.core;
import arsd.core : encodeUriComponent, decodeUriComponent;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes we allow to be reached directly, without going through .attr
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/++
	The main document interface, including a html or xml parser.

	There are three main ways to create a Document:

	If you want to parse something and inspect the tags, you can use the [this|constructor]:
	---
		// create and parse some HTML in one call
		auto document = new Document("<html></html>");

		// or some XML
		auto document = new Document("<xml></xml>", true, true); // strict mode enabled

		// or better yet:
		auto document = new XmlDocument("<xml></xml>"); // specialized subclass
	---

	If you want to download something and parse it in one call, the [fromUrl] static function can help:
	---
		auto document = Document.fromUrl("http://dlang.org/");
	---
	(note that this requires my [arsd.characterencodings] and [arsd.http2] libraries)

	And, if you need to inspect things like `<%= foo %>` tags and comments, you can add them to the dom like this, with the [enableAddingSpecialTagsToDom]
	and [parseUtf8] or [parseGarbage] functions:
	---
		auto document = new Document();
		document.enableAddingSpecialTagsToDom();
		document.parseUtf8("<example></example>", true, true); // change the trues to false to switch from xml to html mode
	---

	You can also modify things like [selfClosedElements] and [rawSourceElements] before calling the `parse` family of functions to do further advanced tasks.

	However you parse it, it will put a few things into special variables.

	[root] contains the root element.
	[prolog] contains the instructions before the root (like `<!DOCTYPE html>`). To keep the original things, you will need to [enableAddingSpecialTagsToDom] first, otherwise the library will return generic strings in there. [piecesBeforeRoot] will have other parsed instructions, if [enableAddingSpecialTagsToDom] is called.
	[piecesAfterRoot] will contain any xml-looking data after the root tag is closed.
135 136 Most often though, you will not need to look at any of that data, since `Document` itself has methods like [querySelector], [appendChild], and more which will forward to the root [Element] for you. 137 +/ 138 /// Group: core_functionality 139 class Document : FileResource, DomParent { 140 inout(Document) asDocument() inout { return this; } 141 inout(Element) asElement() inout { return null; } 142 143 void processNodeWhileParsing(Element parent, Element child) { 144 parent.appendChild(child); 145 } 146 147 /++ 148 Convenience method for web scraping. Requires [arsd.http2] to be 149 included in the build as well as [arsd.characterencodings]. 150 151 This will download the file from the given url and create a document 152 off it, using a strict constructor or a [parseGarbage], depending on 153 the value of `strictMode`. 154 +/ 155 static Document fromUrl()(string url, bool strictMode = false) { 156 import arsd.http2; 157 auto client = new HttpClient(); 158 159 auto req = client.navigateTo(Uri(url), HttpVerb.GET); 160 auto res = req.waitForCompletion(); 161 162 auto document = new Document(); 163 if(strictMode) { 164 document.parse(cast(string) res.content, true, true, res.contentTypeCharset); 165 } else { 166 document.parseGarbage(cast(string) res.content); 167 } 168 169 return document; 170 } 171 172 /++ 173 Creates a document with the given source data. If you want HTML behavior, use `caseSensitive` and `struct` set to `false`. For XML mode, set them to `true`. 174 175 Please note that anything after the root element will be found in [piecesAfterRoot]. Comments, processing instructions, and other special tags will be stripped out b default. You can customize this by using the zero-argument constructor and setting callbacks on the [parseSawComment], [parseSawBangInstruction], [parseSawAspCode], [parseSawPhpCode], and [parseSawQuestionInstruction] members, then calling one of the [parseUtf8], [parseGarbage], or [parse] functions. 
Calling the convenience method, [enableAddingSpecialTagsToDom], will enable all those things at once. 176 177 See_Also: 178 [parseGarbage] 179 [parseUtf8] 180 [parseUrl] 181 +/ 182 this(string data, bool caseSensitive = false, bool strict = false) { 183 parseUtf8(data, caseSensitive, strict); 184 } 185 186 /** 187 Creates an empty document. It has *nothing* in it at all, ready. 188 */ 189 this() { 190 191 } 192 193 /++ 194 This is just something I'm toying with. Right now, you use opIndex to put in css selectors. 195 It returns a struct that forwards calls to all elements it holds, and returns itself so you 196 can chain it. 197 198 Example: document["p"].innerText("hello").addClass("modified"); 199 200 Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClas("modified"); } 201 202 Note: always use function calls (not property syntax) and don't use toString in there for best results. 203 204 You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe 205 you could put in some kind of custom filter function tho. 206 +/ 207 ElementCollection opIndex(string selector) { 208 auto e = ElementCollection(this.root); 209 return e[selector]; 210 } 211 212 string _contentType = "text/html; charset=utf-8"; 213 214 /// If you're using this for some other kind of XML, you can 215 /// set the content type here. 216 /// 217 /// Note: this has no impact on the function of this class. 218 /// It is only used if the document is sent via a protocol like HTTP. 219 /// 220 /// This may be called by parse() if it recognizes the data. Otherwise, 221 /// if you don't set it, it assumes text/html; charset=utf-8. 222 @property string contentType(string mimeType) { 223 _contentType = mimeType; 224 return _contentType; 225 } 226 227 /// implementing the FileResource interface, useful for sending via 228 /// http automatically. 
229 @property string filename() const { return null; } 230 231 /// implementing the FileResource interface, useful for sending via 232 /// http automatically. 233 override @property string contentType() const { 234 return _contentType; 235 } 236 237 /// implementing the FileResource interface; it calls toString. 238 override immutable(ubyte)[] getData() const { 239 return cast(immutable(ubyte)[]) this.toString(); 240 } 241 242 243 /* 244 /// Concatenates any consecutive text nodes 245 void normalize() { 246 247 } 248 */ 249 250 /// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them. 251 /// Call this before calling parse(). 252 253 /++ 254 Adds objects to the dom representing things normally stripped out during the default parse, like comments, `<!instructions>`, `<% code%>`, and `<? code?>` all at once. 255 256 Note this will also preserve the prolog and doctype from the original file, if there was one. 257 258 See_Also: 259 [parseSawComment] 260 [parseSawAspCode] 261 [parseSawPhpCode] 262 [parseSawQuestionInstruction] 263 [parseSawBangInstruction] 264 +/ 265 void enableAddingSpecialTagsToDom() { 266 parseSawComment = (string) => true; 267 parseSawAspCode = (string) => true; 268 parseSawPhpCode = (string) => true; 269 parseSawQuestionInstruction = (string) => true; 270 parseSawBangInstruction = (string) => true; 271 } 272 273 /// If the parser sees a html comment, it will call this callback 274 /// <!-- comment --> will call parseSawComment(" comment ") 275 /// Return true if you want the node appended to the document. It will be in a [HtmlComment] object. 276 bool delegate(string) parseSawComment; 277 278 /// If the parser sees <% asp code... %>, it will call this callback. 279 /// It will be passed "% asp code... %" or "%= asp code .. %" 280 /// Return true if you want the node appended to the document. 
	/// It will be in an [AspCode] object.
	bool delegate(string) parseSawAspCode;

	/// If the parser sees <?php php code... ?>, it will call this callback.
	/// It will be passed "?php php code... ?" or "?= php code .. ?"
	/// Note: dom.d cannot identify the other php <? code ?> short format.
	/// Return true if you want the node appended to the document. It will be in a [PhpCode] object.
	bool delegate(string) parseSawPhpCode;

	/// if it sees a <?xxx> that is not php or asp
	/// it calls this function with the contents.
	/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
	/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
	/// Return true if you want the node appended to the document. It will be in a [QuestionInstruction] object.
	bool delegate(string) parseSawQuestionInstruction;

	/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
	/// it calls this function with the contents.
	/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
	/// Return true if you want the node appended to the document. It will be in a [BangInstruction] object.
	bool delegate(string) parseSawBangInstruction;

	/// Given the kind of garbage you find on the Internet, try to make sense of it.
	/// Equivalent to document.parse(data, false, false, null);
	/// (Case-insensitive, non-strict, determine character encoding from the data.)
	///
	/// NOTE: this makes no attempt at added security, but it will try to recover from anything instead of throwing.
	///
	/// It is a template so it lazily imports characterencodings.
	void parseGarbage()(string data) {
		// the null encoding argument asks parse() to work the encoding out from the data
		parse(data, false, false, null);
	}

	/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
	/// Will throw exceptions on things like unclosed tags.
315 void parseStrict(string data, bool pureXmlMode = false) { 316 parseStream(toUtf8Stream(data), true, true, pureXmlMode); 317 } 318 319 /// Parses well-formed UTF-8 in loose mode (by default). Tries to correct 320 /// tag soup, but does NOT try to correct bad character encodings. 321 /// 322 /// They will still throw an exception. 323 void parseUtf8(string data, bool caseSensitive = false, bool strict = false) { 324 parseStream(toUtf8Stream(data), caseSensitive, strict); 325 } 326 327 // this is a template so we get lazy import behavior 328 Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) { 329 import arsd.characterencodings; 330 // gotta determine the data encoding. If you know it, pass it in above to skip all this. 331 if(dataEncoding is null) { 332 dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata); 333 // it can't tell... probably a random 8 bit encoding. Let's check the document itself. 334 // Now, XML and HTML can both list encoding in the document, but we can't really parse 335 // it here without changing a lot of code until we know the encoding. So I'm going to 336 // do some hackish string checking. 337 if(dataEncoding is null) { 338 auto dataAsBytes = cast(immutable(ubyte)[]) rawdata; 339 // first, look for an XML prolog 340 auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\""); 341 if(idx != -1) { 342 idx += "encoding=\"".length; 343 // we're probably past the prolog if it's this far in; we might be looking at 344 // content. Forget about it. 345 if(idx > 100) 346 idx = -1; 347 } 348 // if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5).. 349 if(idx == -1) { 350 idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset="); 351 if(idx != -1) { 352 idx += "charset=".length; 353 if(dataAsBytes[idx] == '"') 354 idx++; 355 } 356 } 357 358 // found something in either branch... 
359 if(idx != -1) { 360 // read till a quote or about 12 chars, whichever comes first... 361 auto end = idx; 362 while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12) 363 end++; 364 365 dataEncoding = cast(string) dataAsBytes[idx .. end]; 366 } 367 // otherwise, we just don't know. 368 } 369 } 370 371 if(dataEncoding is null) { 372 if(strict) 373 throw new MarkupException("I couldn't figure out the encoding of this document."); 374 else 375 // if we really don't know by here, it means we already tried UTF-8, 376 // looked for utf 16 and 32 byte order marks, and looked for xml or meta 377 // tags... let's assume it's Windows-1252, since that's probably the most 378 // common aside from utf that wouldn't be labeled. 379 380 dataEncoding = "Windows 1252"; 381 } 382 383 // and now, go ahead and convert it. 384 385 string data; 386 387 if(!strict) { 388 // if we're in non-strict mode, we need to check 389 // the document for mislabeling too; sometimes 390 // web documents will say they are utf-8, but aren't 391 // actually properly encoded. If it fails to validate, 392 // we'll assume it's actually Windows encoding - the most 393 // likely candidate for mislabeled garbage. 
394 dataEncoding = dataEncoding.toLower(); 395 dataEncoding = dataEncoding.replace(" ", ""); 396 dataEncoding = dataEncoding.replace("-", ""); 397 dataEncoding = dataEncoding.replace("_", ""); 398 if(dataEncoding == "utf8") { 399 try { 400 validate(rawdata); 401 } catch(UTFException e) { 402 dataEncoding = "Windows 1252"; 403 } 404 } 405 } 406 407 if(dataEncoding != "UTF-8") { 408 if(strict) 409 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 410 else { 411 try { 412 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 413 } catch(Exception e) { 414 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252"); 415 } 416 } 417 } else 418 data = rawdata; 419 420 return toUtf8Stream(data); 421 } 422 423 private 424 Utf8Stream toUtf8Stream(in string rawdata) { 425 string data = rawdata; 426 static if(is(Utf8Stream == string)) 427 return data; 428 else 429 return new Utf8Stream(data); 430 } 431 432 /++ 433 List of elements that can be assumed to be self-closed 434 in this document. The default for a Document are a hard-coded 435 list of ones appropriate for HTML. For [XmlDocument], it defaults 436 to empty. You can modify this after construction but before parsing. 437 438 History: 439 Added February 8, 2021 (included in dub release 9.2) 440 441 Changed from `string[]` to `immutable(string)[]` on 442 February 4, 2024 (dub v11.5) to plug a hole discovered 443 by the OpenD compiler's diagnostics. 444 +/ 445 immutable(string)[] selfClosedElements = htmlSelfClosedElements; 446 447 /++ 448 List of elements that contain raw CDATA content for this 449 document, e.g. `<script>` and `<style>` for HTML. The parser 450 will read until the closing string and put everything else 451 in a [RawSource] object for future processing, not trying to 452 do any further child nodes or attributes, etc. 

		History:
			Added February 4, 2024 (dub v11.5)

	+/
	immutable(string)[] rawSourceElements = htmlRawSourceElements;

	/++
		List of elements that are considered inline for pretty printing.
		The default for a Document are hard-coded to something appropriate
		for HTML. For [XmlDocument], it defaults to empty. You can modify
		this after construction but before parsing.

		History:
			Added June 21, 2021 (included in dub release 10.1)

			Changed from `string[]` to `immutable(string)[]` on
			February 4, 2024 (dub v11.5) to plug a hole discovered
			by the OpenD compiler's diagnostics.
	+/
	immutable(string)[] inlineElements = htmlInlineElements;

	/**
		Take XMLish data and try to make the DOM tree out of it.

		The goal isn't to be perfect, but to just be good enough to
		approximate Javascript's behavior.

		If strict, it throws on something that doesn't make sense.
		(Examples: mismatched tags. It doesn't validate!)
		If not strict, it tries to recover anyway, and only throws
		when something is REALLY unworkable.

		If strict is false, it uses a magic list of tags that needn't
		be closed. If you are writing a document specifically for this,
		try to avoid such - use self closed tags at least. Easier to parse.

		The dataEncoding argument can be used to pass a specific
		charset encoding for automatic conversion. If null (which is NOT
		the default!), it tries to determine from the data itself,
		using the xml prolog or meta tags, and assumes UTF-8 if unsure.

		NOTE(review): handleDataEncoding, as written, does not fall back to
		UTF-8 when detection fails: it throws in strict mode and uses
		Windows 1252 otherwise. Confirm which behavior this paragraph
		should describe.

		If this assumption is wrong, it can throw on non-ascii
		characters!


		Note that it previously assumed the data was encoded as UTF-8, which
		is why the dataEncoding argument defaults to that.

		So it shouldn't break backward compatibility.
503 504 But, if you want the best behavior on wild data - figuring it out from the document 505 instead of assuming - you'll probably want to change that argument to null. 506 507 This is a template so it lazily imports arsd.characterencodings, which is required 508 to fix up data encodings. 509 510 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 511 dependency. If it is data from the Internet though, a random website, the encoding 512 is often a lie. This function, if dataEncoding == null, can correct for that, or 513 you can try parseGarbage. In those cases, arsd.characterencodings is required to 514 compile. 515 */ 516 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 517 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 518 parseStream(data, caseSensitive, strict); 519 } 520 521 // note: this work best in strict mode, unless data is just a simple string wrapper 522 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false, bool pureXmlMode = false) { 523 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 524 // of my big app. 525 526 assert(data !is null); 527 528 // go through character by character. 529 // if you see a <, consider it a tag. 530 // name goes until the first non tagname character 531 // then see if it self closes or has an attribute 532 533 // if not in a tag, anything not a tag is a big text 534 // node child. It ends as soon as it sees a < 535 536 // Whitespace in text or attributes is preserved, but not between attributes 537 538 // & and friends are converted when I know them, left the same otherwise 539 540 541 // this it should already be done correctly.. 
so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 542 //validate(data); // it *must* be UTF-8 for this to work correctly 543 544 sizediff_t pos = 0; 545 546 clear(); 547 548 loose = !caseSensitive; 549 550 bool sawImproperNesting = false; 551 bool nonNestableHackRequired = false; 552 553 int getLineNumber(sizediff_t p) { 554 int line = 1; 555 foreach(c; data[0..p]) 556 if(c == '\n') 557 line++; 558 return line; 559 } 560 561 void parseError(string message) { 562 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 563 } 564 565 bool eatWhitespace() { 566 bool ateAny = false; 567 while(pos < data.length && data[pos].isSimpleWhite) { 568 pos++; 569 ateAny = true; 570 } 571 return ateAny; 572 } 573 574 string readTagName() { 575 // remember to include : for namespaces 576 // basically just keep going until >, /, or whitespace 577 auto start = pos; 578 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 579 { 580 pos++; 581 if(pos == data.length) { 582 if(strict) 583 throw new Exception("tag name incomplete when file ended"); 584 else 585 break; 586 } 587 } 588 589 if(!caseSensitive) 590 return toLower(data[start..pos]); 591 else 592 return data[start..pos]; 593 } 594 595 string readAttributeName() { 596 // remember to include : for namespaces 597 // basically just keep going until >, /, or whitespace 598 auto start = pos; 599 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 600 { 601 if(data[pos] == '<') { 602 if(strict) 603 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 604 else 605 break; // e.g. <a href="something" <img src="poo" /></a>. 
The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 606 } 607 pos++; 608 if(pos == data.length) { 609 if(strict) 610 throw new Exception("unterminated attribute name"); 611 else 612 break; 613 } 614 } 615 616 if(!caseSensitive) 617 return toLower(data[start..pos]); 618 else 619 return data[start..pos]; 620 } 621 622 string readAttributeValue() { 623 if(pos >= data.length) { 624 if(strict) 625 throw new Exception("no attribute value before end of file"); 626 else 627 return null; 628 } 629 switch(data[pos]) { 630 case '\'': 631 case '"': 632 auto started = pos; 633 char end = data[pos]; 634 pos++; 635 auto start = pos; 636 while(pos < data.length && data[pos] != end) 637 pos++; 638 if(strict && pos == data.length) 639 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 640 string v = htmlEntitiesDecode(data[start..pos], strict); 641 pos++; // skip over the end 642 return v; 643 default: 644 if(strict) 645 parseError("Attributes must be quoted"); 646 // read until whitespace or terminator (/> or >) 647 auto start = pos; 648 while( 649 pos < data.length && 650 data[pos] != '>' && 651 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 652 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 653 !data[pos].isSimpleWhite) 654 pos++; 655 656 string v = htmlEntitiesDecode(data[start..pos], strict); 657 // don't skip the end - we'll need it later 658 return v; 659 } 660 } 661 662 TextNode readTextNode() { 663 auto start = pos; 664 while(pos < data.length && data[pos] != '<') { 665 pos++; 666 } 667 668 return TextNode.fromUndecodedString(this, data[start..pos]); 669 } 670 671 // this is obsolete! 
672 RawSource readCDataNode() { 673 auto start = pos; 674 while(pos < data.length && data[pos] != '<') { 675 pos++; 676 } 677 678 return new RawSource(this, data[start..pos]); 679 } 680 681 682 struct Ele { 683 int type; // element or closing tag or nothing 684 /* 685 type == 0 means regular node, self-closed (element is valid) 686 type == 1 means closing tag (payload is the tag name, element may be valid) 687 type == 2 means you should ignore it completely 688 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 689 type == 4 means the document was totally empty 690 */ 691 Element element; // for type == 0 or type == 3 692 string payload; // for type == 1 693 } 694 // recursively read a tag 695 Ele readElement(string[] parentChain = null) { 696 // FIXME: this is the slowest function in this module, by far, even in strict mode. 697 // Loose mode should perform decently, but strict mode is the important one. 
698 if(!strict && parentChain is null) 699 parentChain = []; 700 701 static string[] recentAutoClosedTags; 702 703 if(pos >= data.length) 704 { 705 if(strict) { 706 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 707 } else { 708 if(parentChain.length) 709 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 710 else 711 return Ele(4); // signal emptiness upstream 712 } 713 } 714 715 if(data[pos] != '<') { 716 return Ele(0, readTextNode(), null); 717 } 718 719 enforce(data[pos] == '<'); 720 pos++; 721 if(pos == data.length) { 722 if(strict) 723 throw new MarkupException("Found trailing < at end of file"); 724 // if not strict, we'll just skip the switch 725 } else 726 switch(data[pos]) { 727 // I don't care about these, so I just want to skip them 728 case '!': // might be a comment, a doctype, or a special instruction 729 pos++; 730 731 // FIXME: we should store these in the tree too 732 // though I like having it stripped out tbh. 733 734 if(pos == data.length) { 735 if(strict) 736 throw new MarkupException("<! opened at end of file"); 737 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 738 // comment 739 pos += 2; 740 741 // FIXME: technically, a comment is anything 742 // between -- and -- inside a <!> block. 
743 // so in <!-- test -- lol> , the " lol" is NOT a comment 744 // and should probably be handled differently in here, but for now 745 // I'll just keep running until --> since that's the common way 746 747 auto commentStart = pos; 748 while(pos+3 < data.length && data[pos..pos+3] != "-->") 749 pos++; 750 751 auto end = commentStart; 752 753 if(pos + 3 >= data.length) { 754 if(strict) 755 throw new MarkupException("unclosed comment"); 756 end = data.length; 757 pos = data.length; 758 } else { 759 end = pos; 760 assert(data[pos] == '-'); 761 pos++; 762 assert(data[pos] == '-'); 763 pos++; 764 assert(data[pos] == '>'); 765 pos++; 766 } 767 768 if(parseSawComment !is null) 769 if(parseSawComment(data[commentStart .. end])) { 770 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 771 } 772 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 773 pos += 7; 774 775 auto cdataStart = pos; 776 777 ptrdiff_t end = -1; 778 typeof(end) cdataEnd; 779 780 if(pos < data.length) { 781 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 782 end = data[pos .. $].indexOf("]]>"); 783 } 784 785 if(end == -1) { 786 if(strict) 787 throw new MarkupException("Unclosed CDATA section"); 788 end = pos; 789 cdataEnd = pos; 790 } else { 791 cdataEnd = pos + end; 792 pos = cdataEnd + 3; 793 } 794 795 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 796 } else { 797 auto start = pos; 798 while(pos < data.length && data[pos] != '>') 799 pos++; 800 801 auto bangEnds = pos; 802 if(pos == data.length) { 803 if(strict) 804 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 805 } else pos++; // skipping the > 806 807 if(parseSawBangInstruction !is null) 808 if(parseSawBangInstruction(data[start .. bangEnds])) { 809 // FIXME: these should be able to modify the parser state, 810 // doing things like adding entities, somehow. 811 812 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 813 } 814 } 815 816 /* 817 if(pos < data.length && data[pos] == '>') 818 pos++; // skip the > 819 else 820 assert(!strict); 821 */ 822 break; 823 case '%': 824 case '?': 825 /* 826 Here's what we want to support: 827 828 <% asp code %> 829 <%= asp code %> 830 <?php php code ?> 831 <?= php code ?> 832 833 The contents don't really matter, just if it opens with 834 one of the above for, it ends on the two char terminator. 835 836 <?something> 837 this is NOT php code 838 because I've seen this in the wild: <?EM-dummyText> 839 840 This could be php with shorttags which would be cut off 841 prematurely because if(a >) - that > counts as the close 842 of the tag, but since dom.d can't tell the difference 843 between that and the <?EM> real world example, it will 844 not try to look for the ?> ending. 845 846 The difference between this and the asp/php stuff is that it 847 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 848 on >. 849 */ 850 851 char end = data[pos]; 852 auto started = pos; 853 bool isAsp = end == '%'; 854 int currentIndex = 0; 855 bool isPhp = false; 856 bool isEqualTag = false; 857 int phpCount = 0; 858 859 more: 860 pos++; // skip the start 861 if(pos == data.length) { 862 if(strict) 863 throw new MarkupException("Unclosed <"~end~" by end of file"); 864 } else { 865 currentIndex++; 866 if(currentIndex == 1 && data[pos] == '=') { 867 if(!isAsp) 868 isPhp = true; 869 isEqualTag = true; 870 goto more; 871 } 872 if(currentIndex == 1 && data[pos] == 'p') 873 phpCount++; 874 if(currentIndex == 2 && data[pos] == 'h') 875 phpCount++; 876 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 877 isPhp = true; 878 879 if(data[pos] == '>') { 880 if((isAsp || isPhp) && data[pos - 1] != end) 881 goto more; 882 // otherwise we're done 883 } else 884 goto more; 885 } 886 887 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 888 auto code = data[started .. 
pos]; 889 890 891 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 892 if(pos < data.length) 893 pos++; // get past the > 894 895 if(isAsp && parseSawAspCode !is null) { 896 if(parseSawAspCode(code)) { 897 return Ele(3, new AspCode(this, code), null); 898 } 899 } else if(isPhp && parseSawPhpCode !is null) { 900 if(parseSawPhpCode(code)) { 901 return Ele(3, new PhpCode(this, code), null); 902 } 903 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 904 if(parseSawQuestionInstruction(code)) { 905 return Ele(3, new QuestionInstruction(this, code), null); 906 } 907 } 908 break; 909 case '/': // closing an element 910 pos++; // skip the start 911 auto p = pos; 912 while(pos < data.length && data[pos] != '>') 913 pos++; 914 //writefln("</%s>", data[p..pos]); 915 if(pos == data.length && data[pos-1] != '>') { 916 if(strict) 917 throw new MarkupException("File ended before closing tag had a required >"); 918 else 919 data ~= ">"; // just hack it in 920 } 921 pos++; // skip the '>' 922 923 string tname = data[p..pos-1]; 924 if(!strict) 925 tname = tname.strip; 926 if(!caseSensitive) 927 tname = tname.toLower(); 928 929 return Ele(1, null, tname); // closing tag reports itself here 930 case ' ': // assume it isn't a real element... 931 if(strict) { 932 parseError("bad markup - improperly placed <"); 933 assert(0); // parseError always throws 934 } else 935 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 936 default: 937 938 if(!strict) { 939 // what about something that kinda looks like a tag, but isn't? 940 auto nextTag = data[pos .. $].indexOf("<"); 941 auto closeTag = data[pos .. $].indexOf(">"); 942 if(closeTag != -1 && nextTag != -1) 943 if(nextTag < closeTag) { 944 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 945 946 auto equal = data[pos .. 
$].indexOf("=\""); 947 if(equal != -1 && equal < closeTag) { 948 // this MIGHT be ok, soldier on 949 } else { 950 // definitely no good, this must be a (horribly distorted) text node 951 pos++; // skip the < we're on - don't want text node to end prematurely 952 auto node = readTextNode(); 953 node.contents = "<" ~ node.contents; // put this back 954 return Ele(0, node, null); 955 } 956 } 957 } 958 959 string tagName = readTagName(); 960 AttributesHolder attributes; 961 962 Ele addTag(bool selfClosed) { 963 if(selfClosed) 964 pos++; 965 else { 966 if(!strict) 967 if(tagName.isInArray(selfClosedElements)) 968 // these are de-facto self closed 969 selfClosed = true; 970 } 971 972 import std.algorithm.comparison; 973 974 if(strict) { 975 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)])); 976 } else { 977 // if we got here, it's probably because a slash was in an 978 // unquoted attribute - don't trust the selfClosed value 979 if(!selfClosed) 980 selfClosed = tagName.isInArray(selfClosedElements); 981 982 while(pos < data.length && data[pos] != '>') 983 pos++; 984 985 if(pos >= data.length) { 986 // the tag never closed 987 assert(data.length != 0); 988 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 
989 } 990 } 991 992 auto whereThisTagStarted = pos; // for better error messages 993 994 pos++; 995 996 auto e = createElement(tagName); 997 e.attributes = attributes; 998 version(dom_node_indexes) { 999 if(e.dataset.nodeIndex.length == 0) 1000 e.dataset.nodeIndex = to!string(&(e.attributes)); 1001 } 1002 e.selfClosed = selfClosed; 1003 e.parseAttributes(); 1004 1005 1006 // HACK to handle script and style as a raw data section as it is in HTML browsers 1007 if(!pureXmlMode && tagName.isInArray(rawSourceElements)) { 1008 if(!selfClosed) { 1009 string closer = "</" ~ tagName ~ ">"; 1010 ptrdiff_t ending; 1011 if(pos >= data.length) 1012 ending = -1; 1013 else 1014 ending = indexOf(data[pos..$], closer); 1015 1016 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 1017 /* 1018 if(loose && ending == -1 && pos < data.length) 1019 ending = indexOf(data[pos..$], closer.toUpper()); 1020 */ 1021 if(ending == -1) { 1022 if(strict) 1023 throw new Exception("tag " ~ tagName ~ " never closed"); 1024 else { 1025 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 1026 if(pos < data.length) { 1027 e = new TextNode(this, data[pos .. $]); 1028 pos = data.length; 1029 } 1030 } 1031 } else { 1032 ending += pos; 1033 e.innerRawSource = data[pos..ending]; 1034 pos = ending + closer.length; 1035 } 1036 } 1037 return Ele(0, e, null); 1038 } 1039 1040 bool closed = selfClosed; 1041 1042 void considerHtmlNonNestableElementHack(Element n) { 1043 assert(!strict); 1044 if(!canNestElementsInHtml(e.tagName, n.tagName)) { 1045 // html lets you write <p> para 1 <p> para 1 1046 // but in the dom tree, they should be siblings, not children. 
1047 nonNestableHackRequired = true; 1048 } 1049 } 1050 1051 //writef("<%s>", tagName); 1052 while(!closed) { 1053 Ele n; 1054 if(strict) 1055 n = readElement(); 1056 else 1057 n = readElement(parentChain ~ tagName); 1058 1059 if(n.type == 4) return n; // the document is empty 1060 1061 if(n.type == 3 && n.element !is null) { 1062 // special node, append if possible 1063 if(e !is null) 1064 processNodeWhileParsing(e, n.element); 1065 else 1066 piecesBeforeRoot ~= n.element; 1067 } else if(n.type == 0) { 1068 if(!strict) 1069 considerHtmlNonNestableElementHack(n.element); 1070 processNodeWhileParsing(e, n.element); 1071 } else if(n.type == 1) { 1072 bool found = false; 1073 if(n.payload != tagName) { 1074 if(strict) 1075 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 1076 else { 1077 sawImproperNesting = true; 1078 // this is so we don't drop several levels of awful markup 1079 if(n.element) { 1080 if(!strict) 1081 considerHtmlNonNestableElementHack(n.element); 1082 processNodeWhileParsing(e, n.element); 1083 n.element = null; 1084 } 1085 1086 // is the element open somewhere up the chain? 1087 foreach(i, parent; parentChain) 1088 if(parent == n.payload) { 1089 recentAutoClosedTags ~= tagName; 1090 // just rotating it so we don't inadvertently break stuff with vile crap 1091 if(recentAutoClosedTags.length > 4) 1092 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 1093 1094 n.element = e; 1095 return n; 1096 } 1097 1098 /+ 1099 // COMMENTED OUT BLOCK 1100 // dom.d used to replace improper close tags with their 1101 // text so they'd be visible in the output. the html 1102 // spec says to just ignore them, and browsers do indeed 1103 // seem to jsut ignore them, even checking back on IE6. 1104 // so i guess i was wrong to do this (tho tbh i find it kinda 1105 // useful to call out an obvious mistake in the source... 
1106 // but for calling out obvious mistakes, just use strict 1107 // mode.) 1108 1109 // if not, this is a text node; we can't fix it up... 1110 1111 // If it's already in the tree somewhere, assume it is closed by algorithm 1112 // and we shouldn't output it - odds are the user just flipped a couple tags 1113 foreach(ele; e.tree) { 1114 if(ele.tagName == n.payload) { 1115 found = true; 1116 break; 1117 } 1118 } 1119 1120 foreach(ele; recentAutoClosedTags) { 1121 if(ele == n.payload) { 1122 found = true; 1123 break; 1124 } 1125 } 1126 1127 if(!found) // if not found in the tree though, it's probably just text 1128 processNodeWhileParsing(e, TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1129 1130 +/ 1131 } 1132 } else { 1133 if(n.element) { 1134 if(!strict) 1135 considerHtmlNonNestableElementHack(n.element); 1136 processNodeWhileParsing(e, n.element); 1137 } 1138 } 1139 1140 if(n.payload == tagName) // in strict mode, this is always true 1141 closed = true; 1142 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1143 } 1144 //writef("</%s>\n", tagName); 1145 return Ele(0, e, null); 1146 } 1147 1148 // if a tag was opened but not closed by end of file, we can arrive here 1149 if(!strict && pos >= data.length) 1150 return addTag(false); 1151 //else if(strict) assert(0); // should be caught before 1152 1153 switch(data[pos]) { 1154 default: assert(0); 1155 case '/': // self closing tag 1156 return addTag(true); 1157 case '>': 1158 return addTag(false); 1159 case ' ': 1160 case '\t': 1161 case '\n': 1162 case '\r': 1163 // there might be attributes... 1164 moreAttributes: 1165 eatWhitespace(); 1166 1167 // same deal as above the switch.... 
1168 if(!strict && pos >= data.length) 1169 return addTag(false); 1170 1171 if(strict && pos >= data.length) 1172 throw new MarkupException("tag open, didn't find > before end of file"); 1173 1174 switch(data[pos]) { 1175 case '/': // self closing tag 1176 return addTag(true); 1177 case '>': // closed tag; open -- we now read the contents 1178 return addTag(false); 1179 default: // it is an attribute 1180 string attrName = readAttributeName(); 1181 string attrValue = attrName; 1182 1183 bool ateAny = eatWhitespace(); 1184 // the spec allows this too, sigh https://www.w3.org/TR/REC-xml/#NT-Eq 1185 //if(strict && ateAny) 1186 //throw new MarkupException("inappropriate whitespace after attribute name"); 1187 1188 if(pos >= data.length) { 1189 if(strict) 1190 assert(0, "this should have thrown in readAttributeName"); 1191 else { 1192 data ~= ">"; 1193 goto blankValue; 1194 } 1195 } 1196 if(data[pos] == '=') { 1197 pos++; 1198 1199 ateAny = eatWhitespace(); 1200 // the spec actually allows this! 1201 //if(strict && ateAny) 1202 //throw new MarkupException("inappropriate whitespace after attribute equals"); 1203 1204 attrValue = readAttributeValue(); 1205 1206 eatWhitespace(); 1207 } 1208 1209 blankValue: 1210 1211 if(strict && attrName in attributes) 1212 throw new MarkupException("Repeated attribute: " ~ attrName); 1213 1214 if(attrName.strip().length) 1215 attributes[attrName] = attrValue; 1216 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1217 1218 if(!strict && pos < data.length && data[pos] == '<') { 1219 // this is the broken tag that doesn't have a > at the end 1220 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1221 // let's insert one as a hack 1222 goto case '>'; 1223 } 1224 1225 goto moreAttributes; 1226 } 1227 } 1228 } 1229 1230 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 
1231 //assert(0); 1232 } 1233 1234 eatWhitespace(); 1235 Ele r; 1236 do { 1237 r = readElement(); // there SHOULD only be one element... 1238 1239 if(r.type == 3 && r.element !is null) 1240 piecesBeforeRoot ~= r.element; 1241 1242 if(r.type == 4) 1243 break; // the document is completely empty... 1244 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1245 1246 root = r.element; 1247 if(root !is null) 1248 root.parent_ = this; 1249 1250 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1251 while(r.type != 4) { 1252 r = readElement(); 1253 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1254 if(r.element !is null) 1255 piecesAfterRoot ~= r.element; 1256 } 1257 } 1258 1259 if(root is null) 1260 { 1261 if(strict) 1262 assert(0, "empty document should be impossible in strict mode"); 1263 else 1264 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1265 } 1266 1267 if(nonNestableHackRequired) { 1268 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1269 1270 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1271 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1272 1273 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1274 // Kind of inefficient because we can't detect when we recurse back out of a node. 1275 Element[Element] insertLocations; 1276 auto iterator = root.tree; 1277 foreach(ele; iterator) { 1278 if(ele.parentNode is null) 1279 continue; 1280 1281 if(!canNestElementsInHtml(ele.parentNode.tagName, ele.tagName)) { 1282 auto shouldBePreviousSibling = ele.parentNode; 1283 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
					// If an element was already relocated into this holder, insert after
					// that one so successive relocations end up one after another.
					if (auto p = holder in insertLocations) {
						shouldBePreviousSibling = *p;
						assert(shouldBePreviousSibling.parentNode is holder);
					}
					// Detach the element from its current parent and re-insert it in the holder.
					ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
					insertLocations[holder] = ele; // the next relocation into this holder goes after this element
					iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
				}
			}
		}
	}

	/* end massive parse function */

	/++
		Gets the <title> element's innerText, if one exists.

		Returns:
			The innerText of the first element found (via [findFirst]) whose
			tagName is "title", or an empty string when there is none.
	+/
	@property string title() {
		// predicate handed to findFirst
		bool doesItMatch(Element e) {
			return (e.tagName == "title");
		}

		auto e = findFirst(&doesItMatch);
		if(e)
			return e.innerText();
		return "";
	}

	/++
		Sets the title of the page, creating a <title> element if needed.

		A newly created element is appended to the first element returned by
		`getElementsByTagName("head")`. If that list is empty, the new element
		is not attached anywhere by this function, though its innerText is
		still set.

		Params:
			t = the new title text
	+/
	@property void title(string t) {
		// predicate handed to findFirst
		bool doesItMatch(Element e) {
			return (e.tagName == "title");
		}

		auto e = findFirst(&doesItMatch);

		if(!e) {
			// no existing <title>; make one and try to put it in the head
			e = createElement("title");
			auto heads = getElementsByTagName("head");
			if(heads.length)
				heads[0].appendChild(e);
		}

		if(e)
			e.innerText = t;
	}

	// FIXME: would it work to alias root this; ???? might be a good idea
	/// These functions all forward to the root element. See the documentation in the Element class.
1331 Element getElementById(string id) { 1332 return root.getElementById(id); 1333 } 1334 1335 /// ditto 1336 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1337 if( is(SomeElementType : Element)) 1338 out(ret) { assert(ret !is null); } 1339 do { 1340 return root.requireElementById!(SomeElementType)(id, file, line); 1341 } 1342 1343 /// ditto 1344 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1345 if( is(SomeElementType : Element)) 1346 out(ret) { assert(ret !is null); } 1347 do { 1348 auto e = cast(SomeElementType) querySelector(selector); 1349 if(e is null) 1350 throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line); 1351 return e; 1352 } 1353 1354 /// ditto 1355 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1356 if(is(SomeElementType : Element)) 1357 { 1358 auto e = cast(SomeElementType) querySelector(selector); 1359 return MaybeNullElement!SomeElementType(e); 1360 } 1361 1362 /// ditto 1363 @scriptable 1364 Element querySelector(string selector) { 1365 // see comment below on Document.querySelectorAll 1366 auto s = Selector(selector);//, !loose); 1367 foreach(ref comp; s.components) 1368 if(comp.parts.length && comp.parts[0].separation == 0) 1369 comp.parts[0].separation = -1; 1370 foreach(e; s.getMatchingElementsLazy(this.root)) 1371 return e; 1372 return null; 1373 1374 } 1375 1376 /// ditto 1377 @scriptable 1378 Element[] querySelectorAll(string selector) { 1379 // In standards-compliant code, the document is slightly magical 1380 // in that it is a pseudoelement at top level. It should actually 1381 // match the root as one of its children. 
1382 // 1383 // In versions of dom.d before Dec 29 2019, this worked because 1384 // querySelectorAll was willing to return itself. With that bug fix 1385 // (search "arbitrary id asduiwh" in this file for associated unittest) 1386 // this would have failed. Hence adding back the root if it matches the 1387 // selector itself. 1388 // 1389 // I'd love to do this better later. 1390 1391 auto s = Selector(selector);//, !loose); 1392 foreach(ref comp; s.components) 1393 if(comp.parts.length && comp.parts[0].separation == 0) 1394 comp.parts[0].separation = -1; 1395 return s.getMatchingElements(this.root, null); 1396 } 1397 1398 /// ditto 1399 deprecated("use querySelectorAll instead") 1400 Element[] getElementsBySelector(string selector) { 1401 return root.getElementsBySelector(selector); 1402 } 1403 1404 /// ditto 1405 @scriptable 1406 Element[] getElementsByTagName(string tag) { 1407 return root.getElementsByTagName(tag); 1408 } 1409 1410 /// ditto 1411 @scriptable 1412 Element[] getElementsByClassName(string tag) { 1413 return root.getElementsByClassName(tag); 1414 } 1415 1416 /** FIXME: btw, this could just be a lazy range...... */ 1417 Element getFirstElementByTagName(string tag) { 1418 if(loose) 1419 tag = tag.toLower(); 1420 bool doesItMatch(Element e) { 1421 return e.tagName == tag; 1422 } 1423 return findFirst(&doesItMatch); 1424 } 1425 1426 /++ 1427 This returns the <body> element, if there is one. (It different than Javascript, where it is called 'body', because body used to be a keyword in D.) 1428 1429 History: 1430 `body` alias added February 26, 2024 1431 +/ 1432 Element mainBody() { 1433 return getFirstElementByTagName("body"); 1434 } 1435 1436 /// ditto 1437 alias body = mainBody; 1438 1439 /// this uses a weird thing... it's [name=] if no colon and 1440 /// [property=] if colon 1441 string getMeta(string name) { 1442 string thing = name.indexOf(":") == -1 ? 
"name" : "property"; 1443 auto e = querySelector("head meta["~thing~"="~name~"]"); 1444 if(e is null) 1445 return null; 1446 return e.content; 1447 } 1448 1449 /// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/ 1450 void setMeta(string name, string value) { 1451 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1452 auto e = querySelector("head meta["~thing~"="~name~"]"); 1453 if(e is null) { 1454 e = requireSelector("head").addChild("meta"); 1455 e.setAttribute(thing, name); 1456 } 1457 1458 e.content = value; 1459 } 1460 1461 ///. 1462 Form[] forms() { 1463 return cast(Form[]) getElementsByTagName("form"); 1464 } 1465 1466 ///. 1467 Form createForm() 1468 out(ret) { 1469 assert(ret !is null); 1470 } 1471 do { 1472 return cast(Form) createElement("form"); 1473 } 1474 1475 ///. 1476 Element createElement(string name) { 1477 if(loose) 1478 name = name.toLower(); 1479 1480 auto e = Element.make(name, null, null, selfClosedElements); 1481 1482 return e; 1483 1484 // return new Element(this, name, null, selfClosed); 1485 } 1486 1487 ///. 1488 Element createFragment() { 1489 return new DocumentFragment(this); 1490 } 1491 1492 ///. 1493 Element createTextNode(string content) { 1494 return new TextNode(this, content); 1495 } 1496 1497 1498 ///. 1499 Element findFirst(bool delegate(Element) doesItMatch) { 1500 if(root is null) 1501 return null; 1502 Element result; 1503 1504 bool goThroughElement(Element e) { 1505 if(doesItMatch(e)) { 1506 result = e; 1507 return true; 1508 } 1509 1510 foreach(child; e.children) { 1511 if(goThroughElement(child)) 1512 return true; 1513 } 1514 1515 return false; 1516 } 1517 1518 goThroughElement(root); 1519 1520 return result; 1521 } 1522 1523 ///. 
1524 void clear() { 1525 root = null; 1526 loose = false; 1527 } 1528 1529 private string _prolog = "<!DOCTYPE html>\n"; 1530 private bool prologWasSet = false; // set to true if the user changed it 1531 1532 /++ 1533 Returns or sets the string before the root element. This is, for example, 1534 `<!DOCTYPE html>\n` or similar. 1535 +/ 1536 @property string prolog() const { 1537 // if the user explicitly changed it, do what they want 1538 // or if we didn't keep/find stuff from the document itself, 1539 // we'll use the builtin one as a default. 1540 if(prologWasSet || piecesBeforeRoot.length == 0) 1541 return _prolog; 1542 1543 string p; 1544 foreach(e; piecesBeforeRoot) 1545 p ~= e.toString() ~ "\n"; 1546 return p; 1547 } 1548 1549 /// ditto 1550 void setProlog(string d) { 1551 _prolog = d; 1552 prologWasSet = true; 1553 } 1554 1555 /++ 1556 Returns the document as string form. Please note that if there is anything in [piecesAfterRoot], 1557 they are discarded. If you want to add them to the file, loop over that and append it yourself 1558 (but remember xml isn't supposed to have anything after the root element). 1559 +/ 1560 override string toString() const { 1561 return prolog ~ root.toString(); 1562 } 1563 1564 /++ 1565 Writes it out with whitespace for easier eyeball debugging 1566 1567 Do NOT use for anything other than eyeball debugging, 1568 because whitespace may be significant content in XML. 1569 +/ 1570 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 1571 import std.string; 1572 string s = prolog.strip; 1573 1574 /* 1575 if(insertComments) s ~= "<!--"; 1576 s ~= "\n"; 1577 if(insertComments) s ~= "-->"; 1578 */ 1579 1580 s ~= root.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 1581 foreach(a; piecesAfterRoot) 1582 s ~= a.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 1583 return s; 1584 } 1585 1586 /// The root element, like `<html>`. 
	/// Most of the methods on Document forward to this object.
	Element root;

	/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
	Element[] piecesBeforeRoot;

	/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
	Element[] piecesAfterRoot;

	/// Loose (non-strict) mode flag. When set, [createElement] and [getFirstElementByTagName] lowercase the tag names they are given.
	bool loose;



	// what follows are for mutation events that you can observe

	// delegates called, in order, by dispatchMutationEvent
	void delegate(DomMutationEvent)[] eventObservers;

	// Calls every delegate in eventObservers with the given event.
	void dispatchMutationEvent(DomMutationEvent e) {
		foreach(o; eventObservers)
			o(e);
	}
}

/++
	Basic parsing of HTML tag soup

	If you simply make a `new Document("some string")` or use [Document.fromUrl] to automatically
	download a page (that function is shorthand for `new Document(arsd.http2.get(your_given_url).contentText)`),
	the Document parser will assume it is broken HTML. It will try to fix up things like charset messes, missing
	closing tags, flipped tags, inconsistent letter cases, and other forms of commonly found HTML on the web.

	It isn't exactly the same as what a HTML5 web browser does in all cases, but it usually is, and where it
	disagrees, it is still usually good enough (but sometimes a bug).
+/
unittest {
	// the input has unclosed tags and an upper case <P>
	auto document = new Document(`<html><body><p>hello <P>there`);
	// this will automatically try to normalize the html and fix up broken tags, etc
	// so notice how it added the missing closing tags here and made them all lower case
	// (the second argument to assert prints the actual output if the comparison fails)
	assert(document.toString() == "<!DOCTYPE html>\n<html><body><p>hello </p><p>there</p></body></html>", document.toString());
}

/++
	Stricter parsing of HTML

	When you are writing the HTML yourself, you can remove most ambiguity by making it throw exceptions instead
	of trying to automatically fix up things basic parsing tries to do. Using strict mode accomplishes this.

	This will help guarantee that you have well-formed HTML, which means it is going to parse a lot more reliably
	by all users - browsers, dom.d, other libraries, all behave better with well-formed input... people too!

	(note it is not a full *validator*, just a well-formedness checker. Full validation is a lot more work for very
	little benefit in my experience, so I stopped here.)
+/
unittest {
	try {
		auto document = new Document(`<html><body><p>hello <P>there`, true, true); // passes true for both the strict and case sensitive options of the ctor
		assert(0); // never reached, the constructor will throw because strict mode is turned on
	} catch(Exception e) {
		// expected: the malformed input is rejected in strict mode
	}

	// you can also create the object first, then use the [parseStrict] method
	auto document = new Document;
	document.parseStrict(`<foo></foo>`); // this is invalid html - no such foo tag - but it is well-formed, since it is opened and closed properly, so it passes

}

/++
	Custom HTML extensions

	dom.d is a custom HTML parser, which means you can add custom HTML extensions to it too. It normally reads
	and discards things like ASP style `<% ... %>` code as well as XML processing instruction / PHP style embeds `<? ...
?>` 1658 but you can keep this data if you call a function to opt into it in before parsing. 1659 1660 Additionally, you can add special tags to be read like `<script>` to preserve its insides for future processing 1661 via the `.innerRawSource` member. 1662 +/ 1663 unittest { 1664 auto document = new Document; // construct an empty thing first 1665 document.enableAddingSpecialTagsToDom(); // add the special tags like <% ... %> etc 1666 document.rawSourceElements ~= "embedded-plaintext"; // tell it we want a custom 1667 1668 document.parseStrict(`<html> 1669 <% some asp code %> 1670 <script>embedded && javascript</script> 1671 <embedded-plaintext>my <custom> plaintext & stuff</embedded-plaintext> 1672 </html>`); 1673 1674 // please note that if we did `document.toString()` right now, the original source - almost your same 1675 // string you passed to parseStrict - would be spit back out. Meaning the embedded-plaintext still has its 1676 // special text inside it. Another parser won't understand how to use this! So if you want to pass this 1677 // document somewhere else, you need to do some transformations. 1678 // 1679 // This differs from cases like CDATA sections, which dom.d will automatically convert into plain html entities 1680 // on the output that can be read by anyone. 
1681 1682 assert(document.root.tagName == "html"); // the root element is normal 1683 1684 int foundCount; 1685 // now let's loop through the whole tree 1686 foreach(element; document.root.tree) { 1687 // the asp thing will be in 1688 if(auto asp = cast(AspCode) element) { 1689 // you use the `asp.source` member to get the code for these 1690 assert(asp.source == "% some asp code %"); 1691 foundCount++; 1692 } else if(element.tagName == "script") { 1693 // and for raw source elements - script, style, or the ones you add, 1694 // you use the innerHTML method to get the code inside 1695 assert(element.innerHTML == "embedded && javascript"); 1696 foundCount++; 1697 } else if(element.tagName == "embedded-plaintext") { 1698 // and innerHTML again 1699 assert(element.innerHTML == "my <custom> plaintext & stuff"); 1700 foundCount++; 1701 } 1702 1703 } 1704 1705 assert(foundCount == 3); 1706 1707 // writeln(document.toString()); 1708 } 1709 1710 // FIXME: <textarea> contents are treated kinda special in html5 as well... 1711 1712 /++ 1713 Demoing CDATA, entities, and non-ascii characters. 1714 1715 The previous example mentioned CDATA, let's show you what that does too. These are all read in as plain strings accessible in the DOM - there is no CDATA, no entities once you get inside the object model - but when you convert back into a string, it will normalize them in a particular way. 1716 1717 This is not exactly standards compliant completely in and out thanks to it doing some transformations... but I find it more useful - it reads the data in consistently and writes it out consistently, both in ways that work well for interop. Take a look: 1718 +/ 1719 unittest { 1720 auto document = new Document(`<html> 1721 <p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p> 1722 <p>¤ is the same thing, but as a named entity. 
It also will be changed to a numbered entity in string output.</p> 1723 <p><![CDATA[xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.]]></p> 1724 </html>`, true, true); // strict mode turned on 1725 1726 // Inside the object model, things are simplified to D strings. 1727 auto paragraphs = document.querySelectorAll("p"); 1728 // no surprise on the first paragraph, we wrote it with the character, and it is still there in the D string 1729 assert(paragraphs[0].textContent == "¤ is a non-ascii character. It will be converted to a numbered entity in string output."); 1730 // but note on the second paragraph, the entity has been converted to the appropriate *character* in the object 1731 assert(paragraphs[1].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output."); 1732 // and the CDATA bit is completely gone from the DOM; it just read it in as a text node. The txt content shows the text as a plain string: 1733 assert(paragraphs[2].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too."); 1734 // and the dom node beneath it is just a single text node; no trace of the original CDATA detail is left after parsing. 1735 assert(paragraphs[2].childNodes.length == 1 && paragraphs[2].childNodes[0].nodeType == NodeType.Text); 1736 1737 // And now, in the output string, we can see they are normalized thusly: 1738 assert(document.toString() == "<!DOCTYPE html>\n<html> 1739 <p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p> 1740 <p>¤ is the same thing, but as a named entity. 
It also will be changed to a numbered entity in string output.</p> 1741 <p>xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.</p> 1742 </html>"); 1743 } 1744 1745 /++ 1746 Streaming parsing 1747 1748 dom.d normally takes a big string and returns a big DOM object tree - hence its name. This is usually the simplest 1749 code to read and write, so I prefer to stick to that, but if you wanna jump through a few hoops, you can still make 1750 dom.d work with streams. 1751 1752 It is awkward - again, dom.d's whole design is based on building the dom tree, but you can do it if you're willing to 1753 subclass a little and trust the garbage collector. Here's how. 1754 +/ 1755 unittest { 1756 bool encountered; 1757 class StreamDocument : Document { 1758 // the normal behavior for this function is to `parent.appendChild(child)` 1759 // but we can override to read it as it is processed and not append it 1760 override void processNodeWhileParsing(Element parent, Element child) { 1761 if(child.tagName == "bar") 1762 encountered = true; 1763 // note that each element's object is created but then discarded as garbage. 1764 // the GC will take care of it, even with a large document, whereas the normal 1765 // object tree could become quite large. 1766 } 1767 1768 this() { 1769 super("<foo><bar></bar></foo>"); 1770 } 1771 } 1772 1773 auto test = new StreamDocument(); 1774 assert(encountered); // it should have been seen 1775 assert(test.querySelector("bar") is null); // but not appended to the dom node, since we didn't append it 1776 } 1777 1778 /++ 1779 Basic parsing of XML. 1780 1781 dom.d is not technically a standards-compliant xml parser and doesn't implement all xml features, 1782 but its stricter parse options together with turning off HTML's special tag handling (e.g. 
treating 1783 `<script>` and `<style>` the same as any other tag) gets close enough to work fine for a great many 1784 use cases. 1785 1786 For more information, see [XmlDocument]. 1787 +/ 1788 unittest { 1789 auto xml = new XmlDocument(`<my-stuff>hello</my-stuff>`); 1790 } 1791 1792 bool canNestElementsInHtml(string parentTagName, string childTagName) { 1793 switch(parentTagName) { 1794 case "p", "h1", "h2", "h3", "h4", "h5", "h6": 1795 // only should include "phrasing content" 1796 switch(childTagName) { 1797 case "p", "dl", "dt", "dd", "h1", "h2", "h3", "h4", "h5", "h6": 1798 return false; 1799 default: return true; 1800 } 1801 case "dt", "dd": 1802 switch(childTagName) { 1803 case "dd", "dt": 1804 return false; 1805 default: return true; 1806 } 1807 default: 1808 return true; 1809 } 1810 } 1811 1812 interface DomParent { 1813 inout(Document) asDocument() inout; 1814 inout(Element) asElement() inout; 1815 } 1816 1817 /++ 1818 This represents almost everything in the DOM and offers a lot of inspection and manipulation functions. Element, or its subclasses, are what makes the dom tree. 1819 +/ 1820 /// Group: core_functionality 1821 class Element : DomParent { 1822 inout(Document) asDocument() inout { return null; } 1823 inout(Element) asElement() inout { return this; } 1824 1825 /// Returns a collection of elements by selector. 1826 /// See: [Document.opIndex] 1827 ElementCollection opIndex(string selector) { 1828 auto e = ElementCollection(this); 1829 return e[selector]; 1830 } 1831 1832 /++ 1833 Returns the child node with the particular index. 1834 1835 Be aware that child nodes include text nodes, including 1836 whitespace-only nodes. 1837 +/ 1838 Element opIndex(size_t index) { 1839 if(index >= children.length) 1840 return null; 1841 return this.children[index]; 1842 } 1843 1844 /// Calls getElementById, but throws instead of returning null if the element is not found. 
You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 1845 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1846 if( 1847 is(SomeElementType : Element) 1848 ) 1849 out(ret) { 1850 assert(ret !is null); 1851 } 1852 do { 1853 auto e = cast(SomeElementType) getElementById(id); 1854 if(e is null) 1855 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line); 1856 return e; 1857 } 1858 1859 /// ditto but with selectors instead of ids 1860 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1861 if( 1862 is(SomeElementType : Element) 1863 ) 1864 out(ret) { 1865 assert(ret !is null); 1866 } 1867 do { 1868 auto e = cast(SomeElementType) querySelector(selector); 1869 if(e is null) 1870 throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line); 1871 return e; 1872 } 1873 1874 1875 /++ 1876 If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods. 1877 +/ 1878 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1879 if(is(SomeElementType : Element)) 1880 { 1881 auto e = cast(SomeElementType) querySelector(selector); 1882 return MaybeNullElement!SomeElementType(e); 1883 } 1884 1885 1886 1887 /// get all the classes on this element 1888 @property string[] classes() const { 1889 // FIXME: remove blank names 1890 auto cs = split(className, " "); 1891 foreach(ref c; cs) 1892 c = c.strip(); 1893 return cs; 1894 } 1895 1896 /++ 1897 The object [classList] returns. 
+/
	static struct ClassListHelper {
		// the element whose class attribute this helper reads and edits
		Element this_;
		this(inout(Element) this_) inout {
			this.this_ = this_;
		}

		/// Forwards to [Element.hasClass].
		bool contains(string cn) const {
			return this_.hasClass(cn);
		}

		/// Forwards to [Element.addClass].
		void add(string cn) {
			this_.addClass(cn);
		}

		/// Forwards to [Element.removeClass].
		void remove(string cn) {
			this_.removeClass(cn);
		}

		/// Removes `cn` if it is present, adds it otherwise.
		void toggle(string cn) {
			if(!contains(cn)) {
				add(cn);
				return;
			}
			remove(cn);
		}

		// this thing supposed to be iterable in javascript but idk how i want to do it in D. meh
		/+
		string[] opIndex() const {
			return this_.classes;
		}
		+/
	}

	/++
		Returns a helper object to work with classes, just like javascript.

		History:
			Added August 25, 2022
	+/
	@property inout(ClassListHelper) classList() inout {
		return inout(ClassListHelper)(this);
	}
	// FIXME: classList is supposed to strip whitespace and duplicates when you use it. need to test.

	unittest {
		Element element = Element.make("div");
		element.classList.add("foo");
		assert(element.classList.contains("foo"));
		element.classList.remove("foo");
		assert(!element.classList.contains("foo"));
		element.classList.toggle("bar");
		assert(element.classList.contains("bar"));
	}

	/// ditto
	alias classNames = classes;


	/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
	/// Does nothing if the class is already present. Returns `this`.
	@scriptable
	Element addClass(string c) {
		if(hasClass(c))
			return this; // don't add it twice

		string existing = getAttribute("class");
		// the first class stands alone; later ones are space separated
		setAttribute("class", existing.length == 0 ? c : existing ~ " " ~ c);

		return this;
	}

	/// Removes a particular class name.
@scriptable
	Element removeClass(string c) {
		if(!hasClass(c))
			return this;

		// rebuild the attribute from every name except the one being removed
		string remaining;
		foreach(existing; classes) {
			if(existing != c) {
				if(remaining.length)
					remaining ~= " ";
				remaining ~= existing;
			}
		}

		className = remaining.strip();

		return this;
	}

	/// Returns whether the given class appears in this element.
	/// The name must equal one whole space-separated entry; a substring of a longer name does not count.
	bool hasClass(string c) const {
		string attr = className;

		// cheap rejection: if the text is not there at all, no entry can match
		if(attr.indexOf(c) == -1)
			return false;

		foreach(entry; attr.split(" ")) {
			if(entry.strip == c)
				return true;
		}
		return false;

		/*
		int rightSide = idx + c.length;

		bool checkRight() {
			if(rightSide == cn.length)
				return true; // it's the only class
			else if(iswhite(cn[rightSide]))
				return true;
			return false; // this is a substring of something else..
		}

		if(idx == 0) {
			return checkRight();
		} else {
			if(!iswhite(cn[idx - 1]))
				return false; // substring
			return checkRight();
		}

		assert(0);
		*/
	}


	/* *******************************
	          DOM Mutation
	*********************************/
	/++
		Family of convenience functions to quickly add a tag with some text or
		other relevant info (for example, it's a src for an <img> element
		instead of inner text). They forward to [Element.make] then call [appendChild].
---
		div.addChild("span", "hello there");
		div.addChild("div", Html("<p>children of the div</p>"));
		---
	+/
	Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
		in {
			assert(tagName !is null);
		}
		out(e) {
			//assert(e.parentNode is this);
			//assert(e.parentDocument is this.parentDocument);
		}
	do {
		// FIXME (maybe): if the thing is self closed, we might want to go ahead and
		// return the parent. That will break existing code though.
		return appendChild(Element.make(tagName, childInfo, childInfo2));
	}

	/// ditto
	Element addChild(Element e) {
		return appendChild(e);
	}

	/// ditto
	Element addChild(string tagName, Element firstChild, string info2 = null)
	in {
		assert(firstChild !is null);
	}
	out(ret) {
		assert(ret !is null);
		assert(ret.parentNode is this);
		assert(firstChild.parentNode is ret);

		assert(ret.parentDocument is this.parentDocument);
		//assert(firstChild.parentDocument is this.parentDocument);
	}
	do {
		// build the new tag, put the given child inside it, then attach the pair here
		auto wrapper = Element.make(tagName, "", info2);
		wrapper.appendChild(firstChild);
		this.appendChild(wrapper);
		return wrapper;
	}

	/// ditto
	Element addChild(string tagName, in Html innerHtml, string info2 = null)
	in {
	}
	out(ret) {
		assert(ret !is null);
		assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(ret.parentDocument is this.parentDocument);
	}
	do {
		// the new tag is attached first and its html is filled in afterwards
		auto created = Element.make(tagName, "", info2);
		this.appendChild(created);
		created.innerHTML = innerHtml.source;
		return created;
	}


	/// Another convenience function. Adds a child directly after the current one, returning
	/// the new child.
///
	/// Between this, addChild, and parentNode, you can build a tree as a single expression.
	/// See_Also: [addChild]
	Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
		in {
			assert(tagName !is null);
			assert(parentNode !is null);
		}
		out(e) {
			assert(e.parentNode is this.parentNode);
			assert(e.parentDocument is this.parentDocument);
		}
	do {
		auto e = Element.make(tagName, childInfo, childInfo2);
		return parentNode.insertAfter(this, e);
	}

	/// ditto
	Element addSibling(Element e)
		in {
			// same precondition as the overload above: a sibling needs a shared parent
			assert(parentNode !is null);
		}
	do {
		return parentNode.insertAfter(this, e);
	}

	/// Convenience function to append text intermixed with other children.
	/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
	/// or div.addChildren("Hello, ", user.name, "!");
	/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
	///
	/// Each argument must be an [Element] (or subclass), which is passed to [appendChild],
	/// or a string of any character width, which is converted with `to!string` and passed
	/// to [appendText]. Any other argument type is rejected at compile time.
	void addChildren(T...)(T t) {
		foreach(item; t) {
			// `item` is a value, so the checks must be made on `typeof(item)`
			alias Item = typeof(item);
			static if(is(Item : Element))
				appendChild(item);
			else static if(is(Item : const(char)[]) || is(Item : const(wchar)[]) || is(Item : const(dchar)[]))
				appendText(to!string(item));
			else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
		}
	}

	/// Appends the list of children to this element, in order.
	void appendChildren(Element[] children) {
		foreach(ele; children)
			appendChild(ele);
	}

	/// Removes this element from its current parent and appends it to the given `newParent`.
void reparent(Element newParent)
		in {
			// both the destination and a current parent are required
			assert(newParent !is null);
			assert(parentNode !is null);
		}
		out {
			assert(this.parentNode is newParent);
			//assert(isInArray(this, newParent.children));
		}
	do {
		// detach first, then attach
		parentNode.removeChild(this);
		newParent.appendChild(this);
	}

	/**
		Strips this tag out of the document, putting its inner html
		as children of the parent.

		For example, given: `<p>hello <b>there</b></p>`, if you
		call `stripOut` on the `b` element, you'll be left with
		`<p>hello there</p>`.

		The idea here is to make it easy to get rid of garbage
		markup you aren't interested in.

		This element must currently have a parent. Afterwards it has
		neither a parent nor children.
	*/
	void stripOut()
		in {
			assert(parentNode !is null);
		}
		out {
			assert(parentNode is null);
			assert(children.length == 0);
		}
	do {
		foreach(c; children)
			c.parentNode = null; // remove the parent
		// with children: they take this element's place in the parent;
		// without: this element is simply removed
		if(children.length)
			parentNode.replaceChild(this, this.children);
		else
			parentNode.removeChild(this);
		this.children.length = 0; // we reparented them all above
	}

	/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
	/// if the element already isn't in a tree, it does nothing.
	///
	/// Returns: `this`, so calls can be chained.
	Element removeFromTree()
		in {

		}
		out(var) {
			assert(this.parentNode is null);
			assert(var is this);
		}
	do {
		if(this.parentNode is null)
			return this;

		this.parentNode.removeChild(this);

		return this;
	}

	/++
		Wraps this element inside the given element.
		It's like `this.replaceWith(what); what.appendChild(this);`

		Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
		you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
	Element wrapIn(Element what)
		in {
			assert(what !is null);
		}
		out(ret) {
			assert(this.parentNode is what);
			assert(ret is what);
		}
	do {
		// `what` takes our place in the tree, then we move inside it
		replaceWith(what);
		what.appendChild(this);

		return what;
	}

	/// Replaces this element with something else in the tree.
	/// `e` is removed from wherever it currently lives first. Returns `e`.
	Element replaceWith(Element e)
	in {
		assert(this.parentNode !is null);
	}
	do {
		e.removeFromTree();
		parentNode.replaceChild(this, e);
		return e;
	}

	/**
		Fetches the first consecutive text nodes concatenated together.


		`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

		See_also: [directText], [innerText]
	*/
	string firstInnerText() const {
		string collected;
		foreach(node; children) {
			// the first non-text child ends the run
			if(node.nodeType != NodeType.Text)
				return collected;

			collected ~= node.nodeValue();
		}
		return collected;
	}


	/**
		Returns the text directly under this element.


		Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
		past child tags. So, `<example>some <b>bold</b> text</example>`
		will return `some  text` because it only gets the text, skipping non-text children.

		See_also: [firstInnerText], [innerText]
	*/
	@property string directText() {
		string text;
		foreach(node; children) {
			if(node.nodeType != NodeType.Text)
				continue; // skip tags, keep going
			text ~= node.nodeValue();
		}

		return text;
	}

	/**
		Sets the direct text, without modifying other child nodes.


		Unlike [innerText], this does *not* remove existing elements in the element.

		It only replaces the first text node it sees.

		If there are no text nodes, it calls [appendText].

		So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
	@property void directText(string text) {
		foreach(node; children) {
			if(node.nodeType != NodeType.Text)
				continue;
			// first text child found: overwrite it and stop
			(cast(TextNode) node).contents = text;
			return;
		}

		appendText(text);
	}

	// do nothing, this is primarily a virtual hook
	// for links and forms
	void setValue(string field, string value) { }


	// this is a thing so i can remove observer support if it gets slow
	// I have not implemented all these yet
	private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
		auto doc = parentDocument;
		// elements outside a document have nobody to notify
		if(doc is null)
			return;

		DomMutationEvent event;
		event.operation = operation;
		event.target = this;
		event.relatedString = s1;
		event.relatedString2 = s2;
		event.related = r;
		event.related2 = r2;
		doc.dispatchMutationEvent(event);
	}

	// putting all the members up front

	// this ought to be private. don't use it directly.
	Element[] children;

	/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
	string tagName;

	/++
		This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.

		History:
			`AttributesHolder` replaced `string[string]` on August 22, 2024
	+/
	AttributesHolder attributes;

	/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
	/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
	private bool selfClosed;

	// the owner of this element: another Element or a Document (see DomParent)
	private DomParent parent_;

	/// Get the parent Document object that contains this element.
	/// It may be null, so remember to check for that.
@property inout(Document) parentDocument() inout {
		if(this.parent_ is null)
			return null;
		// `cast()` drops the inout qualifier so the locals can be reassigned while walking
		auto p = cast() this.parent_.asElement;
		auto prev = cast() this;
		// climb through element parents; `prev` ends up as the last element reached
		while(p) {
			prev = p;
			if(p.parent_ is null)
				return null; // the chain ends at an element that has no parent at all
			p = cast() p.parent_.asElement;
		}
		// prev's parent did not present itself as an Element; ask it for a Document instead
		return cast(inout) prev.parent_.asDocument;
	}

	// Makes `doc` the direct parent of this element.
	/*deprecated*/ @property void parentDocument(Document doc) {
		parent_ = doc;
	}

	/// Returns the parent node in the tree this element is attached to.
	/// A [DocumentFragment] parent is skipped: the fragment's own parent element is returned instead.
	inout(Element) parentNode() inout {
		if(parent_ is null)
			return null;

		auto p = parent_.asElement;

		if(cast(DocumentFragment) p) {
			if(p.parent_ is null)
				return null;
			else
				return p.parent_.asElement;
		}

		return p;
	}

	// Sets the direct parent without touching any child list; returns `e`.
	//protected
	Element parentNode(Element e) {
		parent_ = e;
		return e;
	}

	// these are here for event handlers. Don't forget that this library never fires events.
	// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)

	version(dom_with_events) {
		EventHandler[][string] bubblingEventHandlers;
		EventHandler[][string] capturingEventHandlers;
		EventHandler[string] defaultEventHandlers;

		// Registers `handler` for `event`; a leading "on" is dropped from names longer than two characters.
		void addEventListener(string event, EventHandler handler, bool useCapture = false) {
			if(event.length > 2 && event[0..2] == "on")
				event = event[2 .. $];

			if(useCapture)
				capturingEventHandlers[event] ~= handler;
			else
				bubblingEventHandlers[event] ~= handler;
		}
	}


	// and now methods

	/++
		Convenience function to try to do the right thing for HTML. This is the main way I create elements.

		History:
			On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private
			immutable global list for HTML.
It still defaults to the same list, but you can change it now via
			the parameter.
		See_Also:
			[addChild], [addSibling]
	+/
	static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) {
		bool selfClosed = tagName.isInArray(selfClosedElements);

		Element e;
		// want to create the right kind of object for the given tag...
		switch(tagName) {
			case "#text":
				// text nodes take childInfo as their content and skip everything below
				return new TextNode(null, childInfo);
			case "table":
				e = new Table(null);
			break;
			case "a":
				e = new Link(null);
			break;
			case "form":
				e = new Form(null);
			break;
			case "tr":
				e = new TableRow(null);
			break;
			case "td", "th":
				e = new TableCell(null, tagName);
			break;
			default:
				e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
		}

		// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
		e.tagName = tagName;
		e.selfClosed = selfClosed;

		// without a first info string there is nothing more to fill in
		// (note that childInfo2 is not looked at in that case)
		if(childInfo is null)
			return e;

		const bool hasSecond = childInfo2 !is null;

		switch(tagName) {
			/* html5 convenience tags */
			case "audio":
				if(childInfo.length)
					e.addChild("source", childInfo);
				if(hasSecond)
					e.appendText(childInfo2);
			break;
			case "source":
				e.src = childInfo;
				if(hasSecond)
					e.type = childInfo2;
			break;
			/* regular html 4 stuff */
			case "img":
				e.src = childInfo;
				if(hasSecond)
					e.alt = childInfo2;
			break;
			case "link":
				e.href = childInfo;
				if(hasSecond)
					e.rel = childInfo2;
			break;
			case "option":
				e.innerText = childInfo;
				if(hasSecond)
					e.value = childInfo2;
			break;
			case "input":
				e.type = "hidden";
				e.name = childInfo;
				if(hasSecond)
					e.value = childInfo2;
			break;
			case "button":
				e.innerText = childInfo;
				if(hasSecond)
					e.type = childInfo2;
			break;
			case "a":
				e.innerText = childInfo;
				if(hasSecond)
					e.href = childInfo2;
			break;
			case "script", "style":
				e.innerRawSource = childInfo;
			break;
			case "meta":
				e.name = childInfo;
				if(hasSecond)
					e.content = childInfo2;
			break;
			/* generically, assume we were passed text and perhaps class */
			default:
				e.innerText = childInfo;
				if(childInfo2.length)
					e.className = childInfo2;
		}

		return e;
	}

	/// ditto
	static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
		// FIXME: childInfo2 is ignored when info1 is null
		// (hence the empty but non-null slice passed as the first info string)
		auto created = Element.make(tagName, "not null"[0..0], childInfo2);
		created.innerHTML = innerHtml.source;
		return created;
	}

	/// ditto
	static Element make(string tagName, Element child, string childInfo2 = null) {
		auto created = Element.make(tagName, cast(string) null, childInfo2);
		created.appendChild(child);
		return created;
	}


	/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
	this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
		this.tagName = _tagName;
		foreach(key, value; _attributes)
			this.attributes[key] = value;
		this.selfClosed = _selfClosed;

		version(dom_node_indexes)
			this.dataset.nodeIndex = to!string(&(this.attributes));

		// a tag name may not contain a space
		assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
	}

	/++
		Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
		Note also that without a parent document, elements are always in strict, case-sensitive mode.
History:
			On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as
			before: using the hard-coded list of HTML elements, but it can now be overridden. If you use
			[Document.createElement], it will use the list set for the current document. Otherwise, you can pass
			something here if you like.
	+/
	this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
		this.tagName = _tagName;
		foreach(key, value; _attributes)
			this.attributes[key] = value;
		// self-closing is decided by membership in the supplied list
		this.selfClosed = tagName.isInArray(selfClosedElements);

		// this is meant to reserve some memory. It makes a small, but consistent improvement.
		//children.length = 8;
		//children.length = 0;

		version(dom_node_indexes)
			this.dataset.nodeIndex = to!string(&(this.attributes));
	}

	// bare constructor: sets no tag name or attributes
	private this(Document _parentDocument) {
		version(dom_node_indexes)
			this.dataset.nodeIndex = to!string(&(this.attributes));
	}


	/* *******************************
	       Navigating the DOM
	*********************************/

	/// Returns the first child of this element. If it has no children, returns null.
	/// Remember, text nodes are children too.
	@property Element firstChild() {
		if(children.length == 0)
			return null;
		return children[0];
	}

	/// Returns the last child of the element, or null if it has no children. Remember, text nodes are children too.
	@property Element lastChild() {
		if(children.length == 0)
			return null;
		return children[$ - 1];
	}

	// FIXME UNTESTED
	/// the next or previous element you would encounter if you were reading it in the source. May be a text node or other special non-tag object if you enabled them.
/// Returns `null` when there is nothing further in that direction.
	Element nextInSource() {
		// going forward: first child, else next sibling, else the next sibling
		// of the closest ancestor that has one
		auto n = firstChild;
		if(n is null)
			n = nextSibling();
		if(n is null) {
			auto p = this.parentNode;
			while(p !is null && n is null) {
				n = p.nextSibling;
				// keep climbing; without this the loop never ends when
				// the parent has no next sibling
				p = p.parentNode;
			}
		}

		return n;
	}

	/// ditto
	Element previousInSource() {
		auto p = previousSibling;
		// a first child is directly preceded by its own parent
		if(p is null)
			return parentNode;

		// otherwise the closest preceding node is the last, deepest
		// descendant of the previous sibling (the reverse of nextInSource)
		while(p.lastChild !is null)
			p = p.lastChild;
		return p;
	}

	/++
		Returns the next or previous sibling that is not a text node. Please note: the behavior with comments is subject to change. Currently, it will return a comment or other nodes if it is in the tree (if you enabled it with [Document.enableAddingSpecialTagsToDom] or [Document.parseSawComment]) and not if you didn't, but the implementation will probably change at some point to skip them regardless.

		Equivalent to [previousSibling]/[nextSibling]("*").

		Please note it may return `null`.
	+/
	@property Element previousElementSibling() {
		return previousSibling("*");
	}

	/// ditto
	@property Element nextElementSibling() {
		return nextSibling("*");
	}

	/++
		Returns the next or previous sibling matching the `tagName` filter. The default filter of `null` will return the first sibling it sees, even if it is a comment or text node, or anything else. A filter of `"*"` will match any tag with a name. Otherwise, the string must match the [tagName] of the sibling you want to find.
+/
	@property Element previousSibling(string tagName = null) {
		if(this.parentNode is null)
			return null;
		Element ps = null;
		// remember the latest match seen before reaching this element
		foreach(e; this.parentNode.childNodes) {
			if(e is this)
				break;
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ps = e;
			} else if(tagName is null || e.tagName == tagName)
				ps = e;
		}

		return ps;
	}

	/// ditto
	@property Element nextSibling(string tagName = null) {
		if(this.parentNode is null)
			return null;
		Element ns = null;
		// becomes true once the scan has passed this element
		bool mightBe = false;
		foreach(e; this.parentNode.childNodes) {
			if(e is this) {
				mightBe = true;
				continue;
			}
			if(mightBe) {
				if(tagName == "*" && e.nodeType != NodeType.Text) {
					ns = e;
					break;
				}
				if(tagName is null || e.tagName == tagName) {
					ns = e;
					break;
				}
			}
		}

		return ns;
	}


	/++
		Gets the nearest node, going up the chain, with the given tagName
		May return null or throw. The type `T` will specify a subclass like
		[Form], [Table], or [Link], which it will cast for you when found.

		When `tagName` is `null` and `T` is [Form], [Table], or [Link], the tag
		defaults to `form`, `table`, or `a` respectively. With a `null` tag and
		any other `T`, the immediate parent is the candidate.

		Returns:
			For `T == Element`, the ancestor found, or `null` if there is none.

		Throws:
			[ElementNotFoundException] for any other `T` when no ancestor was
			found or the one found is not a `T`.
	+/
	T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
		if(tagName is null) {
			static if(is(T == Form))
				tagName = "form";
			else static if(is(T == Table))
				tagName = "table";
			else static if(is(T == Link))
				tagName = "a"; // assignment, not comparison: Link needs its default too
		}

		auto par = this.parentNode;
		while(par !is null) {
			if(tagName is null || par.tagName == tagName)
				break;
			par = par.parentNode;
		}

		static if(!is(T == Element)) {
			auto t = cast(T) par;
			if(t is null)
				throw new ElementNotFoundException("", tagName ~ " parent not found", this);
		} else
			auto t = par;

		return t;
	}

	/++
		Searches this element and the tree of elements under it for one matching the given `id` attribute.
+/
	Element getElementById(string id) {
		// FIXME: I use this function a lot, and it's kinda slow
		// not terribly slow, but not great.
		foreach(candidate; tree) {
			if(candidate.id != id)
				continue;
			return candidate;
		}
		return null;
	}

	/++
		Returns a child element that matches the given `selector`.

		Note: you can give multiple selectors, separated by commas.
	 	It will return the first match it finds.

		Tip: to use namespaces, escape the colon in the name:

		---
			element.querySelector(`ns\:tag`); // the backticks are raw strings then the backslash is interpreted by querySelector
		---
	+/
	@scriptable
	Element querySelector(string selector) {
		Selector parsed = Selector(selector);

		foreach(ref component; parsed.components) {
			// only components that begin with a combinator need the fix-up
			if(!(component.parts.length && component.parts[0].separation > 0))
				continue;

			// this is illegal in standard dom, but i use it a lot
			// gonna insert a :scope thing

			SelectorPart scopePart;
			scopePart.separation = -1;
			scopePart.scopeElement = true;
			component.parts = scopePart ~ component.parts;
		}

		foreach(candidate; tree) {
			if(parsed.matchesElement(candidate, this))
				return candidate;
		}
		return null;
	}

	/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
	bool matches(string selector) {
		/+
		bool caseSensitiveTags = true;
		if(parentDocument && parentDocument.loose)
			caseSensitiveTags = false;
		+/

		auto parsed = Selector(selector);
		return parsed.matchesElement(this);
	}

	/// Returns itself or the closest parent that matches the given selector, or null if none found
	/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
	@scriptable
	Element closest(string selector) {
		// start at this element and walk up one parent at a time
		for(Element current = this; current !is null; current = current.parentNode) {
			if(current.matches(selector))
				return current;
		}
		return null;
	}

	/**
		Returns elements that match the given CSS selector

		* -- all, default if nothing else is there

		tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

		It is all additive

		OP

		space = descendant
		>     = direct descendant
		+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

		[foo]        Foo is present as an attribute
		[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
		E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
		E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

		[item$=sdas] ends with
		[item^=sdsad] begins with

		Quotes are optional here.

		Pseudos:
			:first-child
			:last-child
			:link (same as a[href] for our purposes here)


		There can be commas separating the selector. A comma separated list result is OR'd onto the main.



		This ONLY cares about elements. text, etc, are ignored


		There should be two functions: given element, does it match the selector?
and given a selector, give me all the elements

		The name `getElementsBySelector` was the original name, written back before the name `querySelector` was standardized (this library is older than you might think!), but they do the same thing.
	*/
	@scriptable
	Element[] querySelectorAll(string selector) {
		// FIXME: this function could probably use some performance attention
		// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.


		// tags are matched case-insensitively only inside a loose document
		bool caseSensitiveTags = true;
		if(parentDocument && parentDocument.loose)
			caseSensitiveTags = false;

		// results of each comma-separated selector are appended in turn
		Element[] ret;
		foreach(sel; parseSelectorString(selector, caseSensitiveTags))
			ret ~= sel.getElements(this, null);
		return ret;
	}

	/// ditto
	alias getElementsBySelector = querySelectorAll;

	/++
		Returns child elements that have the given class name or tag name.

		For `getElementsByClassName`, several space-separated names may be given;
		an element must then carry all of them.

		Please note the standard specifies this should return a live node list. This means, in Javascript for example, if you loop over the value returned by getElementsByTagName and getElementsByClassName and remove the elements, the length of the list will decrease. When I implemented this, I figured that was more trouble than it was worth and returned a plain array instead. By the time I had the infrastructure to make it simple, I didn't want to do the breaking change.

		So these are incompatible with Javascript in the face of live dom mutation and will likely remain so.
	+/
	Element[] getElementsByClassName(string cn) {
		// "a b" must become ".a.b" (has both classes); gluing a single dot on
		// the front would give ".a b", which is a descendant selector
		string selector;
		foreach(piece; cn.split(" ")) {
			auto name = piece.strip();
			if(name.length)
				selector ~= "." ~ name;
		}

		// nothing usable in the argument: fall back to the historical form
		if(selector.length == 0)
			selector = "." ~ cn;

		return getElementsBySelector(selector);
	}

	/// ditto
	Element[] getElementsByTagName(string tag) {
		if(parentDocument && parentDocument.loose)
			tag = tag.toLower();
		Element[] ret;
		foreach(e; tree)
			if(e.tagName == tag || tag == "*")
				ret ~= e;
		return ret;
	}


	/* *******************************
	          Attributes
	*********************************/

	/**
		Gets the given attribute value, or null if the
		attribute is not set.

		Note that the returned string is decoded, so it no longer contains any xml entities.

		In a loose document the name is lowercased before the lookup.
	*/
	@scriptable
	string getAttribute(string name) const {
		if(parentDocument && parentDocument.loose)
			name = name.toLower();
		return attributes.get(name, null);
	}

	/**
		Sets an attribute. Returns this for easy chaining

		In a loose document the name is lowercased first.

		For `href` and `src` (compared case-insensitively), every leading
		`vbscript:` or `javascript:` prefix is removed from the value, along
		with the whitespace around it. Values without such a prefix are
		stored untouched.
	*/
	@scriptable
	Element setAttribute(string name, string value) {
		// ASCII-only, case-insensitive prefix test; `prefix` must be lowercase.
		// Working on code units keeps the slice offsets below exact.
		static bool hasSchemePrefix(string s, string prefix) {
			if(s.length < prefix.length)
				return false;
			foreach(i, expected; prefix) {
				char actual = s[i];
				if(actual >= 'A' && actual <= 'Z')
					actual = cast(char)(actual + ('a' - 'A'));
				if(actual != expected)
					return false;
			}
			return true;
		}

		if(parentDocument && parentDocument.loose)
			name = name.toLower();

		// script urls are never wanted in these attributes
		auto it = name.toLower();
		if(it == "href" || it == "src") {
			// test and slice the same stripped string, and repeat so that
			// stacked prefixes ("javascript:javascript:...") cannot survive
			for(;;) {
				auto stripped = value.strip();
				if(hasSchemePrefix(stripped, "vbscript:"))
					value = stripped["vbscript:".length .. $];
				else if(hasSchemePrefix(stripped, "javascript:"))
					value = stripped["javascript:".length .. $];
				else
					break;
			}
		}

		attributes[name] = value;

		sendObserverEvent(DomMutationOperations.setAttribute, name, value);

		return this;
	}

	/**
		Returns if the attribute exists.

		In a loose document the name is lowercased before the lookup.
	*/
	@scriptable
	bool hasAttribute(string name) {
		if(parentDocument && parentDocument.loose)
			name = name.toLower();

		if(name in attributes)
			return true;
		else
			return false;
	}

	/**
		Removes the given attribute from the element.
*/
	@scriptable
	Element removeAttribute(string name)
		out(ret) {
			assert(ret is this);
		}
	do {
		if(parentDocument && parentDocument.loose)
			name = name.toLower();
		if(name in attributes)
			attributes.remove(name);

		// the observer is told even when the attribute was not there
		sendObserverEvent(DomMutationOperations.removeAttribute, name);
		return this;
	}

	/**
		Gets or sets the class attribute's contents. Returns
		an empty string if it has no class.
	*/
	@property string className() const {
		auto current = getAttribute("class");
		return current is null ? "" : current;
	}

	/// ditto
	@property Element className(string c) {
		setAttribute("class", c);
		return this;
	}

	/**
		Provides easy access to common HTML attributes, object style.

		---
		auto a = Element.make("a");
		a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
		string where = a.href; // same as a.getAttribute("href");
		---

	*/
	@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
		// no value given: plain read
		if(v is null)
			return getAttribute(name);

		// value given: write, then read back what was stored
		setAttribute(name, v);
		return getAttribute(name);
	}

	/**
		Old access to attributes. Use [attrs] instead.

		DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
		so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

		Instead, use element.attrs.attribute, element.attrs["attribute"],
		or element.getAttribute("attribute")/element.setAttribute("attribute").
	*/
	@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
		static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
	}

	/*
	// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
		if(boolean)
			setAttribute(name, name);
		else
			removeAttribute(name);
	}
	*/

	/**
		Returns the element's children (the `children` array itself, not a copy).
	*/
	@property inout(Element[]) childNodes() inout {
		return children;
	}

	/++
		HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
		Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
	+/
	@property DataSet dataset() {
		return DataSet(this);
	}

	/++
		Gives dot/opIndex access to attributes
		---
		ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
		---
	+/
	@property AttributeSet attrs() {
		return AttributeSet(this);
	}

	/++
		Provides both string and object style (like in Javascript) access to the style attribute.

		---
		element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
		---
	+/
	@property ElementStyle style() {
		return ElementStyle(this);
	}

	/++
		This sets the style attribute with a string, then returns the
		object-style view of it.
	+/
	@property ElementStyle style(string s) {
		this.setAttribute("style", s);
		return this.style;
	}

	// Currently a no-op: the whole body is commented out.
	private void parseAttributes(string[] whichOnes = null) {
/+
		if(whichOnes is null)
			whichOnes = attributes.keys;
		foreach(attr; whichOnes) {
			switch(attr) {
				case "id":

				break;
				case "class":

				break;
				case "style":

				break;
				default:
					// we don't care about it
			}
		}
+/
	}


	// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.

	// the next few methods are for implementing interactive kind of things
	private CssStyle _computedStyle;

	/// Don't use this. It can try to parse out the style element but it isn't complete and if I get back to it, it won't be for a while.
	///
	/// The result of `computedStyleFactory(this)` is cached in `_computedStyle` on first use.
	@property CssStyle computedStyle() {
		if(_computedStyle !is null)
			return _computedStyle;

		// legacy presentational html attributes are folded into a css string here
		// NOTE(review): this string is not handed to computedStyleFactory below,
		// so as written it has no effect on the result - confirm whether that is intended
		auto style = this.getAttribute("style");
		if(this.hasAttribute("width"))
			style ~= "; width: " ~ this.attrs.width;
		if(this.hasAttribute("height"))
			style ~= "; height: " ~ this.attrs.height;
		if(this.hasAttribute("bgcolor"))
			style ~= "; background-color: " ~ this.attrs.bgcolor;
		if(this.tagName == "body" && this.hasAttribute("text"))
			style ~= "; color: " ~ this.attrs.text;
		if(this.hasAttribute("color"))
			style ~= "; color: " ~ this.attrs.color;

		_computedStyle = computedStyleFactory(this);
		return _computedStyle;
	}

	/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
	version(browser) {
		void* expansionHook; ///ditto
		int offsetWidth; ///ditto
		int offsetHeight; ///ditto
		int offsetLeft; ///ditto
		int offsetTop; ///ditto
		Element offsetParent; ///ditto
		bool hasLayout; ///ditto
		int zIndex; ///ditto

		///ditto
		int absoluteLeft() {
			// sum our own offset with that of every element up the offsetParent chain
			int total = offsetLeft;
			for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
				total += ancestor.offsetLeft;
			return total;
		}

		///ditto
		int absoluteTop() {
			// sum our own offset with that of every element up the offsetParent chain
			int total = offsetTop;
			for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
				total += ancestor.offsetTop;
			return total;
		}
	}

	// Back to the regular dom functions

    public:


	/* *******************************
	          DOM Mutation
	*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
	/// Each former child has its `parentNode` reset to null.
	void removeAllChildren()
		out {
			assert(this.children.length == 0);
		}
	do {
		foreach(child; children)
			child.parentNode = null;
		children = null;
	}

	/++
		Adds a sibling element before or after this one in the dom.

		This element must currently have a parent, since the new sibling is
		inserted into that parent's child list.

		Returns: the element that was passed in.

		History: added June 13, 2020
	+/
	Element appendSibling(Element e)
		in {
			assert(parentNode !is null, "appendSibling requires this element to have a parent");
		}
	do {
		parentNode.insertAfter(this, e);
		return e;
	}

	/// ditto
	Element prependSibling(Element e)
		in {
			assert(parentNode !is null, "prependSibling requires this element to have a parent");
		}
	do {
		parentNode.insertBefore(this, e);
		return e;
	}


	/++
		Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

		If `e` is a [DocumentFragment], its children are appended instead of the fragment itself.

		Returns: `e`

		See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

		History:
			Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
	+/
	Element appendChild(Element e)
		in {
			assert(e !is null);
			assert(e !is this);
		}
		out (ret) {
			assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
			assert(e.parentDocument is this.parentDocument);
			assert(e is ret);
		}
	do {
		if(e.parentNode !is null)
			e.parentNode.removeChild(e);

		// an element that gains content can no longer be written as <tag />
		selfClosed = false;
		if(auto frag = cast(DocumentFragment) e)
			children ~= frag.children;
		else
			children ~= e;

		e.parentNode = this;

		/+
		foreach(item; e.tree)
			item.parentDocument = this.parentDocument;
		+/

		sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

		return e;
	}

	/++
		Inserts the second element to this node, right before the first param.

		`where` must be a child of `this` and `what` must not have a parent.
		If `what` is a [DocumentFragment], its children are inserted in its place.

		Returns: `what`
	+/
	Element insertBefore(in Element where, Element what)
		in {
			assert(where !is null);
			assert(where.parentNode is this);
			assert(what !is null);
			assert(what.parentNode is null);
		}
		out (ret) {
			assert(where.parentNode is this);
			assert(what.parentNode is this);

			assert(what.parentDocument is this.parentDocument);
			assert(ret is what);
		}
	do {
		foreach(i, e; children) {
			if(e is where) {
				if(auto frag = cast(DocumentFragment) what) {
					children = children[0..i] ~ frag.children ~ children[i..$];
					foreach(child; frag.children)
						child.parentNode = this;
				} else {
					children = children[0..i] ~ what ~ children[i..$];
				}
				what.parentNode = this;
				return what;
			}
		}

		// `where` was not in our child list: nothing is inserted, and the out
		// contract reports the inconsistency in builds that check contracts
		return what;
	}

	/++
		Inserts the given element `what` as a child of `this`, immediately after the existing child `where`.
+/
	Element insertAfter(in Element where, Element what)
		in {
			assert(where !is null);
			assert(where.parentNode is this);
			assert(what !is null);
			assert(what.parentNode is null);
		}
		out (ret) {
			assert(where.parentNode is this);
			assert(what.parentNode is this);
			assert(what.parentDocument is this.parentDocument);
			assert(ret is what);
		}
	do {
		foreach(i, e; children) {
			if(e !is where)
				continue;

			// everything up to and including `where` stays in front
			immutable splitAt = i + 1;
			if(auto frag = cast(DocumentFragment) what) {
				// a fragment contributes its children rather than itself
				children = children[0 .. splitAt] ~ what.children ~ children[splitAt .. $];
				foreach(child; frag.children)
					child.parentNode = this;
			} else {
				children = children[0 .. splitAt] ~ what ~ children[splitAt .. $];
			}
			what.parentNode = this;
			return what;
		}

		// `where` was not found among our children; nothing was inserted
		return what;
	}

	/// Swaps one child for a new thing. Returns the old child, which is now parentless.
	Element swapNode(Element child, Element replacement)
		in {
			assert(child !is null);
			assert(replacement !is null);
			assert(child.parentNode is this);
		}
		out(ret) {
			assert(ret is child);
			assert(ret.parentNode is null);
			assert(replacement.parentNode is this);
			assert(replacement.parentDocument is this.parentDocument);
		}
	do {
		foreach(idx, existing; this.children) {
			if(existing !is child)
				continue;

			existing.parentNode = null;
			this.children[idx] = replacement;
			replacement.parentNode = this;
			return child;
		}
		assert(0);
	}


	/++
		Appends the given text to the node, as a new [TextNode] child.


		Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
		yields `<example>text <b>bold</b> hi</example>`.

		Returns: this

		See_Also:
			[firstInnerText], [directText], [innerText], [appendChild]
	+/
	@scriptable
	Element appendText(string text) {
		appendChild(new TextNode(parentDocument, text));
		return this;
	}

	/++
		Returns child elements which are of a tag type (excludes text, comments, etc.).


		childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

		Params:
			tagName = filter results to only the child elements with the given tag name.
	+/
	@property Element[] childElements(string tagName = null) {
		Element[] ret;
		foreach(c; children) {
			// node type 1 is what the base Element.nodeType reports
			if(c.nodeType != 1)
				continue;
			if(tagName is null || c.tagName == tagName)
				ret ~= c;
		}
		return ret;
	}

	/++
		Appends the given html to the element, returning the elements appended


		This is similar to `element.innerHTML += "html string";` in Javascript.
	+/
	@scriptable
	Element[] appendHtml(string html) {
		// parse inside a throwaway wrapper, then move the parsed nodes over here
		auto parsed = new Document("<root>" ~ html ~ "</root>");
		return stealChildren(parsed.root);
	}


	/++
		Inserts a child under this element after the element `where`.
+/
	void insertChildAfter(Element child, Element where)
		in {
			assert(child !is null);
			assert(where !is null);
			assert(where.parentNode is this);
			assert(!selfClosed);
			//assert(isInArray(where, children));
		}
		out {
			assert(child.parentNode is this);
			assert(where.parentNode is this);
			//assert(isInArray(where, children));
			//assert(isInArray(child, children));
		}
	do {
		foreach(idx, c; children) {
			if(c !is where)
				continue;

			immutable at = idx + 1;
			if(cast(DocumentFragment) child !is null) {
				// a fragment contributes its children rather than itself.
				// NOTE(review): those children are not individually reparented here
				// (the original loop doing so is commented out) - presumably the
				// fragment's own parentNode assignment below covers them; confirm
				children = children[0 .. at] ~ child.children ~ children[at .. $];
			} else {
				children = children[0 .. at] ~ child ~ children[at .. $];
			}
			child.parentNode = this;
			break;
		}
	}

	/++
		Reparents all the child elements of `e` to `this`, leaving `e` childless.

		Params:
			e = the element whose children you want to steal. Must not be `this`.
			position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children. If it is not actually one of our children, the stolen children are also appended at the end rather than being dropped.

		Returns: the stolen children
	+/
	Element[] stealChildren(Element e, Element position = null)
		in {
			assert(!selfClosed);
			assert(e !is null);
			assert(e !is this);
			//if(position !is null)
				//assert(isInArray(position, children));
		}
		out (ret) {
			assert(e.children.length == 0);
			// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
			version(none)
			debug foreach(child; ret) {
				assert(child.parentNode is this);
				assert(child.parentDocument is this.parentDocument);
			}
		}
	do {
		foreach(c; e.children) {
			c.parentNode = this;
		}

		// find the insertion index; children.length means "append at the end".
		// That is also the fallback when `position` is not found, because the
		// nodes were already reparented above and `e` is emptied below, so
		// skipping the insertion would lose them entirely.
		size_t insertAt = children.length;
		if(position !is null) {
			foreach(i, child; children) {
				if(child is position) {
					insertAt = i;
					break;
				}
			}
		}

		if(insertAt == children.length)
			children ~= e.children;
		else
			children = children[0 .. insertAt] ~
				e.children ~
				children[insertAt .. $];

		auto ret = e.children[];
		e.children.length = 0;

		return ret;
	}

	/++
		Puts the given element first in our children list. The given element must not have a parent already.

		If `e` is a [DocumentFragment], its children are put first instead of the fragment itself.

		Returns: `e`
	+/
	Element prependChild(Element e)
		in {
			assert(e.parentNode is null);
			assert(!selfClosed);
		}
		out {
			assert(e.parentNode is this);
			assert(e.parentDocument is this.parentDocument);
			// a fragment never appears in the child list itself, only its
			// children do (and there may be none), so skip this check for it
			assert((cast(DocumentFragment) e !is null) || children[0] is e);
		}
	do {
		if(auto frag = cast(DocumentFragment) e) {
			children = e.children ~ children;
			foreach(child; frag.children)
				child.parentNode = this;
		} else
			children = e ~ children;
		e.parentNode = this;
		return e;
	}


	/**
		Returns a string containing all child elements, formatted such that it could be pasted into
		an XML file.

		Each child is written into `where`; the returned string is the part
		of `where` that this call added.
	*/
	@property string innerHTML(Appender!string where = appender!string()) const {
		if(children is null)
			return "";

		auto start = where.data.length;

		foreach(child; children) {
			assert(child !is null);

			child.writeToAppender(where);
		}

		return where.data[start .. $];
	}

	/**
		Takes some html and replaces the element's children with the tree made from the string.
*/
	@property Element innerHTML(string html, bool strict = false) {
		if(html.length == 0) {
			// I often say innerHTML = ""; as a shortcut to clear it out,
			// so let's optimize that slightly.
			removeAllChildren();
			return this;
		}

		selfClosed = false;

		auto doc = new Document();
		doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

		// Detach the previous children only once parsing has succeeded, so a
		// parse error leaves the element as it was. Without this the old
		// children would keep pointing at us as their parent even though they
		// are no longer in our child list.
		removeAllChildren();

		children = doc.root.children;
		foreach(c; children) {
			c.parentNode = this;
		}

		doc.root.children = null;

		return this;
	}

	/// ditto
	@property Element innerHTML(Html html) {
		return this.innerHTML = html.source;
	}

	/**
		Replaces this node with the given html string, which is parsed

		Note: this invalidates the this reference, since it is removed
		from the tree.

		Returns the new children that replace this.
	*/
	@property Element[] outerHTML(string html) {
		auto doc = new Document();
		doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

		// the previous children are being discarded; clear their parent link
		// so they do not keep claiming to belong to this element
		removeAllChildren();

		children = doc.root.children;
		foreach(c; children) {
			c.parentNode = this;
		}

		stripOut();

		return doc.root.children;
	}

	/++
		Returns all the html for this element, including the tag itself.

		This is equivalent to calling toString().
	+/
	@property string outerHTML() {
		return this.toString();
	}

	/// This sets the inner content of the element *without* trying to parse it.
	/// You can inject any code in there; this serves as an escape hatch from the dom.
	///
	/// The only times you might actually need it are for < style > and < script > tags in html.
	/// Other than that, innerHTML and/or innerText should do the job.
	@property void innerRawSource(string rawSource) {
		// detach the old children so none keeps a stale parent link
		removeAllChildren();
		// like the other content setters: an element with content is not self-closed
		selfClosed = false;

		auto rs = new RawSource(parentDocument, rawSource);
		children ~= rs;
		rs.parentNode = this;
	}

	/++
		Replaces the element `find`, which must be a child of `this`, with the element `replace`, which must have no parent.

		Returns: `replace`

		Throws: Exception if `find` is not present in the child list.
	+/
	Element replaceChild(Element find, Element replace)
		in {
			assert(find !is null);
			assert(find.parentNode is this);
			assert(replace !is null);
			assert(replace.parentNode is null);
		}
		out(ret) {
			assert(ret is replace);
			assert(replace.parentNode is this);
			assert(replace.parentDocument is this.parentDocument);
			assert(find.parentNode is null);
		}
	do {
		// FIXME
		//if(auto frag = cast(DocumentFragment) replace)
			//return this.replaceChild(frag, replace.children);
		foreach(i, existing; children) {
			if(existing is find) {
				replace.parentNode = this;
				existing.parentNode = null;
				children[i] = replace;
				return replace;
			}
		}

		throw new Exception("no such child ");// ~  find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString);
	}

	/**
		Replaces the given element with a whole group.
*/
	void replaceChild(Element find, Element[] replace)
		in {
			assert(find !is null);
			assert(replace !is null);
			assert(find.parentNode is this);
			debug foreach(r; replace)
				assert(r.parentNode is null);
		}
		out {
			assert(find.parentNode is null);
			assert(children.length >= replace.length);
			debug foreach(child; children)
				assert(child !is find);
			debug foreach(r; replace)
				assert(r.parentNode is this);
		}
	do {
		// replacing with nothing is just a removal
		if(replace.length == 0) {
			removeChild(find);
			return;
		}
		assert(replace.length);

		for(int i = 0; i < children.length; i++) {
			if(children[i] !is find)
				continue;

			children[i].parentNode = null; // this element should now be dead

			// the first replacement takes over the slot; the rest go right after it
			children[i] = replace[0];
			foreach(e; replace) {
				e.parentNode = this;
			}

			children = .insertAfter(children, i, replace[1..$]);

			return;
		}

		throw new Exception("no such child");
	}


	/**
		Removes the given child from this list.

		Returns the removed element, whose `parentNode` is now null.

		Throws an Exception if the element is not found in the child list.
	*/
	Element removeChild(Element c)
		in {
			assert(c !is null);
			assert(c.parentNode is this);
		}
		out {
			debug foreach(child; children)
				assert(child !is c);
			assert(c.parentNode is null);
		}
	do {
		foreach(idx, candidate; children) {
			if(candidate !is c)
				continue;

			children = children[0 .. idx] ~ children[idx + 1 .. $];
			c.parentNode = null;
			return c;
		}

		throw new Exception("no such child");
	}

	/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
		out (ret) {
			assert(children.length == 0);
			debug foreach(r; ret)
				assert(r.parentNode is null);
		}
	do {
		Element[] oldChildren = children.dup;
		foreach(c; oldChildren)
			c.parentNode = null;

		children.length = 0;

		return oldChildren;
	}

	/**
		Fetch the inside text, with all tags stripped out.

		<p>cool <b>api</b> &amp; code dude<p>
		innerText of that is "cool api & code dude".

		This does not match what real innerText does!
		http://perfectionkills.com/the-poor-misunderstood-innerText/

		It is more like [textContent].

		See_Also:
			[visibleText], which is closer to what the real `innerText`
			does.
	*/
	@scriptable
	@property string innerText() const {
		string s;
		foreach(child; children) {
			if(child.nodeType != NodeType.Text)
				s ~= child.innerText;
			else
				s ~= child.nodeValue();
		}
		return s;
	}

	/// ditto
	alias textContent = innerText;

	/++
		Gets the element's visible text, similar to how it would look assuming
		the document was HTML being displayed by a browser. This means it will
		attempt whitespace normalization (unless it is a `<pre>` tag), add `\n`
		characters for `<br>` tags, and I reserve the right to make it process
		additional css and tags in the future.

		If you need specific output, use the more stable [textContent] property
		or iterate yourself with [tree] or a recursive function with [children].

		History:
			Added March 25, 2022 (dub v10.8)
	+/
	string visibleText() const {
		return this.visibleTextHelper(this.tagName == "pre");
	}

	// Recursive worker for visibleText. `pre` is true once we are inside a
	// <pre> element, which turns whitespace normalization off for text nodes.
	private string visibleTextHelper(bool pre) const {
		string result;
		foreach(thing; this.children) {
			if(thing.nodeType == NodeType.Text)
				result ~= pre ? thing.nodeValue : normalizeWhitespace(thing.nodeValue);
			else if(thing.tagName == "br")
				result ~= "\n";
			else
				result ~= thing.visibleTextHelper(pre || thing.tagName == "pre");
		}
		return result;
	}

	/**
		Sets the inside text, replacing all children. You don't
		have to worry about entity encoding.

		The previous children are detached (their `parentNode` becomes null).
	*/
	@scriptable
	@property void innerText(string text) {
		// clear the old children's parent link; otherwise they would keep
		// claiming this element as parent while no longer being in its list
		removeAllChildren();

		selfClosed = false;
		Element e = new TextNode(parentDocument, text);
		children = [e];
		e.parentNode = this;
	}

	/**
		Strips this node out of the document, replacing it with the given text.

		This element must currently have a parent.
	*/
	@property void outerText(string text)
		in {
			assert(parentNode !is null, "outerText requires this element to have a parent");
		}
	do {
		parentNode.replaceChild(this, new TextNode(parentDocument, text));
	}

	/**
		Same result as innerText; the tag with all inner tags stripped out
	*/
	@property string outerText() const {
		return innerText;
	}


	/* *******************************
	          Miscellaneous
	*********************************/

	/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
	@property Element cloned()
	/+
		out(ret) {
			// FIXME: not sure why these fail...
			assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
			assert(ret.tagName == this.tagName);
		}
	do {
	+/
	{
		return this.cloneNode(true);
	}

	/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
	Element cloneNode(bool deepClone) {
		auto e = Element.make(this.tagName);
		e.attributes = this.attributes.aadup;
		e.selfClosed = this.selfClosed;

		if(deepClone) {
			foreach(child; children) {
				e.appendChild(child.cloneNode(true));
			}
		}


		return e;
	}

	/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
	/// The base implementation returns an empty string.
	string nodeValue() const {
		return "";
	}

	// should return int
	/// The base implementation returns 1.
	@property int nodeType() const {
		return 1;
	}


	invariant () {
		debug assert(tagName.indexOf(" ") == -1);

		// commented cuz it gets into recursive pain and eff dat.
		/+
		if(children !is null)
		foreach(child; children) {
		//	assert(parentNode !is null);
			assert(child !is null);
			assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName));
			assert(child !is this);
			//assert(child !is parentNode);
		}
		+/

		/+
		// this isn't helping
		if(parent_ && parent_.asElement) {
			bool found = false;
			foreach(child; parent_.asElement.children)
				if(child is this)
					found = true;
			assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement)));
		}
		+/

		/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
		if(parentNode !is null) {
			// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
			auto lol = cast(TextNode) this;
			assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
		}
		+/
		//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
		// reason is so you can create these without needing a reference to the document
	}

	/**
		Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
		an XML file.
*/
	override string toString() const {
		return writeToAppender();
	}

	/++
		Returns if the node would be printed to string as `<tag />` or `<tag></tag>`. In other words, if it has no non-empty text nodes and no element nodes. Please note that whitespace text nodes are NOT considered empty; `Html("<tag> </tag>").isEmpty == false`.


		The value is undefined if there are comment or processing instruction nodes. The current implementation returns false if it sees those, assuming the nodes haven't been stripped out during parsing. But I'm not married to the current implementation and reserve the right to change it without notice.

		History:
			Added December 3, 2021 (dub v10.5)

	+/
	public bool isEmpty() const {
		foreach(child; this.children) {
			// any non-text node is of course not empty since that's a tag
			if(child.nodeType != NodeType.Text)
				return false;
			// a text node only counts as empty if its value is a null or empty string, which this length check detects
			if(child.nodeValue.length)
				return false;
		}

		return true;
	}

	// Builds the "newline plus indentation" prefix used by the pretty printer:
	// a "\n" followed by `indentationLevel` copies of `indentWith`. When
	// `insertComments` is set, that whitespace is wrapped in `<!--` and `-->`.
	// Returns null when `indentWith` is null.
	protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
		if(indentWith is null)
			return null;

		// at the top we don't have anything to really do
		//if(parent_ is null)
			//return null;

			// I've used isEmpty before but this other check seems better....
			//|| this.isEmpty())

		string s;

		if(insertComments) s ~= "<!--";
		s ~= "\n";
		foreach(indent; 0 .. indentationLevel)
			s ~= indentWith;
		if(insertComments) s ~= "-->";

		return s;
	}

	/++
		Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
		for eyeball debugging.

		$(PITFALL
			This function is not stable. Its interface and output may change without
			notice. The only promise I make is that it will continue to make a best-
			effort attempt at being useful for debugging by human eyes.

			I have used it in the past for diffing html documents, but even then, it
			might change between versions. If it is useful, great, but beware; this
			use is at your own risk.
		)

		History:
			On November 19, 2021, I changed this to `final`. If you were overriding it,
			change your override to `toPrettyStringImpl` instead. It now just calls
			`toPrettyStringImpl.strip` to be an entry point for a stand-alone call.

			If you are calling it as part of another implementation, you might want to
			change that call to `toPrettyStringImpl` as well.

			I am NOT considering this a breaking change since this function is documented
			to only be used for eyeball debugging anyway, which means the exact format is
			not specified and the override behavior can generally not be relied upon.

			(And I find it extremely unlikely anyone was subclassing anyway, but if you were,
			email me, and we'll see what we can do. I'd like to know at least.)

			I reserve the right to make further changes in the future without considering
			them breaking as well.
	+/
	final string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		return toPrettyStringImpl(insertComments, indentationLevel, indentWith).strip;
	}

	// Overridable worker behind toPrettyString; see that function for the
	// meaning of the parameters and the (lack of) stability guarantees.
	string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {

		// first step is to concatenate any consecutive text nodes to simplify
		// the white space analysis. the disabled code below did that by changing
		// the tree itself, which the documentation's "formatting changes the
		// contents" warning would permit...
		//
		// ...but this method is const, so the live code further down builds a
		// merged local copy of the child list instead
		/+
		TextNode lastTextChild = null;
		for(int a = 0; a < this.children.length; a++) {
			auto child = this.children[a];
			if(auto tn = cast(TextNode) child) {
				if(lastTextChild) {
					lastTextChild.contents ~= tn.contents;
					for(int b = a; b < this.children.length - 1; b++)
						this.children[b] = this.children[b + 1];
					this.children = this.children[0 .. $-1];
				} else {
					lastTextChild = tn;
				}
			} else {
				lastTextChild = null;
			}
		}
		+/

		auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements);

		// local view of the children in which each run of adjacent text nodes
		// is represented by one freshly allocated TextNode
		const(Element)[] children;

		TextNode lastTextChild = null;
		for(int a = 0; a < this.children.length; a++) {
			auto child = this.children[a];
			if(auto tn = cast(const(TextNode)) child) {
				if(lastTextChild !is null) {
					lastTextChild.contents ~= tn.contents;
				} else {
					lastTextChild = new TextNode("");
					// const is cast away only to give the temporary node a parent
					lastTextChild.parentNode = cast(Element) this;
					lastTextChild.contents ~= tn.contents;
					children ~= lastTextChild;
				}
			} else {
				lastTextChild = null;
				children ~= child;
			}
		}

		string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

		s ~= "<";
		s ~= tagName;

		// i sort these for consistent output. might be more legible
		// but especially it keeps it the same for diff purposes.
		import std.algorithm : sort;
		auto keys = sort(attributes.keys);
		foreach(n; keys) {
			auto v = attributes[n];
			s ~= " ";
			s ~= n;
			s ~= "=\"";
			s ~= htmlEntitiesEncode(v);
			s ~= "\"";
		}

		if(selfClosed){
			s ~= " />";
			return s;
		}

		s ~= ">";

		// for simple `<collection><item>text</item><item>text</item></collection>`, let's
		// just keep them on the same line

		if(isEmpty) {
			// no work needed, this is empty so don't indent just for a blank line
		} else if(children.length == 1 && children[0].isEmpty) {
			// just one empty one, can put it inline too
			s ~= children[0].toString();
		} else if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) {
			// inline content is written on one line, without pretty printing
			foreach(child; children) {
				s ~= child.toString();//toPrettyString(false, 0, null);
			}
		} else {
			// block content: each child goes one indentation level deeper,
			// and the closing tag gets its own indented line
			foreach(child; children) {
				assert(child !is null);

				s ~= child.toPrettyStringImpl(insertComments, indentationLevel + 1, indentWith);
			}

			s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
		}

		s ~= "</";
		s ~= tagName;
		s ~= ">";

		return s;
	}

	/+
	/// Writes out the opening tag only, if applicable.
	string writeTagOnly(Appender!string where = appender!string()) const {
	+/

	/++
		This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
		Note: the ordering of attributes in the string is undefined.
		Returns the string it creates.

		Implementation_Notes:
			The order of attributes printed by this function is undefined, as permitted by the XML spec. You should NOT rely on any implementation detail noted here.

			However, in practice, between June 14, 2019 and August 22, 2024, it actually did sort attributes by key name.
After August 22, 2024, it changed to track attribute append order and will print them back out in the order in which the keys were first seen.

			This is subject to change again at any time. Use [toPrettyString] if you want a defined output (toPrettyString always sorts by name for consistent diffing).
	+/
	string writeToAppender(Appender!string where = appender!string()) const {
		assert(tagName !is null);

		where.reserve((this.children.length + 1) * 512);

		// remember where our output begins so only our own part is returned
		auto begin = where.data.length;

		where.put("<");
		where.put(tagName);

		/+
		import std.algorithm : sort;
		auto keys = sort(attributes.keys);
		foreach(n; keys) {
			auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later.
		+/
		foreach(n, v; attributes) {
			//assert(v !is null);
			where.put(" ");
			where.put(n);
			where.put("=\"");
			htmlEntitiesEncode(v, where);
			where.put("\"");
		}

		if(selfClosed) {
			// self-closed tags have neither content nor a closing tag
			where.put(" />");
		} else {
			where.put('>');

			innerHTML(where);

			where.put("</");
			where.put(tagName);
			where.put('>');
		}

		return where.data[begin .. $];
	}

	/**
		Returns a lazy range of all its children, recursively.
	*/
	@property ElementStream tree() {
		return new ElementStream(this);
	}

	// I moved these from Form because they are generally useful.
	// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
	// FIXME: add overloads for other label types...
	/++
		Adds a form field to this element, normally a `<input>` but `type` can also be `"textarea"`.

		This is fairly html specific and the label uses my style. I recommend you view the source before you use it to better understand what it does.

		The field is wrapped in a new `<label>` child, which is what gets returned.
		For `"checkbox"` and `"radio"` the label text comes after the input,
		otherwise before it.
	+/
	/// Tags: HTML, HTML5
	Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		immutable labelAfterInput = (type == "checkbox" || type == "radio");

		auto wrapper = this.addChild("label");

		if(!labelAfterInput)
			wrapper.addChild("span", label);

		Element input;
		if(type == "textarea") {
			input = wrapper.addChild("textarea");
			input.setAttribute("name", name);
			input.setAttribute("rows", "6");
		} else {
			input = wrapper.addChild("input");
			input.setAttribute("name", name);
			input.setAttribute("type", type);
		}

		if(labelAfterInput)
			wrapper.addChild("span", label);

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(input);
		return wrapper;
	}

	/// ditto
	Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto wrapper = this.addChild("label");
		wrapper.addChild(label);

		Element input;
		if(type == "textarea") {
			input = wrapper.addChild("textarea");
			input.setAttribute("name", name);
			input.setAttribute("rows", "6");
		} else {
			input = wrapper.addChild("input");
			input.setAttribute("name", name);
			input.setAttribute("type", type);
		}

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(input);
		return wrapper;
	}

	/// ditto
	Element addField(string label, string name, FormFieldOptions fieldOptions) {
		return addField(label, name, "text", fieldOptions);
	}

	/// ditto
	Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto wrapper = this.addChild("label");
		wrapper.addChild("span", label);

		auto sel = wrapper.addChild("select");
		sel.setAttribute("name", name);

		// one <option> per entry of the associative array
		foreach(k, opt; options)
			sel.addChild("option", opt, k);

		// FIXME: implement requirements somehow
		// (`fieldOptions` is accepted but not applied by this overload)

		return wrapper;
	}

	/// ditto
	Element addSubmitButton(string label = null) {
		auto holder = this.addChild("div");
		holder.addClass("submit-holder");

		auto button = holder.addChild("input");
		button.type = "submit";
		if(label.length)
			button.value = label;

		return holder;
	}

}

// computedStyle could arguably be removed to bring size down
//pragma(msg, __traits(classInstanceSize, Element));
//pragma(msg, Element.tupleof);

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/++
	Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)

	History:
		On December 16, 2022, it disabled the special case treatment of `<script>` and `<style>` that [Document]
		does for HTML. To get the old behavior back, add `, true` to your constructor call.
4195 +/ 4196 /// Group: core_functionality 4197 class XmlDocument : Document { 4198 this(string data, bool enableHtmlHacks = false) { 4199 selfClosedElements = null; 4200 inlineElements = null; 4201 rawSourceElements = null; 4202 contentType = "text/xml; charset=utf-8"; 4203 _prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n"; 4204 4205 parseStrict(data, !enableHtmlHacks); 4206 } 4207 } 4208 4209 unittest { 4210 // FIXME: i should also make XmlDocument do different entities than just html too. 4211 auto str = "<html><style>foo {}</style><script>void function() { a < b; }</script></html>"; 4212 auto document = new Document(str, true, true); 4213 assert(document.requireSelector("style").children[0].tagName == "#raw"); 4214 assert(document.requireSelector("script").children[0].tagName == "#raw"); 4215 try { 4216 auto xml = new XmlDocument(str); 4217 assert(0); 4218 } catch(MarkupException e) { 4219 // failure expected, script special case is not valid XML without a dtd (which isn't here) 4220 } 4221 //assert(xml.requireSelector("style").children[0].tagName == "#raw"); 4222 //assert(xml.requireSelector("script").children[0].tagName == "#raw"); 4223 } 4224 4225 4226 4227 import std.string; 4228 4229 /* domconvenience follows { */ 4230 4231 /// finds comments that match the given txt. Case insensitive, strips whitespace. 4232 /// Group: core_functionality 4233 Element[] findComments(Document document, string txt) { 4234 return findComments(document.root, txt); 4235 } 4236 4237 /// ditto 4238 Element[] findComments(Element element, string txt) { 4239 txt = txt.strip().toLower(); 4240 Element[] ret; 4241 4242 foreach(comment; element.getElementsByTagName("#comment")) { 4243 string t = comment.nodeValue().strip().toLower(); 4244 if(t == txt) 4245 ret ~= comment; 4246 } 4247 4248 return ret; 4249 } 4250 4251 /// An option type that propagates null. 
/// See: [Element.optionSelector]
///
/// Holds an element reference that is allowed to be null; member calls made through
/// opDispatch check for null first instead of dereferencing it.
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	auto opDispatch(string method, T...)(T args) {
		alias Result = typeof(__traits(getMember, element, method)(args));
		static if(is(Result : Element)) {
			// two separate returns on purpose: the `auto` return type is inferred from both of them
			if(element is null)
				return MaybeNullElement!Result(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(Result == string)) {
			return (element is null)
				? cast(string) null
				: __traits(getMember, element, method)(args);
		} else static if(is(Result == void)) {
			if(element !is null)
				__traits(getMember, element, method)(args);
		} else {
			// only Element, string and void results are supported
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// Wraps a single element.
	this(Element e) {
		elements = [e];
	}

	/// Collects everything under `e` matching `selector` (via `querySelectorAll`).
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Wraps an existing array; the array is not copied.
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `selector` against every member and concatenates the results into a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection result;
		foreach(member; elements)
			result.elements ~= member.getElementsBySelector(selector);
		return result;
	}

	/// Returns the i-th member.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1 .. $];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		Note that `separator` is appended after every member, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string gathered;
		// the loop variable must stay named `e`: the mixin string below refers to it
		foreach(e; elements) {
			gathered ~= mixin("e." ~ method);
			gathered ~= separator;
		}
		return gathered;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		// the loop variable must stay named `e`: the mixin string below refers to it
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(member; elements) {
			auto wrapper = what.cloneNode(false);
			member.wrapIn(wrapper);
		}

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		auto combined = this.elements ~ rhs.elements;
		return ElementCollection(combined);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through `get`; `obj.name = v` (non-null `v`) writes through `set`.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		return (v is null) ? get(name) : set(name, v);
	}

	/// `obj["key"]` reads through `get`.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj["field"] = value` writes through `set`.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	string* opBinary(string op)(string key) if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	/// Binds the proxy to `e`.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Stores `value` in the `data-*` attribute derived from the camelCased `name`; returns `value`.
	string set(string name, string value) {
		const attributeName = "data-" ~ unCamelCase(name);
		_element.setAttribute(attributeName, value);
		return value;
	}

	/// Reads the `data-*` attribute derived from the camelCased `name`.
	string get(string name) const {
		const attributeName = "data-" ~ unCamelCase(name);
		return _element.getAttribute(attributeName);
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Generally, you shouldn't create this yourself, since you can use [Element.attrs] instead.
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/++
		Sets a `value` for attribute with `name`. If the attribute doesn't exist, this will create it, even if `value` is `null`.
	+/
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/++
		Provides support for testing presence of an attribute with the `in` operator.

		History:
			Added December 16, 2020 (dub v10.10)
	+/
	auto opBinaryRight(string op : "in")(string name) const
	{
		return name in _element.attributes;
	}
	///
	unittest
	{
		auto doc = new XmlDocument(`<test attr="test"/>`);
		assert("attr" in doc.root.attrs);
		assert("test" !in doc.root.attrs);
	}

	/++
		Returns the value of attribute `name`, or `null` if doesn't exist
	+/
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}

// One node of the singly linked attribute list kept by AttributesHolder. The key and value
// characters are stored back to back right after the fixed fields.
private struct InternalAttribute {
	// variable length structure
	private InternalAttribute* next;
	private uint totalLength;
	private ushort keyLength;
	private char[0] chars;

	// this really should be immutable tbh
	inout(char)[] key() inout return {
		return chars.ptr[0 .. keyLength];
	}

	inout(char)[] value() inout return {
		return chars.ptr[keyLength .. totalLength];
	}

	// Allocates a node holding copies of `key` and `value`. Throws Exception when the key length
	// does not fit in a ushort or the combined length does not fit in a uint.
	static InternalAttribute* make(in char[] key, in char[] value) {
		// old code was
		//auto data = new ubyte[](InternalAttribute.sizeof + key.length + value.length);
		//GC.addRange(data.ptr, data.length); // MUST add the range to scan it!

		import core.memory;

		const size_t payload = key.length + value.length;

		// but this code is a bit better, notice we did NOT set the NO_SCAN attribute because of the presence of the next pointer
		// (this can sometimes be a pessimization over the separate strings but meh, most of these attributes are supposed to be small)
		auto node = cast(InternalAttribute*) GC.calloc(InternalAttribute.sizeof + payload);

		// assert(key.length > 0);

		node.totalLength = cast(uint) payload;
		node.keyLength = cast(ushort) key.length;

		// the casts above truncate silently, so compare against the untruncated lengths
		if(node.keyLength != key.length)
			throw new Exception("attribute key overflow");
		if(node.totalLength != payload)
			throw new Exception("attribute length overflow");

		node.key[] = key[];
		node.value[] = value[];

		return node;
	}

	// FIXME: disable default ctor and op new
}

import core.exception;

// Ordered key/value store for attributes, implemented as a singly linked list of
// InternalAttribute nodes; iteration yields entries in list order.
struct AttributesHolder {
	private @system InternalAttribute* attributes;

	/+
	invariant() {
		const(InternalAttribute)* wtf = attributes;
		while(wtf) {
			assert(wtf != cast(void*) 1);
			assert(wtf.keyLength != 0);
			import std.stdio; writeln(wtf.key, "=", wtf.value);
			wtf = wtf.next;
		}
	}
	+/

	/+
		It is legal to do foo["key", "default"] to call it with no error...
	+/
	// Returns the value stored for `key`; throws RangeError when the key is absent.
	string opIndex(scope const char[] key) const {
		if(auto entry = find(key))
			return cast(string) entry.value;
		throw new RangeError(key.idup); // FIXME
	}

	// Returns the value stored for `key`, or `returnedIfKeyNotFound` when the key is absent.
	string get(scope const char[] key, string returnedIfKeyNotFound = null) const {
		auto entry = this.find(key);
		return (entry is null) ? returnedIfKeyNotFound : cast(string) entry.value;
	}

	// All keys, in iteration order.
	private string[] keys() const {
		string[] names;
		foreach(name, unused; this)
			names ~= name;
		return names;
	}

	/+
		If this were to return a string* it'd be tricky cuz someone could try to rebind it, which is impossible.

		This is a breaking change. You can get a similar result though with [get].
	+/
	bool opBinaryRight(string op : "in")(scope const char[] key) const {
		return find(key) !is null;
	}

	// Linear search for the node whose key equals `key`; null when there is none.
	private inout(InternalAttribute)* find(scope const char[] key) inout @trusted {
		for(inout(InternalAttribute)* node = attributes; node !is null; node = node.next) {
			// assert(node > cast(void*) 1);
			if(node.key == key)
				return node;
		}
		return null;
	}

	// Unlinks the first node whose key equals `key`; does nothing when the key is absent.
	void remove(scope const char[] key) @trusted {
		// walk the links themselves so the head and interior nodes are handled the same way
		InternalAttribute** link = &attributes;
		while(*link !is null) {
			if((*link).key == key) {
				*link = (*link).next;
				return;
			}
			link = &(*link).next;
		}
	}

	// Sets `key` to `value`. An existing key keeps its position in the list (its node is
	// swapped for a fresh one unless the value is already equal); a new key goes at the end.
	void opIndexAssign(scope const char[] value, scope const char[] key) @trusted {
		InternalAttribute** link = &attributes;
		while(*link !is null) {
			auto existing = *link;
			if(existing.key == key) {
				if(existing.value == value)
					return; // replacing immutable value with self, no change

				auto replacement = InternalAttribute.make(key, value);
				replacement.next = existing.next;
				*link = replacement;
				return;
			}
			link = &existing.next;
		}

		// not found (or the list was empty): `link` now addresses the terminating null pointer
		*link = InternalAttribute.make(key, value);
	}

	// foreach support; stops early and returns the delegate's result when it is non-zero.
	int opApply(int delegate(string key, string value) dg) const @trusted {
		for(const(InternalAttribute)* node = attributes; node !is null; node = node.next) {
			if(auto result = dg(cast(string) node.key, cast(string) node.value))
				return result;
		}
		return 0;
	}
}

unittest {
	AttributesHolder holder;

	// checks membership of the three base keys, presence/absence of "four", and that
	// iteration yields `expected` pairs in order (extra entries trip the inner assert)
	void verify(string[][] expected, bool hasFour) {
		assert("one" in holder);
		assert("two" in holder);
		assert("three" in holder);
		assert(("four" in holder) == hasFour);

		size_t position;
		foreach(k, v; holder) {
			assert(position < expected.length);
			assert(k == expected[position][0] && v == expected[position][1]);
			position++;
		}
	}

	holder["one"] = "1";
	holder["two"] = "2";
	holder["three"] = "3";
	verify([["one", "1"], ["two", "2"], ["three", "3"]], false);

	// overwriting keeps the original position
	holder["two"] = "dos";
	verify([["one", "1"], ["two", "dos"], ["three", "3"]], false);

	// new keys go to the end
	holder["four"] = "4";
	verify([["one", "1"], ["two", "dos"], ["three", "3"], ["four", "4"]], true);
}

/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
		_attribute = _element.getAttribute("style");
		originalAttribute = _attribute;
	}

	~this() {
		// write back only when the string was rebound since construction (identity check, not equality)
		if(_attribute !is originalAttribute)
			_element.setAttribute("style", _attribute);
	}

	Element _element;
	string _attribute;
	string originalAttribute;

	/+
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}
	+/

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	// Sets one css property (camelCase names are converted, "cssFloat" maps to "float"),
	// rebuilds the whole style string from the parsed rules and stores it on the element.
	// An empty `value` removes the property. Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;

		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		parsed[name] = value;

		string rebuilt;
		foreach(property, setting; parsed) {
			/* css can't do empty rules anyway so we'll use that to remove */
			if(setting.length == 0)
				continue;
			if(rebuilt.length)
				rebuilt ~= " ";
			rebuilt ~= property ~ ": " ~ setting ~ ";";
		}
		_attribute = rebuilt;

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	// Looks up one css property (same name conversion as `set`); null when it is not present.
	string get(string name) const {
		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		if(auto setting = name in parsed)
			return *setting;
		return null;
	}

	// Splits the style string on ';' and each piece on its first ':' into a name => value map.
	// A piece without ':' becomes a name with an empty value.
	string[string] rules() const {
		string[string] parsed;
		foreach(piece; _attribute.split(";")) {
			auto declaration = piece.strip();
			if(declaration.length == 0)
				continue;

			auto colon = declaration.indexOf(":");
			if(colon == -1) {
				parsed[declaration] = "";
				continue;
			}

			auto property = declaration[0 .. colon].strip();
			auto setting = declaration[colon + 1 .. $].strip();
			parsed[property] = setting;
		}

		return parsed;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
string unCamelCase(string a) {
	string dashed;
	foreach(c; a) {
		if(c < 'A' || c > 'Z') {
			dashed ~= c;
			continue;
		}
		// ascii upper case letter: emit a dash followed by its lower case form
		dashed ~= '-';
		dashed ~= cast(char) (c - 'A' + 'a');
	}
	return dashed;
}

/// Translates a css style property-name to a camel cased propertyName
string camelCase(string a) {
	string joined;
	bool upperNext = false;
	foreach(c; a) {
		if(c == '-') {
			// dashes are dropped; the character that follows is upper cased
			upperNext = true;
			continue;
		}

		if(upperNext) {
			upperNext = false;
			joined ~= toUpper("" ~ c);
		} else {
			joined ~= c;
		}
	}
	return joined;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.)


/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




///.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
///
/// Returns `e` cast to `T`. Throws ElementNotFoundException (carrying the caller's file and
/// line) when `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	auto ret = cast(T) e;
	if(ret is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return ret;
}


///.
/// Group: core_functionality
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// A fragment has no tag of its own: only its inner HTML is written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty-prints each child at the fragment's own indentation level and concatenates the results.
	override string toPrettyStringImpl(bool insertComments, int indentationLevel, string indentWith) const {
		string s;
		foreach(child; children)
			s ~= child.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
		return s;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	/+
	override Element parentNode(Element p) {
		this.parentNode = p;
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
	+/
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
///
/// Params:
///     data = the text to encode
///     output = buffer the encoded text is appended to
///     encodeNonAscii = when true, code points above 127 (and NUL) are written as numeric `&#N;` references
///
/// Returns: only the encoded form of `data`: the input slice itself when nothing needed
/// encoding, otherwise the part of `output` appended by this call.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		// each markup-significant character is replaced by its named entity reference;
		// writing the character itself back out would leave the text unescaped
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (!encodeNonAscii || (d < 128 && d > 0))
			output.put(d);
		else
			output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
/// Group: core_functionality
///
/// Params:
///     entity = the complete reference text: the leading `&`, the name or `#number`, and one
///              trailing character (normally `;`). The first and last characters are ignored
///              when the name is extracted.
///
/// Returns: the decoded character; U+FFFD when the entity is not recognized; a space when a
/// decimal numeric reference contains no digits at all.
dchar parseEntity(in dchar[] entity) {
	// everything below slices entity[1 .. $-1], which needs at least two characters
	if(entity.length < 2)
		return '\ufffd';

	// utf-8 copy of the name for comparison against the table
	char[128] buffer;
	int bpos;
	bool nameFits = true;
	foreach(char c; entity[1 .. $-1]) {
		if(bpos == buffer.length) {
			// does not fit in the buffer, so skip the table lookup instead of overrunning it
			nameFits = false;
			break;
		}
		buffer[bpos++] = c;
	}

	if(nameFits) {
		const(char)[] entityAsString = buffer[0 .. bpos];

		// binary search over the half-open range [lo, hi), so index 0 is reachable too.
		// assumes availableEntities is sorted ascending and parallel to availableEntitiesValues
		// (both are defined outside this view) - TODO confirm
		size_t lo = 0;
		size_t hi = availableEntities.length;
		while(lo < hi) {
			const mid = lo + (hi - lo) / 2;
			if(availableEntities[mid] == entityAsString)
				return availableEntitiesValues[mid];
			if(entityAsString < availableEntities[mid])
				hi = mid;
			else
				lo = mid + 1;
		}
	}

	switch(entity[1..$-1]) {
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		case "amp":
			return '&';
		// the next are html rather than xml

		// and handling numeric entities
		default:
			// entity[2] is read below, so a bare "&#" must not get this far
			if(entity.length > 2 && entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					while(decimal.length && (decimal[$-1] < '0' || decimal[$-1] > '9'))
						decimal = decimal[0 .. $ - 1];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

unittest {
	// not in the binary search
	assert(parseEntity("&quot;"d) == '"');

	// numeric value
	assert(parseEntity("&#x0534;") == '\u0534');

	// not found at all
	assert(parseEntity("&asdasdasd;"d) == '\ufffd');

	// random values in the bin search
	assert(parseEntity("&Tab;"d) == '\t');
	assert(parseEntity("&raquo;"d) == '\&raquo;');

	// near the middle and edges of the bin search
	assert(parseEntity("&ascr;"d) == '\U0001d4b6');
	assert(parseEntity("&ast;"d) == '\u002a');
	assert(parseEntity("&AElig;"d) == '\u00c6');
	assert(parseEntity("&zwnj;"d) == '\u200c');
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// In loose mode an entity attempt that cannot be completed is copied to the output as plain
/// text. An attempt is given up once nine characters after the `&` have been read without
/// reaching a `;`.
///
/// Throws: Exception in strict mode on an unterminated entity.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer; // scratch space for utf-8 encoding a single code point

	bool tryingEntity = false;
	bool tryingNumericEntity = false;
	bool tryingHexEntity = false;
	dchar[16] entityBeingTried; // the text of the current attempt, starting with its '&'
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0; // number of characters read since the '&'

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			if(entityBeingTriedLength == 2 && ch == '#') {
				tryingNumericEntity = true;
				continue;
			} else if(tryingNumericEntity && entityBeingTriedLength == 3 && ch == 'x') {
				tryingHexEntity = true;
				continue;
			}

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") {
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				} else {
					auto ch2 = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
					if(ch2 == '\ufffd') { // either someone put this in intentionally (lol) or we failed to get it
						// but either way, just abort and keep the plain text
						foreach(char c; entityBeingTried[0 .. entityBeingTriedLength - 1]) // cut off the & we're on now
							a ~= c;
					} else {
						a ~= buffer[0.. std.utf.encode(buffer, ch2)];
					}
				}

				// tryingEntity is still true
				goto new_entity;
			} else
			if(ch == ';') {
				tryingEntity = false;
				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
			} else if(ch == ' ') {
				// e.g. you &amp i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]);
					a ~= buffer[0 .. std.utf.encode(buffer, ch)];
				}
			} else {
				if(tryingNumericEntity) {
					if(ch < '0' || ch > '9') {
						if(tryingHexEntity) {
							// anything outside the ascii letter ranges ends a hex reference early
							if(ch < 'A')
								goto trouble;
							if(ch > 'Z' && ch < 'a')
								goto trouble;
							if(ch > 'z')
								goto trouble;
						} else {
							trouble:
							if(strict)
								throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
							tryingEntity = false;
							// parseEntity ignores the last character, so the offending one is re-emitted right after
							a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
							a ~= ch;
							continue;
						}
					}
				}


				if(entityAttemptIndex >= 9) {
					done:
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				new_entity:
				tryingEntity = true;
				tryingNumericEntity = false;
				tryingHexEntity = false;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

unittest {
	// error recovery
	assert(htmlEntitiesDecode("&lt;&foo") == "<&foo"); // unterminated turned back to thing
	assert(htmlEntitiesDecode("&lt&foo") == "<&foo"); // semi-terminated... parse and carry on (is this really sane?)
	assert(htmlEntitiesDecode("loc=en_us&tracknum=111") == "loc=en_us&tracknum=111"); // a bit of both, seen in a real life email
	assert(htmlEntitiesDecode("&amp test") == "&amp test"); // unterminated, just abort

	// in strict mode all of these should fail
	try { assert(htmlEntitiesDecode("&lt;&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&lt&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("loc=en_us&tracknum=111", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&amp test", true) == "&amp test"); assert(0); } catch(Exception e) { }

	// correct cases that should pass the same in strict or loose mode
	foreach(strict; [false, true]) {
		assert(htmlEntitiesDecode("&amp;hello&raquo; win", strict) == "&hello\&raquo; win");
	}
}

/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	///.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	///.
	@property override int nodeType() const {
		return 100;
	}
}

// NOTE(review): RawSource continues past the end of this chunk; the lines below are unchanged.
///.
/// Group: implementations
class RawSource : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	///.
	override string nodeValue() const {
		return this.toString();
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		// raw source goes out untouched
		where.put(source);
		return source;
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return source;
	}


	override RawSource cloneNode(bool deep) {
		auto copy = new RawSource(parentDocument, source);
		return copy;
	}

	/// The text emitted for this node.
	string source;
}

/// Common base for server-side code islands; `type` becomes the tag name after a `#`.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// The code between the angle brackets.
	override string nodeValue() const {
		return this.source;
	}

	/// Writes `<` ~ source ~ `>` and returns the part of the appender that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return "<" ~ source ~ ">";
	}

	/// The code between the angle brackets.
	string source;
}

/// A server-side code node tagged `#php`.
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Stores `s` as the node's source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		source = s;
	}

	override PhpCode cloneNode(bool deep) {
		auto copy = new PhpCode(parentDocument, source);
		return copy;
	}
}

/// A server-side code node tagged `#asp`.
/// Group: implementations
class AspCode : ServerSideCode {
	/// Stores `s` as the node's source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		source = s;
	}

	override AspCode cloneNode(bool deep) {
		auto copy = new AspCode(parentDocument, source);
		return copy;
	}
}

/// A `<!...>` instruction; tagged `#bpi`.
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Stores `s` as the text between `<!` and `>`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#bpi";
	}

	/// The text between `<!` and `>`.
	override string nodeValue() const {
		return this.source;
	}

	override BangInstruction cloneNode(bool deep) {
		auto copy = new BangInstruction(parentDocument, source);
		return copy;
	}

	/// Writes `<!` ~ source ~ `>` and returns the part of the appender that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text between `<!` and `>`.
	string source;
}

/// A `<...>` instruction whose source is emitted between plain angle brackets; tagged `#qpi`.
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the text between `<` and `>`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#qpi";
	}

	override QuestionInstruction cloneNode(bool deep) {
		auto copy = new QuestionInstruction(parentDocument, source);
		return copy;
	}

	/// The text between `<` and `>`.
	override string nodeValue() const {
		return this.source;
	}

	/// Writes `<` ~ source ~ `>` and returns the part of the appender that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text between `<` and `>`.
	string source;
}

/// An html comment; tagged `#comment`.
/// Group: implementations
class HtmlComment : SpecialElement {
	/// Stores `s` as the comment text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	override HtmlComment cloneNode(bool deep) {
		auto copy = new HtmlComment(parentDocument, source);
		return copy;
	}

	/// The comment text, without the delimiters.
	override string nodeValue() const {
		return this.source;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	/// The comment text, without the delimiters.
	string source;
}




/// A run of text in the tree; tagged `#text`. The text is stored decoded and
/// is entity-encoded again when written out.
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a text node holding `e` (already decoded text).
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// ditto, with no parent document
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from html source text, decoding its entities first.
	/// Decoding is strict unless the document is null or in loose mode.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same document and contents; `deep` is irrelevant here.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	/// The decoded text.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	/// Always `NodeType.Text`.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Writes the entity-encoded text to `where` and returns it. Empty contents yield `""`, never null.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/++
		Pretty-printing form of the text: whitespace is collapsed (unless the
		parent is a `<pre>`), the text is stripped (unless the parent is a `<p>`),
		entity-encoded, and every line is indented.

		Params:
			insertComments = wrap the indentation of continuation lines in `<!--` `-->`
			indentationLevel = how many times `indentWith` is repeated per line
			indentWith = the string used for one level of indentation
	+/
	override string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// continuation lines must honor the caller's indent string too,
				// not a hard-coded tab
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes never take children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

/++
	Represents a HTML link. 
This provides some convenience methods for manipulating query strings, but otherwise is the same Element interface.

	Please note this object may not be used for all `<a>` tags.
+/
/// Group: implementations
class Link : Element {

	/++
		Constructs `<a href="that href">that text</a>`.
	+/
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}

	/// ditto
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}

	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns null if the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if(auto found = name in vars)
			return *found;
		return null;
	}

	/// Parses the query string of the href into a name => value table, URI-decoding
	/// both names and values. Returns null if there is no href attribute.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// the fragment is always the last part of a URL, so drop it before
		// looking for the query; a '?' inside the fragment is not a query string
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0..fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1)
			str = href[ques+1..$];

		string[] variables = str.split("&");

		string[string] hash;

		foreach(var; variables) {
			if(var.length == 0)
				continue; // stray "&&" or trailing "&"; there is no variable here

			auto index = var.indexOf("=");
			if(index == -1)
				// a bare name still needs decoding, or it would be encoded a
				// second time when the query string is rebuilt
				hash[decodeUriComponent(var)] = "";
			else {
				hash[decodeUriComponent(var[0..index])] = decodeUriComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	/// Replaces all the stuff after a ? in the link at once with the given assoc array values.
	/// Any `#fragment` on the existing href is preserved after the new query string.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// the fragment follows the query in a URL, so it has to be peeled off
		// first; cutting at the '?' first would throw the fragment away with it
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0..question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeUriComponent(name);
			if(value.length)
				query ~= "=" ~ encodeUriComponent(value);
		}

		// no variables at all: leave the '?' off entirely
		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

/++
	Represents a HTML form. This slightly specializes Element to add a few more convenience methods for adding and extracting form data.

	Please note this object may not be used for all `<form>` tags.
+/
/// Group: implementations
class Form : Element {

	///.
this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	/// Overrides of the base class implementations that more conform to *my* conventions when writing form html.
	/// If the form contains a `fieldset div`, the field is added there; otherwise to the form itself.
	override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	/// ditto
	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
		auto type = "text";
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	/// ditto
	override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, options, fieldOptions);
		else
			return t.addField(label, name, options, fieldOptions);
	}

	/// ditto
	override void setValue(string field, string value) {
		setValue(field, value, true);
	}

	// FIXME: doesn't handle arrays; multiple fields can have the same name

	/// Set's the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.
	/// Any other named element (such as a `<button>`) gets its value attribute set.

	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
	void setValue(string field, string value, bool makeNew) {
		auto eles = getField(field);
		if(eles.length == 0) {
			if(makeNew) {
				addInput(field, value);
				return;
			} else
				throw new Exception("form field does not exist");
		}

		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				default:
					// getField matches any element carrying the name, not just the
					// three classic controls; treat the rest like a plain input
					// instead of aborting the program
					e.value = value;
				break;
				case "textarea":
					e.innerText = value;
				break;
				case "input":
					string type = e.getAttribute("type");
					if(type is null) {
						e.value = value;
						return;
					}
					switch(type) {
						case "checkbox":
						case "radio":
							if(value.length && value != "false")
								e.setAttribute("checked", "checked");
							else
								e.removeAttribute("checked");
						break;
						default:
							e.value = value;
							return;
					}
				break;
				case "select":
					bool found = false;
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						// an option without a value attribute submits its text
						string val = child.getAttribute("value");
						if(val is null)
							val = child.innerText;
						if(val == value) {
							child.setAttribute("selected", "selected");
							found = true;
						} else
							child.removeAttribute("selected");
					}

					if(!found) {
						// no existing option matches; add one so the value is kept
						e.addChild("option", value)
						.setAttribute("selected", "selected");
					}
				break;
			}
		} else {
			// assume radio boxes
			foreach(e; eles) {
				string val = e.getAttribute("value");
				//if(val is null)
				//	throw new Exception("don't know what to do with radio boxes with null value");
				if(val == value)
					e.setAttribute("checked", "checked");
				else
					e.removeAttribute("checked");
			}
		}
	}

	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
	/// it makes no attempt to find and modify existing elements in the form to the new values.
	void addValueArray(string key, string[] arrayOfValues) {
		// addInput names the element `key` and defaults to type=hidden,
		// which is what the documentation above promises
		foreach(arr; arrayOfValues)
			addInput(key, arr);
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
	///
	/// Returns "" if there is no such field, or if nothing is checked/selected.
	string getValue(string field) {
		auto eles = getField(field);
		if(eles.length == 0)
			return "";
		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				default:
					// some other named element (e.g. a <button>); report its value
					// attribute rather than aborting the program
					return e.value;
				case "input":
					if(e.type == "checkbox") {
						if(e.checked)
							return e.value.length ? e.value : "checked";
						return "";
					} else if(e.type == "radio") {
						// a lone radio button behaves like a group of one:
						// it only contributes a value while checked
						if(e.checked)
							return e.value;
						return "";
					} else
						return e.value;
				case "textarea":
					return e.innerText;
				case "select":
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						if(child.selected) {
							// mirror setValue: an option without a value
							// attribute stands for its own text
							string val = child.getAttribute("value");
							if(val is null)
								val = child.innerText;
							return val;
						}
					}
				break;
			}
		} else {
			// assuming radio
			foreach(e; eles) {
				if(e.checked)
					return e.value;
			}
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	/++
		Returns the form's contents in application/x-www-form-urlencoded format.

		Bugs:
			Doesn't handle repeated elements of the same name nor files.
	+/
	string getPostableData() {
		bool[string] namesDone;

		string ret;
		bool outputted = false;

		foreach(e; getElementsBySelector("[name]")) {
			// each name is emitted once; getValue resolves groups sharing it
			if(e.name in namesDone)
				continue;

			if(outputted)
				ret ~= "&";
			else
				outputted = true;

			ret ~= encodeUriComponent(e.name) ~ "=" ~ encodeUriComponent(getValue(e.name));

			namesDone[e.name] = true;
		}

		return ret;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] ret;
		foreach(e; tree) {
			if(e.name == name)
				ret ~= e;
		}
		return ret;
	}

	/// Grabs the <label> with the given for tag, if there is one.
	Element getLabel(string forId) {
		foreach(e; tree)
			if(e.tagName == "label" && e.getAttribute("for") == forId)
				return e;
		return null;
	}

	/// Adds a new INPUT field to the end of the form with the given attributes.
	Element addInput(string name, string value, string type = "hidden") {
		auto e = new Element(parentDocument, "input", null, true);
		e.name = name;
		e.value = value;
		e.type = type;

		appendChild(e);

		return e;
	}

	/// Removes the given field from the form. It finds the element and knocks it right out.
	void removeField(string name) {
		foreach(e; getField(name))
			e.parentNode.removeChild(e);
	}

	/+
	/// Returns all form members.
	@property Element[] elements() {

	}

	///.
	string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
		 && name != "style"  && name != "name" && name != "id" && name != "class")
	{

	}
	+/
/+
	void submit() {
		// take its elements and submit them through http
	}
+/
}

import std.conv;

/++
	Represents a HTML table. Has some convenience methods for working with tabular data.
+/
/// Group: implementations
class Table : Element {

	/// You can make this yourself but you'd generally get one of these object out of a html parse or [Element.make] call.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/++
		Creates an element with the given type and content. The argument can be an Element, Html, or other data which is converted to text with `to!string`

		The element is $(I not) appended to the table.
+/
	Element th(T)(T t) {
		return makeCell("th", t);
	}

	/// ditto
	Element td(T)(T t) {
		return makeCell("td", t);
	}

	// Shared implementation of th and td: builds a detached cell of the given
	// tag (through the parent document when there is one) and fills it from t.
	private Element makeCell(T)(string cellTag, T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement(cellTag);
		else
			e = Element.make(cellTag);
		static if(is(T == Html))
			e.innerHTML = t;
		else static if(is(T : Element))
			e.appendChild(t);
		else
			e.innerText = to!string(t);
		return e;
	}

	/++
		Passes each argument to the [th] method for `appendHeaderRow` or [td] method for the others, appends them all to the `<tbody>` element for `appendRow`, `<thead>` element for `appendHeaderRow`, or a `<tfoot>` element for `appendFooterRow`, and ensures it is appended it to the table.
	+/
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// ditto
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// ditto
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	/++
		Takes each argument as a class name and calls [Element.addClass] for each element in the column associated with that index.

		Empty class names skip their column. Grid positions that hold no cell are skipped.

		Please note this does not use the html `<col>` element.
	+/
	void addColumnClasses(string[] classes...) {
		auto grid = getGrid();
		foreach(row; grid)
			foreach(i, cl; classes) {
				if(cl.length == 0 || i >= row.length)
					continue;
				// getGrid pads short rows with null so the grid is rectangular;
				// there is no element to tag in those slots
				if(row[i] is null)
					continue;
				row[i].addClass(cl);
			}
	}

	// Builds a <tr> from the arguments and appends it to the first child of
	// this table whose tag is findType, creating that section if needed.
	// Elements that already are td/th are used as-is; everything else is
	// wrapped in a new innerType cell. A string[] expands to one cell per item.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach(e; t) {
			static if(is(typeof(e) : Element)) {
				if(e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if(is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if(is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach(ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else static if(is(typeof(e) == string[])) {
				foreach(ele; e) {
					Element a = Element.make(innerType);
					a.innerText = to!string(ele);
					row.appendChild(a);
				}
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach(e; children) {
			if(e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the `<caption>` element of the table, creating one if it isn't there.
	Element captionElement() {
		Element cap;
		foreach(c; children) {
			if(c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if(cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// Returns or sets the text inside the `<caption>` element, creating that element if it isn't there.
	@property string caption() {
		return captionElement().innerText;
	}

	/// ditto
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
		in {
			// null means "the whole table"; anything else must be one of
			// this table's own sections
			if(tablePortition !is null) {
				assert(tablePortition.parentNode is this);
				assert(
					tablePortition.tagName == "tbody"
					||
					tablePortition.tagName == "tfoot"
					||
					tablePortition.tagName == "thead"
				);
			}
		}
	do {
		if(tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		int maxLength = 0;

		// Stores cell at ret[row][position], growing the row with nulls as
		// needed. A position of -1 means "the first null slot, or the end of
		// the row". Returns the position actually used.
		int insertCell(int row, int position, TableCell cell) {
			if(row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if(position == -1) {
				position++;
				foreach(item; ret[row]) {
					if(item is null)
						break;
					position++;
				}
			}

			if(position < ret[row].length)
				ret[row][position] = cell;
			else
				foreach(i; ret[row].length .. position + 1) {
					if(i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach(i, rowElement; rows) {
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			int position = 0;
			foreach(cellElement; rowElement.childNodes) {
				// anything in the row that is not a td/th object is ignored
				auto cell = cast(TableCell) cellElement;
				if(cell is null)
					continue;

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				foreach(int j; 0 .. cell.colspan) {
					foreach(int k; 0 .. cell.rowspan)
						// if the first row, always append.
						insertCell(k + cast(int) i, k == 0 ? -1 : position, cell);
					position++;
				}
			}

			if(ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach(ref r; ret) {
			foreach(i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
/// Group: implementations
class TableRow : Element {
	/// Creates a `<tr>` element attached to the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
/// Group: implementations
class TableCell : Element {
	///.
this(Document _parentDocument, string _tagName) {
		super(_parentDocument, _tagName);
	}

	// Reads a span attribute as an integer. A missing, empty, or malformed
	// value (the markup may be arbitrary garbage) counts as 1.
	private int spanAttribute(string attributeName) const {
		auto it = getAttribute(attributeName);
		if(it.length == 0)
			return 1;
		try {
			return to!int(it);
		} catch(ConvException e) {
			// e.g. rowspan="2px" or an out-of-range number; don't let bad
			// markup blow up everything that asks for the table layout
			return 1;
		}
	}

	/// Gets and sets the row/colspan attributes as integers.
	/// The getters return 1 when the attribute is missing or is not a valid integer.
	@property int rowspan() const {
		return spanAttribute("rowspan");
	}

	/// ditto
	@property int colspan() const {
		return spanAttribute("colspan");
	}

	/// ditto
	@property int rowspan(int i) {
		setAttribute("rowspan", to!string(i));
		return i;
	}

	/// ditto
	@property int colspan(int i) {
		setAttribute("colspan", to!string(i));
		return i;
	}

}


/// This is thrown on parse errors.
/// Group: implementations
class MarkupException : Exception {

	/// Standard exception constructor; `file` and `line` default to the throw site.
	this(string message, string file = __FILE__, size_t line = __LINE__) {
		super(message, file, line);
	}
}

/// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree.
/// Group: implementations
class ElementNotFoundException : Exception {

	/// type == kind of element you were looking for and search == a selector describing the search.
	this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) {
		this.searchContext = searchContext;
		super("Element of type '"~type~"' matching {"~search~"} not found.", file, line);
	}

	/// The element passed to the constructor as the search context.
	Element searchContext;
}

/// The html struct is used to differentiate between regular text nodes and html in certain functions
///
/// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");`
/// Group: core_functionality
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
6381 string source; 6382 } 6383 6384 // for the observers 6385 enum DomMutationOperations { 6386 setAttribute, 6387 removeAttribute, 6388 appendChild, // tagname, attributes[], innerHTML 6389 insertBefore, 6390 truncateChildren, 6391 removeChild, 6392 appendHtml, 6393 replaceHtml, 6394 appendText, 6395 replaceText, 6396 replaceTextOnly 6397 } 6398 6399 // and for observers too 6400 struct DomMutationEvent { 6401 DomMutationOperations operation; 6402 Element target; 6403 Element related; // what this means differs with the operation 6404 Element related2; 6405 string relatedString; 6406 string relatedString2; 6407 } 6408 6409 6410 private immutable static string[] htmlSelfClosedElements = [ 6411 // html 4 6412 "area","base","br","col","hr","img","input","link","meta","param", 6413 6414 // html 5 6415 "embed","source","track","wbr" 6416 ]; 6417 6418 private immutable static string[] htmlRawSourceElements = [ 6419 "script", "style" 6420 ]; 6421 6422 private immutable static string[] htmlInlineElements = [ 6423 "span", "strong", "em", "b", "i", "a" 6424 ]; 6425 6426 6427 static import std.conv; 6428 6429 /// helper function for decoding html entities 6430 int intFromHex(string hex) { 6431 int place = 1; 6432 int value = 0; 6433 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6434 int v; 6435 char q = hex[a]; 6436 if( q >= '0' && q <= '9') 6437 v = q - '0'; 6438 else if (q >= 'a' && q <= 'f') 6439 v = q - 'a' + 10; 6440 else if (q >= 'A' && q <= 'F') 6441 v = q - 'A' + 10; 6442 else throw new Exception("Illegal hex character: " ~ q); 6443 6444 value += v * place; 6445 6446 place *= 16; 6447 } 6448 6449 return value; 6450 } 6451 6452 6453 // CSS selector handling 6454 6455 // EXTENSIONS 6456 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6457 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6458 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are 
// direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first



// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/++
	The operator and punctuation tokens the selector lexer recognizes.
	Everything else is either white space or a name.
+/
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=",
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/++
	Identifies the selector token that starts at `str[position]`.

	Returns:
		the index into [selectorTokens] of the first entry that matches
		at that position, or -1 if none does.
+/
sizediff_t idToken(string str, sizediff_t position) {
	immutable char first = str[position];
	foreach(index, candidate; selectorTokens) {
		if(candidate[0] != first)
			continue;
		// a two-character token also needs the following character to agree
		immutable bool restMatches = candidate.length == 1
			|| (position + 1 < str.length && str[position + 1] == candidate[1]);
		if(restMatches)
			return index;
	}
	return -1;
}

/// Parts of the CSS selector implementation
// look, ma, no phobos!
// new lexer by ketmar
//
// Splits a selector string into tokens. Entries of selectorTokens are
// emitted as themselves (a single space included, since it is the descendant
// combinator), quoted strings are emitted without their quotes, and anything
// else is emitted as a name with backslash escapes removed. Control
// characters and /* comments */ between tokens are dropped.
//
// Throws Exception when a quoted string is never closed.
string[] lexSelector (string selstr) {

	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			auto end = selstr[0];
			size_t pos = 1;
			bool escaping;
			// Scan to the matching quote. A character that follows a backslash
			// is always consumed, so an escaped quote does not end the string.
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			// running off the end means the closing quote is missing; a string
			// that is closed exactly at the end of the input is fine
			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			selstr = selstr[pos + 1.. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	// a space is a token of its own (see the note from adr in removeLeadingBlanks)
	assert(lexSelector(r" test\=me /*d*/") == [" ", r"test=me", " "]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", " ", "id"]);
	assert(lexSelector(r" < <") == [" ", "<", " ", "<"]);
	assert(lexSelector(r" <<") == [" ", "<<"]);
	assert(lexSelector(r" <</") == [" ", "<<", "/"]);
	assert(lexSelector(r" <</*") == [" ", "<<"]);
	assert(lexSelector(r" <\</*") == [" ", "<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice", " "]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
	// quoted strings: escaped quotes stay inside, and a string may end the input
	assert(lexSelector(`[a="b\"c"]`) == ["[", "a", "=", `b"c`, "]"]);
	assert(lexSelector(`"x"`) == ["x"]);
}

/// ditto
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True when every field other than `separation` still has its initial value.
	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	/// Renders the part back to selector syntax, combinator first.
	string toString() {
		string ret;
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= " > "; break;
			case 2: ret ~= " + "; break;
			case 3: ret ~= " ~ "; break;
			case 4: ret ~= " < "; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]";

		foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")";
		foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")";

		foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")";
		foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")";

		foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")";
		foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")";
		foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(firstOfType) ret ~= ":first-of-type";
		if(lastOfType) ret ~= ":last-of-type";
		if(emptyElement) ret ~= ":empty";
		if(whitespaceOnly) ret ~= ":whitespace-only";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/++
		Returns true if the given element matches this part.

		Only the filters of this part are tested; `separation` is not
		looked at here. A null `e`, or one whose `nodeType` is not 1,
		never matches. `scopeElementNow` is the element `:scope` is
		compared against.
	+/
	bool matchElement(Element e, Element scopeElementNow = null) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					// The first sibling with this tag name decides. On success we
					// only leave the loop: the remaining filters (attributes,
					// :not, nth-*, ...) still have to be checked below.
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					// same as above, scanning from the end
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(scopeElement) {
			if(e !is scopeElementNow)
				return false;
		}
		if(emptyElement) {
			// NOTE(review): this rejects elements for which isEmpty() is true, which
			// reads as inverted for `:empty` -- confirm against Element.isEmpty.
			if(e.isEmpty())
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					// i is the zero-based position among the element children
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/++
	The parsed argument of an `:nth-*()` pseudo-class: `multiplier * n + adder`,
	optionally followed by `of <selector>`.
+/
struct ParsedNth {
	int multiplier; // the coefficient of n
	int adder; // the constant term

	string of; // selector text after "of"; empty when absent

	/++
		Parses `odd`, `even`, a plain integer, or the `An+B` form (a bare
		`n`, `+n` and `-n` count as 1n, 1n and -1n), then an optional
		`of <selector>` tail.

		Throws:
			Exception when something other than `+`, `-` or `of` follows the `n`.
	+/
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// 2n+0; with an adder of 1 this would be the same thing as "odd"
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length >= 2 && (text[0] == '+' || text[0] == '-') && text[1] == 'n') {
				// a lone sign in front of n: parseNumber would see no digits and give 0
				n = text[0] == '-' ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Formats the value as `<multiplier>n<signed adder>`, followed by ` of <selector>` when one was given.
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/++
		Tells whether `e` sits at a position in `elements` that the formula
		selects. Positions are counted from 1, and when an `of` selector is
		present only elements matching it are counted.

		Returns false if `e` is not found among the counted elements.
	+/
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;

		// parse the "of" filter once rather than once per candidate
		Selector ofSelector;
		if(of.length)
			ofSelector = Selector(of);

		foreach(ele; elements) {
			if(of.length && !ofSelector.matchesElement(ele))
				continue;
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		idx -= adder;
		if(multiplier == 0)
			return idx == 0;
		return idx % multiplier == 0 && idx / multiplier >= 0;
	}

	// drops leading space characters from text
	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Reads an optionally signed run of decimal digits off the front of text.
	// Gives 0 when no digits are there (a sign, if any, is still consumed).
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
/// ditto
//
// Applies parts[0] to the elements reachable from start through that part's
// separation, then recurses with the remaining parts from every match and
// concatenates the results. Duplicates are not removed here.
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts, Element scopeElementNow = null) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 2: // next-sibling
			auto e = start.nextSibling("*");
			if(part.matchElement(e, scopeElementNow))
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e, scopeElementNow))
						ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e, scopeElementNow)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
	}

	return ret;
}

/++
	Represents a parsed CSS selector.
	You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	/// One entry per comma-separated alternative of the selector.
	SelectorComponent[] components;
	/// The string this selector was constructed from.
	string original;
	/++
		Parses the selector string and constructs the usable structure.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Returns true if the given element matches this selector,
		considered relative to an arbitrary element.

		The element matches as soon as any one of the comma-separated
		components matches it.

		You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this
		with [std.algorithm.iteration.filter]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = element.tree.filter!(e => sel.matchesElement(e));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		foreach(component; components)
			if(component.matchElement(e, relativeTo))
				return true;

		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]

		Collects the matches of every component, searching from `start`,
		and passes the combined list through `removeDuplicates`.
	+/
	Element[] getMatchingElements(Element start, Element relativeTo = null) {
		Element[] ret;
		foreach(component; components)
			ret ~= getElementsBySelectorParts(start, component.parts, relativeTo);
		return removeDuplicates(ret);
	}

	/++
		Like [getMatchingElements], but returns a lazy range. Be careful
		about mutating the dom as you iterate through this.

		It filters `start.tree` with [matchesElement].
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		import std.algorithm.iteration;
		return start.tree.filter!(a => this.matchesElement(a, relativeTo));
	}


	/// Returns the string this was built from
	string toString() {
		return original;
	}

	/++
		Returns a string from the parsed result


		(may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing)

		The components are joined with ", ".
	+/
	string parsedToString() {
		string ret;

		foreach(idx, component; components) {
			if(idx) ret ~= ", ";
			ret ~= component.toString();
		}

		return ret;
	}
}

/// One comma-free selector: a sequence of [SelectorPart]s, each carrying the combinator that precedes it.
struct SelectorComponent {
	/// The parts, in the order they appear in the selector.
	SelectorPart[] parts;

	/// Concatenates the string form of every part.
	string toString() {
		string ret;
		foreach(part; parts)
			ret ~= part.toString();
		return ret;
	}

	// USEFUL
	/// Returns the elements found by [getElementsBySelectorParts] from `start`, passed through `removeDuplicates`.
	Element[] getElements(Element start, Element relativeTo = null) {
		return removeDuplicates(getElementsBySelectorParts(start, parts, relativeTo));
	}

	// USEFUL
	/++
		Tests whether `e` is matched by this component.

		The parts are checked from last to first: the last part must match
		`e` itself, and each earlier part must match an element reached by
		walking backwards over the combinator that followed it (parent
		chain, parent, previous sibling, or earlier siblings).

		`relativeTo` is handed to every part as its `:scope` element, and
		an ancestor or sibling walk gives up once it reaches it without a
		match. With the default null the walks only stop when they run
		out of parents or siblings.

		Throws:
			Exception("invalid selector") if the first part has a
			separation greater than 0, i.e. starts with `>`, `+`, `~` or `<`.
	+/
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null) return false;
		Element where = e;
		// separation of the part checked in the previous iteration, i.e. the
		// combinator sitting between the current part and that one
		int lastSeparation = -1;

		auto lparts = parts;

		if(parts.length && parts[0].separation > 0) {
			throw new Exception("invalid selector");
		/+
			// if it starts with a non-trivial separator, inject
			// a "*" matcher to act as a root. for cases like document.querySelector("> body")
			// which implies html

			// however, if it is a child-matching selector and there are no children,
			// bail out early as it obviously cannot match.
			bool hasNonTextChildren = false;
			foreach(c; e.children)
				if(c.nodeType != 3) {
					hasNonTextChildren = true;
					break;
				}
			if(!hasNonTextChildren)
				return false;

			// there is probably a MUCH better way to do this.
			auto dummy = SelectorPart.init;
			dummy.tagNameFilter = "*";
			dummy.separation = 0;
			lparts = dummy ~ lparts;
		+/
		}

		foreach(part; retro(lparts)) {

			 // writeln("matching ", where, " with ", part, " via ", lastSeparation);
			 // writeln(parts);

			if(lastSeparation == -1) {
				// first iteration: the last part is tested against e itself
				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 0) { // generic parent
				// need to go up the whole chain
				where = where.parentNode;

				while(where !is null) {
					if(part.matchElement(where, relativeTo))
						break;

					if(where is relativeTo)
						return false;

					where = where.parentNode;
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 1) { // the > operator
				where = where.parentNode;

				// a null parent is rejected by SelectorPart.matchElement
				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 2) { // the + operator
			//writeln("WHERE", where, " ", part);
				where = where.previousSibling("*");

				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 3) { // the ~ operator
				where = where.previousSibling("*");
				while(where !is null) {
					if(part.matchElement(where, relativeTo))
						break;

					if(where is relativeTo)
						return false;

					where = where.previousSibling("*");
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore
				// FIXME
				// nothing is checked here, so the part in front of a `<` is not
				// tested by this function
			}

			lastSeparation = part.separation;

			/*
				/+
				I commented this to magically make unittest pass and I think the reason it works
				when commented is that I inject a :scope iff there's a selector at top level now
				and if not, it follows the (frankly stupid) w3c standard behavior at arbitrary id
				asduiwh . but me injecting the :scope also acts as a terminating condition.

				tbh this prolly needs like a trillion more tests.
				+/
			if(where is relativeTo)
				return false; // at end of line, if we aren't done by now, the match fails
			*/
		}
		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	/// Lexes `selector` and parses all of its tokens as one component.
	static SelectorComponent fromString(string selector) {
		return parseSelector(lexSelector(selector));
	}
}

/++
	Lexes `selector` and parses every comma-separated alternative into its
	own [SelectorComponent]. Commas that appear while a parenthesis is open
	do not split. Empty alternatives are skipped.
+/
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		// advance to the next comma that is not inside parentheses
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		// stop when nothing is left after the comma
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

///.
/++
	Parses the tokens of one comma-free selector (as produced by
	[lexSelector]) into a [SelectorComponent].

	Params:
		tokens = the lexed selector, without top-level commas
		caseSensitiveTags = when false, tag names are lower-cased before being stored

	Throws:
		Exception("parse error") when a functional pseudo-class such as
		`:not` or `:nth-child` is not followed by `(`; an
		`ArsdException!"CSS Selector Problem"` when an operator token shows
		up that the parser has no rule for.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// Returns the concatenated tokens between the parentheses that follow
		// the current token. Nothing is consumed here; the caller switches to
		// SkippingFunctionalSelector so the main loop steps over them.
		string readFunctionalSelector() {
			string inner;
			// the pseudo-class may be the very last token, so check the bound
			// before looking at the next one
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				inner ~= t;
			}

			return inner;
		}

		// index of the token in selectorTokens, or -1 when it is a name
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							import arsd.core;
							throw ArsdException!"CSS Selector Problem"(token, tokens, cast(int) state);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "lang":
						// the argument is skipped and not recorded
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-child":
						// FIXME
						//current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// these become a requirement for an attribute named "nothing"
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						// likewise a requirement for an attribute named "FIXME"
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// step over the parenthesized argument; it has already been read
				// by readFunctionalSelector where it was needed
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						// NOTE(review): this appends to attributeValue, which is later
						// overwritten or never read, so the token is effectively
						// dropped -- confirm whether attributeName was intended.
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

///.
Element[] removeDuplicates(Element[] input) {
	// Returns the elements of `input` with later repeats dropped; the order of
	// first occurrences is preserved.
	Element[] ret;

	bool[Element] already; // set of elements we have emitted so far
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling

/++
	This function pointer is called if you call [Element.computedStyle] to attach an object to the element
	that holds stylesheet information. You can rebind it to something else to return a subclass
	if you want to hold more per-element extension data than the normal computed style object holds
	(e.g. layout info as well).

	The default is `return new CssStyle(null, element.style);`

	History:
		Added September 13, 2024 (dub v11.6)
+/
CssStyle function(Element e) computedStyleFactory = &defaultComputedStyleFactory;

/// ditto
CssStyle defaultComputedStyleFactory(Element e) {
	return new CssStyle(null, e.style); // gives at least something to work with
}


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Parses a declaration list.

		Params:
			rule = the selector text the declarations came from; empty or null means an inline style
			content = the `name: value; name: value` declaration text

		Declarations without a colon are silently skipped. If `content` is empty
		after stripping, nothing is stored at all (not even `originatingRule`).
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;

		}

		// foreach iterates over the slice as it was when the loop started, so the
		// longhands appended by expandShortForm are not themselves revisited
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Very rough specificity estimate for a selector. An empty rule is treated as an inline style.
	/// Only the part of the selector before the first comma is looked at.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
			s.important = 2;
		} else {
			// SO. WRONG.
			foreach(ch; rule) {
				if(ch == '.')
					s.classes++;
				if(ch == '#')
					s.ids++;
				if(ch == ' ')
					s.tags++;
				if(ch == ',')
					break;
			}
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// The four byte-sized counters overlay `score`, so comparing `score` compares all of them at once.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// Property access by camelCase name: `style.marginLeft` reads, `style.marginLeft = "1px"` writes
	/// (writes use inline-style specificity).
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);
		else
			return setValue(name, value, Specificity(0x02000000) /* inline specificity */);
	}

	/// takes dash style name. Returns the value of the first stored property with that name, or null if there is none.
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, respecting specificity. Takes dash style name.

		Params:
			name = dash-style property name
			value = new value; a trailing `!important` is stripped and folded into the specificity
			newSpecificity = specificity of the declaration being applied
			explicit = false when the value is being inferred from a shorthand

		Returns:
			`value` with any `!important` marker removed, whether or not it was actually stored.
			An existing property is only overwritten if `newSpecificity` is at least as high as
			the stored one.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		// Indexed rather than `foreach(ref ...)`: expandShortForm can append to
		// `properties`, which may reallocate it and would leave a ref dangling.
		foreach(i; 0 .. properties.length) {
			if(properties[i].name != name)
				continue;

			if(newSpecificity.score < properties[i].specificity.score) {
				//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", properties[i].specificity.score);
				return value; // do nothing - the specificity is too low
			}

			// store the new value first so that the shorthand expansion below
			// propagates the NEW value, not the one being replaced
			properties[i].givenExplicitly = explicit;
			properties[i].specificity = newSpecificity;
			properties[i].value = value;

			expandShortForm(properties[i], newSpecificity); // takes its argument by value
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must stay marked as inferred
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity; // remember where this value came from so the cascade works

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1-4 value box shorthand (margin, padding) into its -top/-right/-bottom/-left longhands.
	// Any other number of values is ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// runs of spaces would otherwise yield empty parts and a wrong count
		string[] parts;
		foreach(piece; value.split(" "))
			if(piece.length)
				parts ~= piece;

		switch(parts.length) {
			case 1: // all four sides
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2: // vertical, horizontal
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3: // top, horizontal, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4: // top, right, bottom, left
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// assert(0, value);
		}
	}

	/// If `p` is a known shorthand, stores the longhands it implies (marked as not given explicitly).
	/// Other properties are left alone.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				// the whole value is copied to each side as-is
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			// not expanded any further (yet)
			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties: as a `rule { ... }` block if there is an
	/// originating rule, otherwise as a bare declaration list.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred ones

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

// Wraps `url` in a css `url("...")` token. Backslashes and double quotes are
// escaped so the argument cannot terminate the quoted string early.
string cssUrl(string url) {
	return "url(\"" ~ url.replace("\\", "\\\\").replace("\"", "\\\"") ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	///.
	CssStyle[] rules;

	/// Parses css source text into [rules]. Comments are dropped and @-rules are skipped.
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?

		// states:
		//   0 = reading a selector or a declaration block
		//   1 = saw '/', expecting '*' to open a comment
		//   2 = inside a comment
		//   3 = saw '*' inside a comment, expecting '/' to close it
		//   4 = inside an @-rule prelude
		//   5 = inside an @-rule block
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two strings above is currently being built
		string* currentThing = &currentRule;
		foreach(c; source) {
			handle: switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was just a plain slash; put it back along with this character
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4:
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5:
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` to the top, growing the storage (doubling below 4096 slots, then by 4096) when full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	///.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	///.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1; // -1 = no child of this element visited yet
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
			handle its children

	*/

	/// Advances to the next element: the next unvisited child if there is one, otherwise
	/// back up through the ancestors until one with an unvisited child is found.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	///.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// one level of the walk: an element and the index of its child we descended into last
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	// Offset of the first occurrence of `needle` in `haystack`, or -1 when the
	// search comes back with nothing left.
	static import std.algorithm;
	auto remainder = std.algorithm.find(haystack, needle);
	if(remainder.length != 0)
		return haystack.length - remainder.length;
	return -1;
}

// Builds a new array: arr[0 .. position + 1], then `what`, then the rest of `arr`.
// Neither input is modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	T[] merged;
	merged.length = arr.length + what.length;
	int cursor = 0;

	// copies one run of items into the next free slots of `merged`
	void appendAll(T[] items) {
		foreach(item; items)
			merged[cursor++] = item;
	}

	appendAll(arr[0..position+1]);
	appendAll(what);
	appendAll(arr[position+1..$]);

	return merged;
}

// Linear search: true if any element of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	bool found = false;
	foreach(candidate; arr) {
		if(item == candidate) {
			found = true;
			break;
		}
	}
	return found;
}

// Shallow copy of an associative array of strings.
private string[string] aadup(in string[string] arr) {
	string[string] copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}

// Same thing for an AttributesHolder: copies every key/value pair into a fresh one.
private AttributesHolder aadup(const AttributesHolder arr) {
	AttributesHolder copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}















// These MUST be sorted.
See generatedomcases.d for a program to generate it if you need to add more than a few (otherwise maybe you can work it in yourself but yikes) 8207 8208 immutable string[] availableEntities = 8209 ["AElig", "AElig", "AMP", "AMP", "Aacute", "Aacute", "Abreve", "Abreve", "Acirc", "Acirc", "Acy", "Acy", "Afr", "Afr", "Agrave", "Agrave", "Alpha", "Alpha", "Amacr", "Amacr", "And", "And", "Aogon", "Aogon", "Aopf", "Aopf", "ApplyFunction", "ApplyFunction", "Aring", "Aring", "Ascr", "Ascr", "Assign", "Assign", "Atilde", 8210 "Atilde", "Auml", "Auml", "Backslash", "Backslash", "Barv", "Barv", "Barwed", "Barwed", "Bcy", "Bcy", "Because", "Because", "Bernoullis", "Bernoullis", "Beta", "Beta", "Bfr", "Bfr", "Bopf", "Bopf", "Breve", "Breve", "Bscr", "Bscr", "Bumpeq", "Bumpeq", "CHcy", "CHcy", "COPY", "COPY", "Cacute", "Cacute", "Cap", "Cap", "CapitalDifferentialD", 8211 "CapitalDifferentialD", "Cayleys", "Cayleys", "Ccaron", "Ccaron", "Ccedil", "Ccedil", "Ccirc", "Ccirc", "Cconint", "Cconint", "Cdot", "Cdot", "Cedilla", "Cedilla", "CenterDot", "CenterDot", "Cfr", "Cfr", "Chi", "Chi", "CircleDot", "CircleDot", "CircleMinus", "CircleMinus", "CirclePlus", "CirclePlus", "CircleTimes", "CircleTimes", 8212 "ClockwiseContourIntegral", "ClockwiseContourIntegral", "CloseCurlyDoubleQuote", "CloseCurlyDoubleQuote", "CloseCurlyQuote", "CloseCurlyQuote", "Colon", "Colon", "Colone", "Colone", "Congruent", "Congruent", "Conint", "Conint", "ContourIntegral", "ContourIntegral", "Copf", "Copf", "Coproduct", "Coproduct", "CounterClockwiseContourIntegral", 8213 "CounterClockwiseContourIntegral", "Cross", "Cross", "Cscr", "Cscr", "Cup", "Cup", "CupCap", "CupCap", "DD", "DD", "DDotrahd", "DDotrahd", "DJcy", "DJcy", "DScy", "DScy", "DZcy", "DZcy", "Dagger", "Dagger", "Darr", "Darr", "Dashv", "Dashv", "Dcaron", "Dcaron", "Dcy", "Dcy", "Del", "Del", "Delta", "Delta", "Dfr", "Dfr", 8214 "DiacriticalAcute", "DiacriticalAcute", "DiacriticalDot", "DiacriticalDot", "DiacriticalDoubleAcute", 
"DiacriticalDoubleAcute", "DiacriticalGrave", "DiacriticalGrave", "DiacriticalTilde", "DiacriticalTilde", "Diamond", "Diamond", "DifferentialD", "DifferentialD", "Dopf", "Dopf", "Dot", "Dot", "DotDot", "DotDot", "DotEqual", 8215 "DotEqual", "DoubleContourIntegral", "DoubleContourIntegral", "DoubleDot", "DoubleDot", "DoubleDownArrow", "DoubleDownArrow", "DoubleLeftArrow", "DoubleLeftArrow", "DoubleLeftRightArrow", "DoubleLeftRightArrow", "DoubleLeftTee", "DoubleLeftTee", "DoubleLongLeftArrow", "DoubleLongLeftArrow", "DoubleLongLeftRightArrow", 8216 "DoubleLongLeftRightArrow", "DoubleLongRightArrow", "DoubleLongRightArrow", "DoubleRightArrow", "DoubleRightArrow", "DoubleRightTee", "DoubleRightTee", "DoubleUpArrow", "DoubleUpArrow", "DoubleUpDownArrow", "DoubleUpDownArrow", "DoubleVerticalBar", "DoubleVerticalBar", "DownArrow", "DownArrow", "DownArrowBar", "DownArrowBar", 8217 "DownArrowUpArrow", "DownArrowUpArrow", "DownBreve", "DownBreve", "DownLeftRightVector", "DownLeftRightVector", "DownLeftTeeVector", "DownLeftTeeVector", "DownLeftVector", "DownLeftVector", "DownLeftVectorBar", "DownLeftVectorBar", "DownRightTeeVector", "DownRightTeeVector", "DownRightVector", "DownRightVector", "DownRightVectorBar", 8218 "DownRightVectorBar", "DownTee", "DownTee", "DownTeeArrow", "DownTeeArrow", "Downarrow", "Downarrow", "Dscr", "Dscr", "Dstrok", "Dstrok", "ENG", "ENG", "ETH", "ETH", "Eacute", "Eacute", "Ecaron", "Ecaron", "Ecirc", "Ecirc", "Ecy", "Ecy", "Edot", "Edot", "Efr", "Efr", "Egrave", "Egrave", "Element", "Element", "Emacr", "Emacr", 8219 "EmptySmallSquare", "EmptySmallSquare", "EmptyVerySmallSquare", "EmptyVerySmallSquare", "Eogon", "Eogon", "Eopf", "Eopf", "Epsilon", "Epsilon", "Equal", "Equal", "EqualTilde", "EqualTilde", "Equilibrium", "Equilibrium", "Escr", "Escr", "Esim", "Esim", "Eta", "Eta", "Euml", "Euml", "Exists", "Exists", "ExponentialE", "ExponentialE", 8220 "Fcy", "Fcy", "Ffr", "Ffr", "FilledSmallSquare", "FilledSmallSquare", "FilledVerySmallSquare", 
"FilledVerySmallSquare", "Fopf", "Fopf", "ForAll", "ForAll", "Fouriertrf", "Fouriertrf", "Fscr", "Fscr", "GJcy", "GJcy", "GT", "GT", "Gamma", "Gamma", "Gammad", "Gammad", "Gbreve", "Gbreve", "Gcedil", "Gcedil", "Gcirc", "Gcirc", 8221 "Gcy", "Gcy", "Gdot", "Gdot", "Gfr", "Gfr", "Gg", "Gg", "Gopf", "Gopf", "GreaterEqual", "GreaterEqual", "GreaterEqualLess", "GreaterEqualLess", "GreaterFullEqual", "GreaterFullEqual", "GreaterGreater", "GreaterGreater", "GreaterLess", "GreaterLess", "GreaterSlantEqual", "GreaterSlantEqual", "GreaterTilde", "GreaterTilde", 8222 "Gscr", "Gscr", "Gt", "Gt", "HARDcy", "HARDcy", "Hacek", "Hacek", "Hat", "Hat", "Hcirc", "Hcirc", "Hfr", "Hfr", "HilbertSpace", "HilbertSpace", "Hopf", "Hopf", "HorizontalLine", "HorizontalLine", "Hscr", "Hscr", "Hstrok", "Hstrok", "HumpDownHump", "HumpDownHump", "HumpEqual", "HumpEqual", "IEcy", "IEcy", "IJlig", "IJlig", 8223 "IOcy", "IOcy", "Iacute", "Iacute", "Icirc", "Icirc", "Icy", "Icy", "Idot", "Idot", "Ifr", "Ifr", "Igrave", "Igrave", "Im", "Im", "Imacr", "Imacr", "ImaginaryI", "ImaginaryI", "Implies", "Implies", "Int", "Int", "Integral", "Integral", "Intersection", "Intersection", "InvisibleComma", "InvisibleComma", "InvisibleTimes", 8224 "InvisibleTimes", "Iogon", "Iogon", "Iopf", "Iopf", "Iota", "Iota", "Iscr", "Iscr", "Itilde", "Itilde", "Iukcy", "Iukcy", "Iuml", "Iuml", "Jcirc", "Jcirc", "Jcy", "Jcy", "Jfr", "Jfr", "Jopf", "Jopf", "Jscr", "Jscr", "Jsercy", "Jsercy", "Jukcy", "Jukcy", "KHcy", "KHcy", "KJcy", "KJcy", "Kappa", "Kappa", "Kcedil", "Kcedil", 8225 "Kcy", "Kcy", "Kfr", "Kfr", "Kopf", "Kopf", "Kscr", "Kscr", "LJcy", "LJcy", "LT", "LT", "Lacute", "Lacute", "Lambda", "Lambda", "Lang", "Lang", "Laplacetrf", "Laplacetrf", "Larr", "Larr", "Lcaron", "Lcaron", "Lcedil", "Lcedil", "Lcy", "Lcy", "LeftAngleBracket", "LeftAngleBracket", "LeftArrow", "LeftArrow", "LeftArrowBar", 8226 "LeftArrowBar", "LeftArrowRightArrow", "LeftArrowRightArrow", "LeftCeiling", "LeftCeiling", "LeftDoubleBracket", 
"LeftDoubleBracket", "LeftDownTeeVector", "LeftDownTeeVector", "LeftDownVector", "LeftDownVector", "LeftDownVectorBar", "LeftDownVectorBar", "LeftFloor", "LeftFloor", "LeftRightArrow", "LeftRightArrow", "LeftRightVector", 8227 "LeftRightVector", "LeftTee", "LeftTee", "LeftTeeArrow", "LeftTeeArrow", "LeftTeeVector", "LeftTeeVector", "LeftTriangle", "LeftTriangle", "LeftTriangleBar", "LeftTriangleBar", "LeftTriangleEqual", "LeftTriangleEqual", "LeftUpDownVector", "LeftUpDownVector", "LeftUpTeeVector", "LeftUpTeeVector", "LeftUpVector", "LeftUpVector", 8228 "LeftUpVectorBar", "LeftUpVectorBar", "LeftVector", "LeftVector", "LeftVectorBar", "LeftVectorBar", "Leftarrow", "Leftarrow", "Leftrightarrow", "Leftrightarrow", "LessEqualGreater", "LessEqualGreater", "LessFullEqual", "LessFullEqual", "LessGreater", "LessGreater", "LessLess", "LessLess", "LessSlantEqual", "LessSlantEqual", 8229 "LessTilde", "LessTilde", "Lfr", "Lfr", "Ll", "Ll", "Lleftarrow", "Lleftarrow", "Lmidot", "Lmidot", "LongLeftArrow", "LongLeftArrow", "LongLeftRightArrow", "LongLeftRightArrow", "LongRightArrow", "LongRightArrow", "Longleftarrow", "Longleftarrow", "Longleftrightarrow", "Longleftrightarrow", "Longrightarrow", "Longrightarrow", 8230 "Lopf", "Lopf", "LowerLeftArrow", "LowerLeftArrow", "LowerRightArrow", "LowerRightArrow", "Lscr", "Lscr", "Lsh", "Lsh", "Lstrok", "Lstrok", "Lt", "Lt", "Map", "Map", "Mcy", "Mcy", "MediumSpace", "MediumSpace", "Mellintrf", "Mellintrf", "Mfr", "Mfr", "MinusPlus", "MinusPlus", "Mopf", "Mopf", "Mscr", "Mscr", "Mu", "Mu", 8231 "NJcy", "NJcy", "Nacute", "Nacute", "Ncaron", "Ncaron", "Ncedil", "Ncedil", "Ncy", "Ncy", "NegativeMediumSpace", "NegativeMediumSpace", "NegativeThickSpace", "NegativeThickSpace", "NegativeThinSpace", "NegativeThinSpace", "NegativeVeryThinSpace", "NegativeVeryThinSpace", "NestedGreaterGreater", "NestedGreaterGreater", 8232 "NestedLessLess", "NestedLessLess", "NewLine", "NewLine", "Nfr", "Nfr", "NoBreak", "NoBreak", "NonBreakingSpace", 
"NonBreakingSpace", "Nopf", "Nopf", "Not", "Not", "NotCongruent", "NotCongruent", "NotCupCap", "NotCupCap", "NotDoubleVerticalBar", "NotDoubleVerticalBar", "NotElement", "NotElement", "NotEqual", "NotEqual", 8233 "NotExists", "NotExists", "NotGreater", "NotGreater", "NotGreaterEqual", "NotGreaterEqual", "NotGreaterLess", "NotGreaterLess", "NotGreaterTilde", "NotGreaterTilde", "NotLeftTriangle", "NotLeftTriangle", "NotLeftTriangleEqual", "NotLeftTriangleEqual", "NotLess", "NotLess", "NotLessEqual", "NotLessEqual", "NotLessGreater", 8234 "NotLessGreater", "NotLessTilde", "NotLessTilde", "NotPrecedes", "NotPrecedes", "NotPrecedesSlantEqual", "NotPrecedesSlantEqual", "NotReverseElement", "NotReverseElement", "NotRightTriangle", "NotRightTriangle", "NotRightTriangleEqual", "NotRightTriangleEqual", "NotSquareSubsetEqual", "NotSquareSubsetEqual", "NotSquareSupersetEqual", 8235 "NotSquareSupersetEqual", "NotSubsetEqual", "NotSubsetEqual", "NotSucceeds", "NotSucceeds", "NotSucceedsSlantEqual", "NotSucceedsSlantEqual", "NotSupersetEqual", "NotSupersetEqual", "NotTilde", "NotTilde", "NotTildeEqual", "NotTildeEqual", "NotTildeFullEqual", "NotTildeFullEqual", "NotTildeTilde", "NotTildeTilde", "NotVerticalBar", 8236 "NotVerticalBar", "Nscr", "Nscr", "Ntilde", "Ntilde", "Nu", "Nu", "OElig", "OElig", "Oacute", "Oacute", "Ocirc", "Ocirc", "Ocy", "Ocy", "Odblac", "Odblac", "Ofr", "Ofr", "Ograve", "Ograve", "Omacr", "Omacr", "Omega", "Omega", "Omicron", "Omicron", "Oopf", "Oopf", "OpenCurlyDoubleQuote", "OpenCurlyDoubleQuote", "OpenCurlyQuote", 8237 "OpenCurlyQuote", "Or", "Or", "Oscr", "Oscr", "Oslash", "Oslash", "Otilde", "Otilde", "Otimes", "Otimes", "Ouml", "Ouml", "OverBar", "OverBar", "OverBrace", "OverBrace", "OverBracket", "OverBracket", "OverParenthesis", "OverParenthesis", "PartialD", "PartialD", "Pcy", "Pcy", "Pfr", "Pfr", "Phi", "Phi", "Pi", "Pi", "PlusMinus", 8238 "PlusMinus", "Poincareplane", "Poincareplane", "Popf", "Popf", "Pr", "Pr", "Precedes", "Precedes", 
"PrecedesEqual", "PrecedesEqual", "PrecedesSlantEqual", "PrecedesSlantEqual", "PrecedesTilde", "PrecedesTilde", "Prime", "Prime", "Product", "Product", "Proportion", "Proportion", "Proportional", "Proportional", "Pscr", "Pscr", 8239 "Psi", "Psi", "QUOT", "QUOT", "Qfr", "Qfr", "Qopf", "Qopf", "Qscr", "Qscr", "RBarr", "RBarr", "REG", "REG", "Racute", "Racute", "Rang", "Rang", "Rarr", "Rarr", "Rarrtl", "Rarrtl", "Rcaron", "Rcaron", "Rcedil", "Rcedil", "Rcy", "Rcy", "Re", "Re", "ReverseElement", "ReverseElement", "ReverseEquilibrium", "ReverseEquilibrium", 8240 "ReverseUpEquilibrium", "ReverseUpEquilibrium", "Rfr", "Rfr", "Rho", "Rho", "RightAngleBracket", "RightAngleBracket", "RightArrow", "RightArrow", "RightArrowBar", "RightArrowBar", "RightArrowLeftArrow", "RightArrowLeftArrow", "RightCeiling", "RightCeiling", "RightDoubleBracket", "RightDoubleBracket", "RightDownTeeVector", 8241 "RightDownTeeVector", "RightDownVector", "RightDownVector", "RightDownVectorBar", "RightDownVectorBar", "RightFloor", "RightFloor", "RightTee", "RightTee", "RightTeeArrow", "RightTeeArrow", "RightTeeVector", "RightTeeVector", "RightTriangle", "RightTriangle", "RightTriangleBar", "RightTriangleBar", "RightTriangleEqual", 8242 "RightTriangleEqual", "RightUpDownVector", "RightUpDownVector", "RightUpTeeVector", "RightUpTeeVector", "RightUpVector", "RightUpVector", "RightUpVectorBar", "RightUpVectorBar", "RightVector", "RightVector", "RightVectorBar", "RightVectorBar", "Rightarrow", "Rightarrow", "Ropf", "Ropf", "RoundImplies", "RoundImplies", 8243 "Rrightarrow", "Rrightarrow", "Rscr", "Rscr", "Rsh", "Rsh", "RuleDelayed", "RuleDelayed", "SHCHcy", "SHCHcy", "SHcy", "SHcy", "SOFTcy", "SOFTcy", "Sacute", "Sacute", "Sc", "Sc", "Scaron", "Scaron", "Scedil", "Scedil", "Scirc", "Scirc", "Scy", "Scy", "Sfr", "Sfr", "ShortDownArrow", "ShortDownArrow", "ShortLeftArrow", "ShortLeftArrow", 8244 "ShortRightArrow", "ShortRightArrow", "ShortUpArrow", "ShortUpArrow", "Sigma", "Sigma", "SmallCircle", 
"SmallCircle", "Sopf", "Sopf", "Sqrt", "Sqrt", "Square", "Square", "SquareIntersection", "SquareIntersection", "SquareSubset", "SquareSubset", "SquareSubsetEqual", "SquareSubsetEqual", "SquareSuperset", "SquareSuperset", 8245 "SquareSupersetEqual", "SquareSupersetEqual", "SquareUnion", "SquareUnion", "Sscr", "Sscr", "Star", "Star", "Sub", "Sub", "Subset", "Subset", "SubsetEqual", "SubsetEqual", "Succeeds", "Succeeds", "SucceedsEqual", "SucceedsEqual", "SucceedsSlantEqual", "SucceedsSlantEqual", "SucceedsTilde", "SucceedsTilde", "SuchThat", 8246 "SuchThat", "Sum", "Sum", "Sup", "Sup", "Superset", "Superset", "SupersetEqual", "SupersetEqual", "Supset", "Supset", "THORN", "THORN", "TRADE", "TRADE", "TSHcy", "TSHcy", "TScy", "TScy", "Tab", "Tab", "Tau", "Tau", "Tcaron", "Tcaron", "Tcedil", "Tcedil", "Tcy", "Tcy", "Tfr", "Tfr", "Therefore", "Therefore", "Theta", "Theta", 8247 "ThinSpace", "ThinSpace", "Tilde", "Tilde", "TildeEqual", "TildeEqual", "TildeFullEqual", "TildeFullEqual", "TildeTilde", "TildeTilde", "Topf", "Topf", "TripleDot", "TripleDot", "Tscr", "Tscr", "Tstrok", "Tstrok", "Uacute", "Uacute", "Uarr", "Uarr", "Uarrocir", "Uarrocir", "Ubrcy", "Ubrcy", "Ubreve", "Ubreve", "Ucirc", 8248 "Ucirc", "Ucy", "Ucy", "Udblac", "Udblac", "Ufr", "Ufr", "Ugrave", "Ugrave", "Umacr", "Umacr", "UnderBar", "UnderBar", "UnderBrace", "UnderBrace", "UnderBracket", "UnderBracket", "UnderParenthesis", "UnderParenthesis", "Union", "Union", "UnionPlus", "UnionPlus", "Uogon", "Uogon", "Uopf", "Uopf", "UpArrow", "UpArrow", "UpArrowBar", 8249 "UpArrowBar", "UpArrowDownArrow", "UpArrowDownArrow", "UpDownArrow", "UpDownArrow", "UpEquilibrium", "UpEquilibrium", "UpTee", "UpTee", "UpTeeArrow", "UpTeeArrow", "Uparrow", "Uparrow", "Updownarrow", "Updownarrow", "UpperLeftArrow", "UpperLeftArrow", "UpperRightArrow", "UpperRightArrow", "Upsi", "Upsi", "Upsilon", "Upsilon", 8250 "Uring", "Uring", "Uscr", "Uscr", "Utilde", "Utilde", "Uuml", "Uuml", "VDash", "VDash", "Vbar", "Vbar", "Vcy", "Vcy", 
"Vdash", "Vdash", "Vdashl", "Vdashl", "Vee", "Vee", "Verbar", "Verbar", "Vert", "Vert", "VerticalBar", "VerticalBar", "VerticalLine", "VerticalLine", "VerticalSeparator", "VerticalSeparator", "VerticalTilde", 8251 "VerticalTilde", "VeryThinSpace", "VeryThinSpace", "Vfr", "Vfr", "Vopf", "Vopf", "Vscr", "Vscr", "Vvdash", "Vvdash", "Wcirc", "Wcirc", "Wedge", "Wedge", "Wfr", "Wfr", "Wopf", "Wopf", "Wscr", "Wscr", "Xfr", "Xfr", "Xi", "Xi", "Xopf", "Xopf", "Xscr", "Xscr", "YAcy", "YAcy", "YIcy", "YIcy", "YUcy", "YUcy", "Yacute", "Yacute", 8252 "Ycirc", "Ycirc", "Ycy", "Ycy", "Yfr", "Yfr", "Yopf", "Yopf", "Yscr", "Yscr", "Yuml", "Yuml", "ZHcy", "ZHcy", "Zacute", "Zacute", "Zcaron", "Zcaron", "Zcy", "Zcy", "Zdot", "Zdot", "ZeroWidthSpace", "ZeroWidthSpace", "Zeta", "Zeta", "Zfr", "Zfr", "Zopf", "Zopf", "Zscr", "Zscr", "aacute", "aacute", "abreve", "abreve", "ac", 8253 "ac", "acd", "acd", "acirc", "acirc", "acute", "acute", "acy", "acy", "aelig", "aelig", "af", "af", "afr", "afr", "agrave", "agrave", "alefsym", "alefsym", "aleph", "aleph", "alpha", "alpha", "amacr", "amacr", "amalg", "amalg", "and", "and", "andand", "andand", "andd", "andd", "andslope", "andslope", "andv", "andv", "ang", 8254 "ang", "ange", "ange", "angle", "angle", "angmsd", "angmsd", "angmsdaa", "angmsdaa", "angmsdab", "angmsdab", "angmsdac", "angmsdac", "angmsdad", "angmsdad", "angmsdae", "angmsdae", "angmsdaf", "angmsdaf", "angmsdag", "angmsdag", "angmsdah", "angmsdah", "angrt", "angrt", "angrtvb", "angrtvb", "angrtvbd", "angrtvbd", "angsph", 8255 "angsph", "angst", "angst", "angzarr", "angzarr", "aogon", "aogon", "aopf", "aopf", "ap", "ap", "apE", "apE", "apacir", "apacir", "ape", "ape", "apid", "apid", "approx", "approx", "approxeq", "approxeq", "aring", "aring", "ascr", "ascr", "ast", "ast", "asymp", "asymp", "asympeq", "asympeq", "atilde", "atilde", "auml", 8256 "auml", "awconint", "awconint", "awint", "awint", "bNot", "bNot", "backcong", "backcong", "backepsilon", "backepsilon", "backprime", 
"backprime", "backsim", "backsim", "backsimeq", "backsimeq", "barvee", "barvee", "barwed", "barwed", "barwedge", "barwedge", "bbrk", "bbrk", "bbrktbrk", "bbrktbrk", "bcong", "bcong", "bcy", 8257 "bcy", "bdquo", "bdquo", "becaus", "becaus", "because", "because", "bemptyv", "bemptyv", "bepsi", "bepsi", "bernou", "bernou", "beta", "beta", "beth", "beth", "between", "between", "bfr", "bfr", "bigcap", "bigcap", "bigcirc", "bigcirc", "bigcup", "bigcup", "bigodot", "bigodot", "bigoplus", "bigoplus", "bigotimes", "bigotimes", 8258 "bigsqcup", "bigsqcup", "bigstar", "bigstar", "bigtriangledown", "bigtriangledown", "bigtriangleup", "bigtriangleup", "biguplus", "biguplus", "bigvee", "bigvee", "bigwedge", "bigwedge", "bkarow", "bkarow", "blacklozenge", "blacklozenge", "blacksquare", "blacksquare", "blacktriangle", "blacktriangle", "blacktriangledown", 8259 "blacktriangledown", "blacktriangleleft", "blacktriangleleft", "blacktriangleright", "blacktriangleright", "blank", "blank", "blk12", "blk12", "blk14", "blk14", "blk34", "blk34", "block", "block", "bnot", "bnot", "bopf", "bopf", "bot", "bot", "bottom", "bottom", "bowtie", "bowtie", "boxDL", "boxDL", "boxDR", "boxDR", "boxDl", 8260 "boxDl", "boxDr", "boxDr", "boxH", "boxH", "boxHD", "boxHD", "boxHU", "boxHU", "boxHd", "boxHd", "boxHu", "boxHu", "boxUL", "boxUL", "boxUR", "boxUR", "boxUl", "boxUl", "boxUr", "boxUr", "boxV", "boxV", "boxVH", "boxVH", "boxVL", "boxVL", "boxVR", "boxVR", "boxVh", "boxVh", "boxVl", "boxVl", "boxVr", "boxVr", "boxbox", 8261 "boxbox", "boxdL", "boxdL", "boxdR", "boxdR", "boxdl", "boxdl", "boxdr", "boxdr", "boxh", "boxh", "boxhD", "boxhD", "boxhU", "boxhU", "boxhd", "boxhd", "boxhu", "boxhu", "boxminus", "boxminus", "boxplus", "boxplus", "boxtimes", "boxtimes", "boxuL", "boxuL", "boxuR", "boxuR", "boxul", "boxul", "boxur", "boxur", "boxv", 8262 "boxv", "boxvH", "boxvH", "boxvL", "boxvL", "boxvR", "boxvR", "boxvh", "boxvh", "boxvl", "boxvl", "boxvr", "boxvr", "bprime", "bprime", "breve", "breve", 
"brvbar", "brvbar", "bscr", "bscr", "bsemi", "bsemi", "bsim", "bsim", "bsime", "bsime", "bsol", "bsol", "bsolb", "bsolb", "bsolhsub", "bsolhsub", "bull", "bull", "bullet", 8263 "bullet", "bump", "bump", "bumpE", "bumpE", "bumpe", "bumpe", "bumpeq", "bumpeq", "cacute", "cacute", "cap", "cap", "capand", "capand", "capbrcup", "capbrcup", "capcap", "capcap", "capcup", "capcup", "capdot", "capdot", "caret", "caret", "caron", "caron", "ccaps", "ccaps", "ccaron", "ccaron", "ccedil", "ccedil", "ccirc", 8264 "ccirc", "ccups", "ccups", "ccupssm", "ccupssm", "cdot", "cdot", "cedil", "cedil", "cemptyv", "cemptyv", "cent", "cent", "centerdot", "centerdot", "cfr", "cfr", "chcy", "chcy", "check", "check", "checkmark", "checkmark", "chi", "chi", "cir", "cir", "cirE", "cirE", "circ", "circ", "circeq", "circeq", "circlearrowleft", 8265 "circlearrowleft", "circlearrowright", "circlearrowright", "circledR", "circledR", "circledS", "circledS", "circledast", "circledast", "circledcirc", "circledcirc", "circleddash", "circleddash", "cire", "cire", "cirfnint", "cirfnint", "cirmid", "cirmid", "cirscir", "cirscir", "clubs", "clubs", "clubsuit", "clubsuit", "colon", 8266 "colon", "colone", "colone", "coloneq", "coloneq", "comma", "comma", "commat", "commat", "comp", "comp", "compfn", "compfn", "complement", "complement", "complexes", "complexes", "cong", "cong", "congdot", "congdot", "conint", "conint", "copf", "copf", "coprod", "coprod", "copy", "copy", "copysr", "copysr", "crarr", "crarr", 8267 "cross", "cross", "cscr", "cscr", "csub", "csub", "csube", "csube", "csup", "csup", "csupe", "csupe", "ctdot", "ctdot", "cudarrl", "cudarrl", "cudarrr", "cudarrr", "cuepr", "cuepr", "cuesc", "cuesc", "cularr", "cularr", "cularrp", "cularrp", "cup", "cup", "cupbrcap", "cupbrcap", "cupcap", "cupcap", "cupcup", "cupcup", 8268 "cupdot", "cupdot", "cupor", "cupor", "curarr", "curarr", "curarrm", "curarrm", "curlyeqprec", "curlyeqprec", "curlyeqsucc", "curlyeqsucc", "curlyvee", "curlyvee", "curlywedge", 
"curlywedge", "curren", "curren", "curvearrowleft", "curvearrowleft", "curvearrowright", "curvearrowright", "cuvee", "cuvee", "cuwed", "cuwed", 8269 "cwconint", "cwconint", "cwint", "cwint", "cylcty", "cylcty", "dArr", "dArr", "dHar", "dHar", "dagger", "dagger", "daleth", "daleth", "darr", "darr", "dash", "dash", "dashv", "dashv", "dbkarow", "dbkarow", "dblac", "dblac", "dcaron", "dcaron", "dcy", "dcy", "dd", "dd", "ddagger", "ddagger", "ddarr", "ddarr", "ddotseq", 8270 "ddotseq", "deg", "deg", "delta", "delta", "demptyv", "demptyv", "dfisht", "dfisht", "dfr", "dfr", "dharl", "dharl", "dharr", "dharr", "diam", "diam", "diamond", "diamond", "diamondsuit", "diamondsuit", "diams", "diams", "die", "die", "digamma", "digamma", "disin", "disin", "div", "div", "divide", "divide", "divideontimes", 8271 "divideontimes", "divonx", "divonx", "djcy", "djcy", "dlcorn", "dlcorn", "dlcrop", "dlcrop", "dollar", "dollar", "dopf", "dopf", "dot", "dot", "doteq", "doteq", "doteqdot", "doteqdot", "dotminus", "dotminus", "dotplus", "dotplus", "dotsquare", "dotsquare", "doublebarwedge", "doublebarwedge", "downarrow", "downarrow", "downdownarrows", 8272 "downdownarrows", "downharpoonleft", "downharpoonleft", "downharpoonright", "downharpoonright", "drbkarow", "drbkarow", "drcorn", "drcorn", "drcrop", "drcrop", "dscr", "dscr", "dscy", "dscy", "dsol", "dsol", "dstrok", "dstrok", "dtdot", "dtdot", "dtri", "dtri", "dtrif", "dtrif", "duarr", "duarr", "duhar", "duhar", "dwangle", 8273 "dwangle", "dzcy", "dzcy", "dzigrarr", "dzigrarr", "eDDot", "eDDot", "eDot", "eDot", "eacute", "eacute", "easter", "easter", "ecaron", "ecaron", "ecir", "ecir", "ecirc", "ecirc", "ecolon", "ecolon", "ecy", "ecy", "edot", "edot", "ee", "ee", "efDot", "efDot", "efr", "efr", "eg", "eg", "egrave", "egrave", "egs", "egs", "egsdot", 8274 "egsdot", "el", "el", "elinters", "elinters", "ell", "ell", "els", "els", "elsdot", "elsdot", "emacr", "emacr", "empty", "empty", "emptyset", "emptyset", "emptyv", "emptyv", "emsp", 
"emsp", "emsp13", "emsp13", "emsp14", "emsp14", "eng", "eng", "ensp", "ensp", "eogon", "eogon", "eopf", "eopf", "epar", "epar", "eparsl", 8275 "eparsl", "eplus", "eplus", "epsi", "epsi", "epsilon", "epsilon", "epsiv", "epsiv", "eqcirc", "eqcirc", "eqcolon", "eqcolon", "eqsim", "eqsim", "eqslantgtr", "eqslantgtr", "eqslantless", "eqslantless", "equals", "equals", "equest", "equest", "equiv", "equiv", "equivDD", "equivDD", "eqvparsl", "eqvparsl", "erDot", "erDot", 8276 "erarr", "erarr", "escr", "escr", "esdot", "esdot", "esim", "esim", "eta", "eta", "eth", "eth", "euml", "euml", "euro", "euro", "excl", "excl", "exist", "exist", "expectation", "expectation", "exponentiale", "exponentiale", "fallingdotseq", "fallingdotseq", "fcy", "fcy", "female", "female", "ffilig", "ffilig", "fflig", 8277 "fflig", "ffllig", "ffllig", "ffr", "ffr", "filig", "filig", "flat", "flat", "fllig", "fllig", "fltns", "fltns", "fnof", "fnof", "fopf", "fopf", "forall", "forall", "fork", "fork", "forkv", "forkv", "fpartint", "fpartint", "frac12", "frac12", "frac13", "frac13", "frac14", "frac14", "frac15", "frac15", "frac16", "frac16", 8278 "frac18", "frac18", "frac23", "frac23", "frac25", "frac25", "frac34", "frac34", "frac35", "frac35", "frac38", "frac38", "frac45", "frac45", "frac56", "frac56", "frac58", "frac58", "frac78", "frac78", "frasl", "frasl", "frown", "frown", "fscr", "fscr", "gE", "gE", "gEl", "gEl", "gacute", "gacute", "gamma", "gamma", "gammad", 8279 "gammad", "gap", "gap", "gbreve", "gbreve", "gcirc", "gcirc", "gcy", "gcy", "gdot", "gdot", "ge", "ge", "gel", "gel", "geq", "geq", "geqq", "geqq", "geqslant", "geqslant", "ges", "ges", "gescc", "gescc", "gesdot", "gesdot", "gesdoto", "gesdoto", "gesdotol", "gesdotol", "gesles", "gesles", "gfr", "gfr", "gg", "gg", "ggg", 8280 "ggg", "gimel", "gimel", "gjcy", "gjcy", "gl", "gl", "glE", "glE", "gla", "gla", "glj", "glj", "gnE", "gnE", "gnap", "gnap", "gnapprox", "gnapprox", "gne", "gne", "gneq", "gneq", "gneqq", "gneqq", "gnsim", "gnsim", 
"gopf", "gopf", "grave", "grave", "gscr", "gscr", "gsim", "gsim", "gsime", "gsime", "gsiml", "gsiml", "gtcc", 8281 "gtcc", "gtcir", "gtcir", "gtdot", "gtdot", "gtlPar", "gtlPar", "gtquest", "gtquest", "gtrapprox", "gtrapprox", "gtrarr", "gtrarr", "gtrdot", "gtrdot", "gtreqless", "gtreqless", "gtreqqless", "gtreqqless", "gtrless", "gtrless", "gtrsim", "gtrsim", "hArr", "hArr", "hairsp", "hairsp", "half", "half", "hamilt", "hamilt", 8282 "hardcy", "hardcy", "harr", "harr", "harrcir", "harrcir", "harrw", "harrw", "hbar", "hbar", "hcirc", "hcirc", "hearts", "hearts", "heartsuit", "heartsuit", "hellip", "hellip", "hercon", "hercon", "hfr", "hfr", "hksearow", "hksearow", "hkswarow", "hkswarow", "hoarr", "hoarr", "homtht", "homtht", "hookleftarrow", "hookleftarrow", 8283 "hookrightarrow", "hookrightarrow", "hopf", "hopf", "horbar", "horbar", "hscr", "hscr", "hslash", "hslash", "hstrok", "hstrok", "hybull", "hybull", "hyphen", "hyphen", "iacute", "iacute", "ic", "ic", "icirc", "icirc", "icy", "icy", "iecy", "iecy", "iexcl", "iexcl", "iff", "iff", "ifr", "ifr", "igrave", "igrave", "ii", 8284 "ii", "iiiint", "iiiint", "iiint", "iiint", "iinfin", "iinfin", "iiota", "iiota", "ijlig", "ijlig", "imacr", "imacr", "image", "image", "imagline", "imagline", "imagpart", "imagpart", "imath", "imath", "imof", "imof", "imped", "imped", "in", "in", "incare", "incare", "infin", "infin", "infintie", "infintie", "inodot", 8285 "inodot", "int", "int", "intcal", "intcal", "integers", "integers", "intercal", "intercal", "intlarhk", "intlarhk", "intprod", "intprod", "iocy", "iocy", "iogon", "iogon", "iopf", "iopf", "iota", "iota", "iprod", "iprod", "iquest", "iquest", "iscr", "iscr", "isin", "isin", "isinE", "isinE", "isindot", "isindot", "isins", 8286 "isins", "isinsv", "isinsv", "isinv", "isinv", "it", "it", "itilde", "itilde", "iukcy", "iukcy", "iuml", "iuml", "jcirc", "jcirc", "jcy", "jcy", "jfr", "jfr", "jmath", "jmath", "jopf", "jopf", "jscr", "jscr", "jsercy", "jsercy", "jukcy", "jukcy", 
"kappa", "kappa", "kappav", "kappav", "kcedil", "kcedil", "kcy", "kcy", "kfr", 8287 "kfr", "kgreen", "kgreen", "khcy", "khcy", "kjcy", "kjcy", "kopf", "kopf", "kscr", "kscr", "lAarr", "lAarr", "lArr", "lArr", "lAtail", "lAtail", "lBarr", "lBarr", "lE", "lE", "lEg", "lEg", "lHar", "lHar", "lacute", "lacute", "laemptyv", "laemptyv", "lagran", "lagran", "lambda", "lambda", "lang", "lang", "langd", "langd", 8288 "langle", "langle", "lap", "lap", "laquo", "laquo", "larr", "larr", "larrb", "larrb", "larrbfs", "larrbfs", "larrfs", "larrfs", "larrhk", "larrhk", "larrlp", "larrlp", "larrpl", "larrpl", "larrsim", "larrsim", "larrtl", "larrtl", "lat", "lat", "latail", "latail", "late", "late", "lbarr", "lbarr", "lbbrk", "lbbrk", "lbrace", 8289 "lbrace", "lbrack", "lbrack", "lbrke", "lbrke", "lbrksld", "lbrksld", "lbrkslu", "lbrkslu", "lcaron", "lcaron", "lcedil", "lcedil", "lceil", "lceil", "lcub", "lcub", "lcy", "lcy", "ldca", "ldca", "ldquo", "ldquo", "ldquor", "ldquor", "ldrdhar", "ldrdhar", "ldrushar", "ldrushar", "ldsh", "ldsh", "le", "le", "leftarrow", 8290 "leftarrow", "leftarrowtail", "leftarrowtail", "leftharpoondown", "leftharpoondown", "leftharpoonup", "leftharpoonup", "leftleftarrows", "leftleftarrows", "leftrightarrow", "leftrightarrow", "leftrightarrows", "leftrightarrows", "leftrightharpoons", "leftrightharpoons", "leftrightsquigarrow", "leftrightsquigarrow", "leftthreetimes", 8291 "leftthreetimes", "leg", "leg", "leq", "leq", "leqq", "leqq", "leqslant", "leqslant", "les", "les", "lescc", "lescc", "lesdot", "lesdot", "lesdoto", "lesdoto", "lesdotor", "lesdotor", "lesges", "lesges", "lessapprox", "lessapprox", "lessdot", "lessdot", "lesseqgtr", "lesseqgtr", "lesseqqgtr", "lesseqqgtr", "lessgtr", "lessgtr", 8292 "lesssim", "lesssim", "lfisht", "lfisht", "lfloor", "lfloor", "lfr", "lfr", "lg", "lg", "lgE", "lgE", "lhard", "lhard", "lharu", "lharu", "lharul", "lharul", "lhblk", "lhblk", "ljcy", "ljcy", "ll", "ll", "llarr", "llarr", "llcorner", "llcorner", "llhard", 
"llhard", "lltri", "lltri", "lmidot", "lmidot", "lmoust", "lmoust", 8293 "lmoustache", "lmoustache", "lnE", "lnE", "lnap", "lnap", "lnapprox", "lnapprox", "lne", "lne", "lneq", "lneq", "lneqq", "lneqq", "lnsim", "lnsim", "loang", "loang", "loarr", "loarr", "lobrk", "lobrk", "longleftarrow", "longleftarrow", "longleftrightarrow", "longleftrightarrow", "longmapsto", "longmapsto", "longrightarrow", 8294 "longrightarrow", "looparrowleft", "looparrowleft", "looparrowright", "looparrowright", "lopar", "lopar", "lopf", "lopf", "loplus", "loplus", "lotimes", "lotimes", "lowast", "lowast", "lowbar", "lowbar", "loz", "loz", "lozenge", "lozenge", "lozf", "lozf", "lpar", "lpar", "lparlt", "lparlt", "lrarr", "lrarr", "lrcorner", 8295 "lrcorner", "lrhar", "lrhar", "lrhard", "lrhard", "lrm", "lrm", "lrtri", "lrtri", "lsaquo", "lsaquo", "lscr", "lscr", "lsh", "lsh", "lsim", "lsim", "lsime", "lsime", "lsimg", "lsimg", "lsqb", "lsqb", "lsquo", "lsquo", "lsquor", "lsquor", "lstrok", "lstrok", "ltcc", "ltcc", "ltcir", "ltcir", "ltdot", "ltdot", "lthree", 8296 "lthree", "ltimes", "ltimes", "ltlarr", "ltlarr", "ltquest", "ltquest", "ltrPar", "ltrPar", "ltri", "ltri", "ltrie", "ltrie", "ltrif", "ltrif", "lurdshar", "lurdshar", "luruhar", "luruhar", "mDDot", "mDDot", "macr", "macr", "male", "male", "malt", "malt", "maltese", "maltese", "map", "map", "mapsto", "mapsto", "mapstodown", 8297 "mapstodown", "mapstoleft", "mapstoleft", "mapstoup", "mapstoup", "marker", "marker", "mcomma", "mcomma", "mcy", "mcy", "mdash", "mdash", "measuredangle", "measuredangle", "mfr", "mfr", "mho", "mho", "micro", "micro", "mid", "mid", "midast", "midast", "midcir", "midcir", "middot", "middot", "minus", "minus", "minusb", 8298 "minusb", "minusd", "minusd", "minusdu", "minusdu", "mlcp", "mlcp", "mldr", "mldr", "mnplus", "mnplus", "models", "models", "mopf", "mopf", "mp", "mp", "mscr", "mscr", "mstpos", "mstpos", "mu", "mu", "multimap", "multimap", "mumap", "mumap", "nLeftarrow", "nLeftarrow", 
"nLeftrightarrow", "nLeftrightarrow", "nRightarrow", 8299 "nRightarrow", "nVDash", "nVDash", "nVdash", "nVdash", "nabla", "nabla", "nacute", "nacute", "nap", "nap", "napos", "napos", "napprox", "napprox", "natur", "natur", "natural", "natural", "naturals", "naturals", "nbsp", "nbsp", "ncap", "ncap", "ncaron", "ncaron", "ncedil", "ncedil", "ncong", "ncong", "ncup", "ncup", "ncy", 8300 "ncy", "ndash", "ndash", "ne", "ne", "neArr", "neArr", "nearhk", "nearhk", "nearr", "nearr", "nearrow", "nearrow", "nequiv", "nequiv", "nesear", "nesear", "nexist", "nexist", "nexists", "nexists", "nfr", "nfr", "nge", "nge", "ngeq", "ngeq", "ngsim", "ngsim", "ngt", "ngt", "ngtr", "ngtr", "nhArr", "nhArr", "nharr", "nharr", 8301 "nhpar", "nhpar", "ni", "ni", "nis", "nis", "nisd", "nisd", "niv", "niv", "njcy", "njcy", "nlArr", "nlArr", "nlarr", "nlarr", "nldr", "nldr", "nle", "nle", "nleftarrow", "nleftarrow", "nleftrightarrow", "nleftrightarrow", "nleq", "nleq", "nless", "nless", "nlsim", "nlsim", "nlt", "nlt", "nltri", "nltri", "nltrie", "nltrie", 8302 "nmid", "nmid", "nopf", "nopf", "not", "not", "notin", "notin", "notinva", "notinva", "notinvb", "notinvb", "notinvc", "notinvc", "notni", "notni", "notniva", "notniva", "notnivb", "notnivb", "notnivc", "notnivc", "npar", "npar", "nparallel", "nparallel", "npolint", "npolint", "npr", "npr", "nprcue", "nprcue", "nprec", 8303 "nprec", "nrArr", "nrArr", "nrarr", "nrarr", "nrightarrow", "nrightarrow", "nrtri", "nrtri", "nrtrie", "nrtrie", "nsc", "nsc", "nsccue", "nsccue", "nscr", "nscr", "nshortmid", "nshortmid", "nshortparallel", "nshortparallel", "nsim", "nsim", "nsime", "nsime", "nsimeq", "nsimeq", "nsmid", "nsmid", "nspar", "nspar", "nsqsube", 8304 "nsqsube", "nsqsupe", "nsqsupe", "nsub", "nsub", "nsube", "nsube", "nsubseteq", "nsubseteq", "nsucc", "nsucc", "nsup", "nsup", "nsupe", "nsupe", "nsupseteq", "nsupseteq", "ntgl", "ntgl", "ntilde", "ntilde", "ntlg", "ntlg", "ntriangleleft", "ntriangleleft", "ntrianglelefteq", "ntrianglelefteq", 
"ntriangleright", "ntriangleright", 8305 "ntrianglerighteq", "ntrianglerighteq", "nu", "nu", "num", "num", "numero", "numero", "numsp", "numsp", "nvDash", "nvDash", "nvHarr", "nvHarr", "nvdash", "nvdash", "nvinfin", "nvinfin", "nvlArr", "nvlArr", "nvrArr", "nvrArr", "nwArr", "nwArr", "nwarhk", "nwarhk", "nwarr", "nwarr", "nwarrow", "nwarrow", "nwnear", "nwnear", 8306 "oS", "oS", "oacute", "oacute", "oast", "oast", "ocir", "ocir", "ocirc", "ocirc", "ocy", "ocy", "odash", "odash", "odblac", "odblac", "odiv", "odiv", "odot", "odot", "odsold", "odsold", "oelig", "oelig", "ofcir", "ofcir", "ofr", "ofr", "ogon", "ogon", "ograve", "ograve", "ogt", "ogt", "ohbar", "ohbar", "ohm", "ohm", "oint", 8307 "oint", "olarr", "olarr", "olcir", "olcir", "olcross", "olcross", "oline", "oline", "olt", "olt", "omacr", "omacr", "omega", "omega", "omicron", "omicron", "omid", "omid", "ominus", "ominus", "oopf", "oopf", "opar", "opar", "operp", "operp", "oplus", "oplus", "or", "or", "orarr", "orarr", "ord", "ord", "order", "order", 8308 "orderof", "orderof", "ordf", "ordf", "ordm", "ordm", "origof", "origof", "oror", "oror", "orslope", "orslope", "orv", "orv", "oscr", "oscr", "oslash", "oslash", "osol", "osol", "otilde", "otilde", "otimes", "otimes", "otimesas", "otimesas", "ouml", "ouml", "ovbar", "ovbar", "par", "par", "para", "para", "parallel", "parallel", 8309 "parsim", "parsim", "parsl", "parsl", "part", "part", "pcy", "pcy", "percnt", "percnt", "period", "period", "permil", "permil", "perp", "perp", "pertenk", "pertenk", "pfr", "pfr", "phi", "phi", "phiv", "phiv", "phmmat", "phmmat", "phone", "phone", "pi", "pi", "pitchfork", "pitchfork", "piv", "piv", "planck", "planck", 8310 "planckh", "planckh", "plankv", "plankv", "plus", "plus", "plusacir", "plusacir", "plusb", "plusb", "pluscir", "pluscir", "plusdo", "plusdo", "plusdu", "plusdu", "pluse", "pluse", "plusmn", "plusmn", "plussim", "plussim", "plustwo", "plustwo", "pm", "pm", "pointint", "pointint", "popf", "popf", "pound", 
"pound", "pr", 8311 "pr", "prE", "prE", "prap", "prap", "prcue", "prcue", "pre", "pre", "prec", "prec", "precapprox", "precapprox", "preccurlyeq", "preccurlyeq", "preceq", "preceq", "precnapprox", "precnapprox", "precneqq", "precneqq", "precnsim", "precnsim", "precsim", "precsim", "prime", "prime", "primes", "primes", "prnE", "prnE", "prnap", 8312 "prnap", "prnsim", "prnsim", "prod", "prod", "profalar", "profalar", "profline", "profline", "profsurf", "profsurf", "prop", "prop", "propto", "propto", "prsim", "prsim", "prurel", "prurel", "pscr", "pscr", "psi", "psi", "puncsp", "puncsp", "qfr", "qfr", "qint", "qint", "qopf", "qopf", "qprime", "qprime", "qscr", "qscr", 8313 "quaternions", "quaternions", "quatint", "quatint", "quest", "quest", "questeq", "questeq", "rAarr", "rAarr", "rArr", "rArr", "rAtail", "rAtail", "rBarr", "rBarr", "rHar", "rHar", "racute", "racute", "radic", "radic", "raemptyv", "raemptyv", "rang", "rang", "rangd", "rangd", "range", "range", "rangle", "rangle", "raquo", 8314 "raquo", "rarr", "rarr", "rarrap", "rarrap", "rarrb", "rarrb", "rarrbfs", "rarrbfs", "rarrc", "rarrc", "rarrfs", "rarrfs", "rarrhk", "rarrhk", "rarrlp", "rarrlp", "rarrpl", "rarrpl", "rarrsim", "rarrsim", "rarrtl", "rarrtl", "rarrw", "rarrw", "ratail", "ratail", "ratio", "ratio", "rationals", "rationals", "rbarr", "rbarr", 8315 "rbbrk", "rbbrk", "rbrace", "rbrace", "rbrack", "rbrack", "rbrke", "rbrke", "rbrksld", "rbrksld", "rbrkslu", "rbrkslu", "rcaron", "rcaron", "rcedil", "rcedil", "rceil", "rceil", "rcub", "rcub", "rcy", "rcy", "rdca", "rdca", "rdldhar", "rdldhar", "rdquo", "rdquo", "rdquor", "rdquor", "rdsh", "rdsh", "real", "real", "realine", 8316 "realine", "realpart", "realpart", "reals", "reals", "rect", "rect", "reg", "reg", "rfisht", "rfisht", "rfloor", "rfloor", "rfr", "rfr", "rhard", "rhard", "rharu", "rharu", "rharul", "rharul", "rho", "rho", "rhov", "rhov", "rightarrow", "rightarrow", "rightarrowtail", "rightarrowtail", "rightharpoondown", "rightharpoondown", 8317 
"rightharpoonup", "rightharpoonup", "rightleftarrows", "rightleftarrows", "rightleftharpoons", "rightleftharpoons", "rightrightarrows", "rightrightarrows", "rightsquigarrow", "rightsquigarrow", "rightthreetimes", "rightthreetimes", "ring", "ring", "risingdotseq", "risingdotseq", "rlarr", "rlarr", "rlhar", "rlhar", "rlm", 8318 "rlm", "rmoust", "rmoust", "rmoustache", "rmoustache", "rnmid", "rnmid", "roang", "roang", "roarr", "roarr", "robrk", "robrk", "ropar", "ropar", "ropf", "ropf", "roplus", "roplus", "rotimes", "rotimes", "rpar", "rpar", "rpargt", "rpargt", "rppolint", "rppolint", "rrarr", "rrarr", "rsaquo", "rsaquo", "rscr", "rscr", "rsh", 8319 "rsh", "rsqb", "rsqb", "rsquo", "rsquo", "rsquor", "rsquor", "rthree", "rthree", "rtimes", "rtimes", "rtri", "rtri", "rtrie", "rtrie", "rtrif", "rtrif", "rtriltri", "rtriltri", "ruluhar", "ruluhar", "rx", "rx", "sacute", "sacute", "sbquo", "sbquo", "sc", "sc", "scE", "scE", "scap", "scap", "scaron", "scaron", "sccue", 8320 "sccue", "sce", "sce", "scedil", "scedil", "scirc", "scirc", "scnE", "scnE", "scnap", "scnap", "scnsim", "scnsim", "scpolint", "scpolint", "scsim", "scsim", "scy", "scy", "sdot", "sdot", "sdotb", "sdotb", "sdote", "sdote", "seArr", "seArr", "searhk", "searhk", "searr", "searr", "searrow", "searrow", "sect", "sect", "semi", 8321 "semi", "seswar", "seswar", "setminus", "setminus", "setmn", "setmn", "sext", "sext", "sfr", "sfr", "sfrown", "sfrown", "sharp", "sharp", "shchcy", "shchcy", "shcy", "shcy", "shortmid", "shortmid", "shortparallel", "shortparallel", "shy", "shy", "sigma", "sigma", "sigmaf", "sigmaf", "sigmav", "sigmav", "sim", "sim", "simdot", 8322 "simdot", "sime", "sime", "simeq", "simeq", "simg", "simg", "simgE", "simgE", "siml", "siml", "simlE", "simlE", "simne", "simne", "simplus", "simplus", "simrarr", "simrarr", "slarr", "slarr", "smallsetminus", "smallsetminus", "smashp", "smashp", "smeparsl", "smeparsl", "smid", "smid", "smile", "smile", "smt", "smt", "smte", 8323 "smte", "softcy", 
"softcy", "sol", "sol", "solb", "solb", "solbar", "solbar", "sopf", "sopf", "spades", "spades", "spadesuit", "spadesuit", "spar", "spar", "sqcap", "sqcap", "sqcup", "sqcup", "sqsub", "sqsub", "sqsube", "sqsube", "sqsubset", "sqsubset", "sqsubseteq", "sqsubseteq", "sqsup", "sqsup", "sqsupe", "sqsupe", 8324 "sqsupset", "sqsupset", "sqsupseteq", "sqsupseteq", "squ", "squ", "square", "square", "squarf", "squarf", "squf", "squf", "srarr", "srarr", "sscr", "sscr", "ssetmn", "ssetmn", "ssmile", "ssmile", "sstarf", "sstarf", "star", "star", "starf", "starf", "straightepsilon", "straightepsilon", "straightphi", "straightphi", "strns", 8325 "strns", "sub", "sub", "subE", "subE", "subdot", "subdot", "sube", "sube", "subedot", "subedot", "submult", "submult", "subnE", "subnE", "subne", "subne", "subplus", "subplus", "subrarr", "subrarr", "subset", "subset", "subseteq", "subseteq", "subseteqq", "subseteqq", "subsetneq", "subsetneq", "subsetneqq", "subsetneqq", 8326 "subsim", "subsim", "subsub", "subsub", "subsup", "subsup", "succ", "succ", "succapprox", "succapprox", "succcurlyeq", "succcurlyeq", "succeq", "succeq", "succnapprox", "succnapprox", "succneqq", "succneqq", "succnsim", "succnsim", "succsim", "succsim", "sum", "sum", "sung", "sung", "sup", "sup", "sup1", "sup1", "sup2", 8327 "sup2", "sup3", "sup3", "supE", "supE", "supdot", "supdot", "supdsub", "supdsub", "supe", "supe", "supedot", "supedot", "suphsol", "suphsol", "suphsub", "suphsub", "suplarr", "suplarr", "supmult", "supmult", "supnE", "supnE", "supne", "supne", "supplus", "supplus", "supset", "supset", "supseteq", "supseteq", "supseteqq", 8328 "supseteqq", "supsetneq", "supsetneq", "supsetneqq", "supsetneqq", "supsim", "supsim", "supsub", "supsub", "supsup", "supsup", "swArr", "swArr", "swarhk", "swarhk", "swarr", "swarr", "swarrow", "swarrow", "swnwar", "swnwar", "szlig", "szlig", "target", "target", "tau", "tau", "tbrk", "tbrk", "tcaron", "tcaron", "tcedil", 8329 "tcedil", "tcy", "tcy", "tdot", "tdot", "telrec", 
"telrec", "tfr", "tfr", "there4", "there4", "therefore", "therefore", "theta", "theta", "thetasym", "thetasym", "thetav", "thetav", "thickapprox", "thickapprox", "thicksim", "thicksim", "thinsp", "thinsp", "thkap", "thkap", "thksim", "thksim", "thorn", "thorn", "tilde", 8330 "tilde", "times", "times", "timesb", "timesb", "timesbar", "timesbar", "timesd", "timesd", "tint", "tint", "toea", "toea", "top", "top", "topbot", "topbot", "topcir", "topcir", "topf", "topf", "topfork", "topfork", "tosa", "tosa", "tprime", "tprime", "trade", "trade", "triangle", "triangle", "triangledown", "triangledown", 8331 "triangleleft", "triangleleft", "trianglelefteq", "trianglelefteq", "triangleq", "triangleq", "triangleright", "triangleright", "trianglerighteq", "trianglerighteq", "tridot", "tridot", "trie", "trie", "triminus", "triminus", "triplus", "triplus", "trisb", "trisb", "tritime", "tritime", "trpezium", "trpezium", "tscr", 8332 "tscr", "tscy", "tscy", "tshcy", "tshcy", "tstrok", "tstrok", "twixt", "twixt", "twoheadleftarrow", "twoheadleftarrow", "twoheadrightarrow", "twoheadrightarrow", "uArr", "uArr", "uHar", "uHar", "uacute", "uacute", "uarr", "uarr", "ubrcy", "ubrcy", "ubreve", "ubreve", "ucirc", "ucirc", "ucy", "ucy", "udarr", "udarr", "udblac", 8333 "udblac", "udhar", "udhar", "ufisht", "ufisht", "ufr", "ufr", "ugrave", "ugrave", "uharl", "uharl", "uharr", "uharr", "uhblk", "uhblk", "ulcorn", "ulcorn", "ulcorner", "ulcorner", "ulcrop", "ulcrop", "ultri", "ultri", "umacr", "umacr", "uml", "uml", "uogon", "uogon", "uopf", "uopf", "uparrow", "uparrow", "updownarrow", 8334 "updownarrow", "upharpoonleft", "upharpoonleft", "upharpoonright", "upharpoonright", "uplus", "uplus", "upsi", "upsi", "upsih", "upsih", "upsilon", "upsilon", "upuparrows", "upuparrows", "urcorn", "urcorn", "urcorner", "urcorner", "urcrop", "urcrop", "uring", "uring", "urtri", "urtri", "uscr", "uscr", "utdot", "utdot", 8335 "utilde", "utilde", "utri", "utri", "utrif", "utrif", "uuarr", "uuarr", "uuml", 
"uuml", "uwangle", "uwangle", "vArr", "vArr", "vBar", "vBar", "vBarv", "vBarv", "vDash", "vDash", "vangrt", "vangrt", "varepsilon", "varepsilon", "varkappa", "varkappa", "varnothing", "varnothing", "varphi", "varphi", "varpi", "varpi", "varpropto", 8336 "varpropto", "varr", "varr", "varrho", "varrho", "varsigma", "varsigma", "vartheta", "vartheta", "vartriangleleft", "vartriangleleft", "vartriangleright", "vartriangleright", "vcy", "vcy", "vdash", "vdash", "vee", "vee", "veebar", "veebar", "veeeq", "veeeq", "vellip", "vellip", "verbar", "verbar", "vert", "vert", "vfr", 8337 "vfr", "vltri", "vltri", "vopf", "vopf", "vprop", "vprop", "vrtri", "vrtri", "vscr", "vscr", "vzigzag", "vzigzag", "wcirc", "wcirc", "wedbar", "wedbar", "wedge", "wedge", "wedgeq", "wedgeq", "weierp", "weierp", "wfr", "wfr", "wopf", "wopf", "wp", "wp", "wr", "wr", "wreath", "wreath", "wscr", "wscr", "xcap", "xcap", "xcirc", 8338 "xcirc", "xcup", "xcup", "xdtri", "xdtri", "xfr", "xfr", "xhArr", "xhArr", "xharr", "xharr", "xi", "xi", "xlArr", "xlArr", "xlarr", "xlarr", "xmap", "xmap", "xnis", "xnis", "xodot", "xodot", "xopf", "xopf", "xoplus", "xoplus", "xotime", "xotime", "xrArr", "xrArr", "xrarr", "xrarr", "xscr", "xscr", "xsqcup", "xsqcup", "xuplus", 8339 "xuplus", "xutri", "xutri", "xvee", "xvee", "xwedge", "xwedge", "yacute", "yacute", "yacy", "yacy", "ycirc", "ycirc", "ycy", "ycy", "yen", "yen", "yfr", "yfr", "yicy", "yicy", "yopf", "yopf", "yscr", "yscr", "yucy", "yucy", "yuml", "yuml", "zacute", "zacute", "zcaron", "zcaron", "zcy", "zcy", "zdot", "zdot", "zeetrf", 8340 "zeetrf", "zeta", "zeta", "zfr", "zfr", "zhcy", "zhcy", "zigrarr", "zigrarr", "zopf", "zopf", "zscr", "zscr", "zwj", "zwj", "zwnj", "zwnj", ]; 8341 8342 immutable dchar[] availableEntitiesValues = 8343 ['\u00c6', '\u00c6', '\u0026', '\u0026', '\u00c1', '\u00c1', '\u0102', '\u0102', '\u00c2', '\u00c2', '\u0410', '\u0410', '\U0001d504', '\U0001d504', '\u00c0', '\u00c0', '\u0391', '\u0391', '\u0100', '\u0100', '\u2a53', 
'\u2a53', '\u0104', '\u0104', '\U0001d538', '\U0001d538', '\u2061', '\u2061', '\u00c5', '\u00c5', '\U0001d49c', '\U0001d49c', '\u2254', '\u2254', '\u00c3', 8344 '\u00c3', '\u00c4', '\u00c4', '\u2216', '\u2216', '\u2ae7', '\u2ae7', '\u2306', '\u2306', '\u0411', '\u0411', '\u2235', '\u2235', '\u212c', '\u212c', '\u0392', '\u0392', '\U0001d505', '\U0001d505', '\U0001d539', '\U0001d539', '\u02d8', '\u02d8', '\u212c', '\u212c', '\u224e', '\u224e', '\u0427', '\u0427', '\u00a9', '\u00a9', '\u0106', '\u0106', '\u22d2', '\u22d2', '\u2145', 8345 '\u2145', '\u212d', '\u212d', '\u010c', '\u010c', '\u00c7', '\u00c7', '\u0108', '\u0108', '\u2230', '\u2230', '\u010a', '\u010a', '\u00b8', '\u00b8', '\u00b7', '\u00b7', '\u212d', '\u212d', '\u03a7', '\u03a7', '\u2299', '\u2299', '\u2296', '\u2296', '\u2295', '\u2295', '\u2297', '\u2297', 8346 '\u2232', '\u2232', '\u201d', '\u201d', '\u2019', '\u2019', '\u2237', '\u2237', '\u2a74', '\u2a74', '\u2261', '\u2261', '\u222f', '\u222f', '\u222e', '\u222e', '\u2102', '\u2102', '\u2210', '\u2210', '\u2233', 8347 '\u2233', '\u2a2f', '\u2a2f', '\U0001d49e', '\U0001d49e', '\u22d3', '\u22d3', '\u224d', '\u224d', '\u2145', '\u2145', '\u2911', '\u2911', '\u0402', '\u0402', '\u0405', '\u0405', '\u040f', '\u040f', '\u2021', '\u2021', '\u21a1', '\u21a1', '\u2ae4', '\u2ae4', '\u010e', '\u010e', '\u0414', '\u0414', '\u2207', '\u2207', '\u0394', '\u0394', '\U0001d507', '\U0001d507', 8348 '\u00b4', '\u00b4', '\u02d9', '\u02d9', '\u02dd', '\u02dd', '\u0060', '\u0060', '\u02dc', '\u02dc', '\u22c4', '\u22c4', '\u2146', '\u2146', '\U0001d53b', '\U0001d53b', '\u00a8', '\u00a8', '\u20dc', '\u20dc', '\u2250', 8349 '\u2250', '\u222f', '\u222f', '\u00a8', '\u00a8', '\u21d3', '\u21d3', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u2ae4', '\u2ae4', '\u27f8', '\u27f8', '\u27fa', 8350 '\u27fa', '\u27f9', '\u27f9', '\u21d2', '\u21d2', '\u22a8', '\u22a8', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2225', '\u2225', '\u2193', '\u2193', '\u2913', '\u2913', 8351 '\u21f5', 
'\u21f5', '\u0311', '\u0311', '\u2950', '\u2950', '\u295e', '\u295e', '\u21bd', '\u21bd', '\u2956', '\u2956', '\u295f', '\u295f', '\u21c1', '\u21c1', '\u2957', 8352 '\u2957', '\u22a4', '\u22a4', '\u21a7', '\u21a7', '\u21d3', '\u21d3', '\U0001d49f', '\U0001d49f', '\u0110', '\u0110', '\u014a', '\u014a', '\u00d0', '\u00d0', '\u00c9', '\u00c9', '\u011a', '\u011a', '\u00ca', '\u00ca', '\u042d', '\u042d', '\u0116', '\u0116', '\U0001d508', '\U0001d508', '\u00c8', '\u00c8', '\u2208', '\u2208', '\u0112', '\u0112', 8353 '\u25fb', '\u25fb', '\u25ab', '\u25ab', '\u0118', '\u0118', '\U0001d53c', '\U0001d53c', '\u0395', '\u0395', '\u2a75', '\u2a75', '\u2242', '\u2242', '\u21cc', '\u21cc', '\u2130', '\u2130', '\u2a73', '\u2a73', '\u0397', '\u0397', '\u00cb', '\u00cb', '\u2203', '\u2203', '\u2147', '\u2147', 8354 '\u0424', '\u0424', '\U0001d509', '\U0001d509', '\u25fc', '\u25fc', '\u25aa', '\u25aa', '\U0001d53d', '\U0001d53d', '\u2200', '\u2200', '\u2131', '\u2131', '\u2131', '\u2131', '\u0403', '\u0403', '\u003e', '\u003e', '\u0393', '\u0393', '\u03dc', '\u03dc', '\u011e', '\u011e', '\u0122', '\u0122', '\u011c', '\u011c', 8355 '\u0413', '\u0413', '\u0120', '\u0120', '\U0001d50a', '\U0001d50a', '\u22d9', '\u22d9', '\U0001d53e', '\U0001d53e', '\u2265', '\u2265', '\u22db', '\u22db', '\u2267', '\u2267', '\u2aa2', '\u2aa2', '\u2277', '\u2277', '\u2a7e', '\u2a7e', '\u2273', '\u2273', 8356 '\U0001d4a2', '\U0001d4a2', '\u226b', '\u226b', '\u042a', '\u042a', '\u02c7', '\u02c7', '\u005e', '\u005e', '\u0124', '\u0124', '\u210c', '\u210c', '\u210b', '\u210b', '\u210d', '\u210d', '\u2500', '\u2500', '\u210b', '\u210b', '\u0126', '\u0126', '\u224e', '\u224e', '\u224f', '\u224f', '\u0415', '\u0415', '\u0132', '\u0132', 8357 '\u0401', '\u0401', '\u00cd', '\u00cd', '\u00ce', '\u00ce', '\u0418', '\u0418', '\u0130', '\u0130', '\u2111', '\u2111', '\u00cc', '\u00cc', '\u2111', '\u2111', '\u012a', '\u012a', '\u2148', '\u2148', '\u21d2', '\u21d2', '\u222c', '\u222c', '\u222b', '\u222b', '\u22c2', 
'\u22c2', '\u2063', '\u2063', '\u2062', 8358 '\u2062', '\u012e', '\u012e', '\U0001d540', '\U0001d540', '\u0399', '\u0399', '\u2110', '\u2110', '\u0128', '\u0128', '\u0406', '\u0406', '\u00cf', '\u00cf', '\u0134', '\u0134', '\u0419', '\u0419', '\U0001d50d', '\U0001d50d', '\U0001d541', '\U0001d541', '\U0001d4a5', '\U0001d4a5', '\u0408', '\u0408', '\u0404', '\u0404', '\u0425', '\u0425', '\u040c', '\u040c', '\u039a', '\u039a', '\u0136', '\u0136', 8359 '\u041a', '\u041a', '\U0001d50e', '\U0001d50e', '\U0001d542', '\U0001d542', '\U0001d4a6', '\U0001d4a6', '\u0409', '\u0409', '\u003c', '\u003c', '\u0139', '\u0139', '\u039b', '\u039b', '\u27ea', '\u27ea', '\u2112', '\u2112', '\u219e', '\u219e', '\u013d', '\u013d', '\u013b', '\u013b', '\u041b', '\u041b', '\u27e8', '\u27e8', '\u2190', '\u2190', '\u21e4', 8360 '\u21e4', '\u21c6', '\u21c6', '\u2308', '\u2308', '\u27e6', '\u27e6', '\u2961', '\u2961', '\u21c3', '\u21c3', '\u2959', '\u2959', '\u230a', '\u230a', '\u2194', '\u2194', '\u294e', 8361 '\u294e', '\u22a3', '\u22a3', '\u21a4', '\u21a4', '\u295a', '\u295a', '\u22b2', '\u22b2', '\u29cf', '\u29cf', '\u22b4', '\u22b4', '\u2951', '\u2951', '\u2960', '\u2960', '\u21bf', '\u21bf', 8362 '\u2958', '\u2958', '\u21bc', '\u21bc', '\u2952', '\u2952', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u22da', '\u22da', '\u2266', '\u2266', '\u2276', '\u2276', '\u2aa1', '\u2aa1', '\u2a7d', '\u2a7d', 8363 '\u2272', '\u2272', '\U0001d50f', '\U0001d50f', '\u22d8', '\u22d8', '\u21da', '\u21da', '\u013f', '\u013f', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27f6', '\u27f6', '\u27f8', '\u27f8', '\u27fa', '\u27fa', '\u27f9', '\u27f9', 8364 '\U0001d543', '\U0001d543', '\u2199', '\u2199', '\u2198', '\u2198', '\u2112', '\u2112', '\u21b0', '\u21b0', '\u0141', '\u0141', '\u226a', '\u226a', '\u2905', '\u2905', '\u041c', '\u041c', '\u205f', '\u205f', '\u2133', '\u2133', '\U0001d510', '\U0001d510', '\u2213', '\u2213', '\U0001d544', '\U0001d544', '\u2133', '\u2133', '\u039c', '\u039c', 8365 '\u040a', '\u040a', 
'\u0143', '\u0143', '\u0147', '\u0147', '\u0145', '\u0145', '\u041d', '\u041d', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u226b', '\u226b', 8366 '\u226a', '\u226a', '\u000a', '\u000a', '\U0001d511', '\U0001d511', '\u2060', '\u2060', '\u00a0', '\u00a0', '\u2115', '\u2115', '\u2aec', '\u2aec', '\u2262', '\u2262', '\u226d', '\u226d', '\u2226', '\u2226', '\u2209', '\u2209', '\u2260', '\u2260', 8367 '\u2204', '\u2204', '\u226f', '\u226f', '\u2271', '\u2271', '\u2279', '\u2279', '\u2275', '\u2275', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u226e', '\u226e', '\u2270', '\u2270', '\u2278', 8368 '\u2278', '\u2274', '\u2274', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u220c', '\u220c', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u22e2', '\u22e2', '\u22e3', 8369 '\u22e3', '\u2288', '\u2288', '\u2281', '\u2281', '\u22e1', '\u22e1', '\u2289', '\u2289', '\u2241', '\u2241', '\u2244', '\u2244', '\u2247', '\u2247', '\u2249', '\u2249', '\u2224', 8370 '\u2224', '\U0001d4a9', '\U0001d4a9', '\u00d1', '\u00d1', '\u039d', '\u039d', '\u0152', '\u0152', '\u00d3', '\u00d3', '\u00d4', '\u00d4', '\u041e', '\u041e', '\u0150', '\u0150', '\U0001d512', '\U0001d512', '\u00d2', '\u00d2', '\u014c', '\u014c', '\u03a9', '\u03a9', '\u039f', '\u039f', '\U0001d546', '\U0001d546', '\u201c', '\u201c', '\u2018', 8371 '\u2018', '\u2a54', '\u2a54', '\U0001d4aa', '\U0001d4aa', '\u00d8', '\u00d8', '\u00d5', '\u00d5', '\u2a37', '\u2a37', '\u00d6', '\u00d6', '\u203e', '\u203e', '\u23de', '\u23de', '\u23b4', '\u23b4', '\u23dc', '\u23dc', '\u2202', '\u2202', '\u041f', '\u041f', '\U0001d513', '\U0001d513', '\u03a6', '\u03a6', '\u03a0', '\u03a0', '\u00b1', 8372 '\u00b1', '\u210c', '\u210c', '\u2119', '\u2119', '\u2abb', '\u2abb', '\u227a', '\u227a', '\u2aaf', '\u2aaf', '\u227c', '\u227c', '\u227e', '\u227e', '\u2033', '\u2033', '\u220f', '\u220f', '\u2237', '\u2237', '\u221d', '\u221d', '\U0001d4ab', '\U0001d4ab', 8373 '\u03a8', '\u03a8', '\u0022', '\u0022', '\U0001d514', 
'\U0001d514', '\u211a', '\u211a', '\U0001d4ac', '\U0001d4ac', '\u2910', '\u2910', '\u00ae', '\u00ae', '\u0154', '\u0154', '\u27eb', '\u27eb', '\u21a0', '\u21a0', '\u2916', '\u2916', '\u0158', '\u0158', '\u0156', '\u0156', '\u0420', '\u0420', '\u211c', '\u211c', '\u220b', '\u220b', '\u21cb', '\u21cb', 8374 '\u296f', '\u296f', '\u211c', '\u211c', '\u03a1', '\u03a1', '\u27e9', '\u27e9', '\u2192', '\u2192', '\u21e5', '\u21e5', '\u21c4', '\u21c4', '\u2309', '\u2309', '\u27e7', '\u27e7', '\u295d', 8375 '\u295d', '\u21c2', '\u21c2', '\u2955', '\u2955', '\u230b', '\u230b', '\u22a2', '\u22a2', '\u21a6', '\u21a6', '\u295b', '\u295b', '\u22b3', '\u22b3', '\u29d0', '\u29d0', '\u22b5', 8376 '\u22b5', '\u294f', '\u294f', '\u295c', '\u295c', '\u21be', '\u21be', '\u2954', '\u2954', '\u21c0', '\u21c0', '\u2953', '\u2953', '\u21d2', '\u21d2', '\u211d', '\u211d', '\u2970', '\u2970', 8377 '\u21db', '\u21db', '\u211b', '\u211b', '\u21b1', '\u21b1', '\u29f4', '\u29f4', '\u0429', '\u0429', '\u0428', '\u0428', '\u042c', '\u042c', '\u015a', '\u015a', '\u2abc', '\u2abc', '\u0160', '\u0160', '\u015e', '\u015e', '\u015c', '\u015c', '\u0421', '\u0421', '\U0001d516', '\U0001d516', '\u2193', '\u2193', '\u2190', '\u2190', 8378 '\u2192', '\u2192', '\u2191', '\u2191', '\u03a3', '\u03a3', '\u2218', '\u2218', '\U0001d54a', '\U0001d54a', '\u221a', '\u221a', '\u25a1', '\u25a1', '\u2293', '\u2293', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', 8379 '\u2292', '\u2292', '\u2294', '\u2294', '\U0001d4ae', '\U0001d4ae', '\u22c6', '\u22c6', '\u22d0', '\u22d0', '\u22d0', '\u22d0', '\u2286', '\u2286', '\u227b', '\u227b', '\u2ab0', '\u2ab0', '\u227d', '\u227d', '\u227f', '\u227f', '\u220b', 8380 '\u220b', '\u2211', '\u2211', '\u22d1', '\u22d1', '\u2283', '\u2283', '\u2287', '\u2287', '\u22d1', '\u22d1', '\u00de', '\u00de', '\u2122', '\u2122', '\u040b', '\u040b', '\u0426', '\u0426', '\u0009', '\u0009', '\u03a4', '\u03a4', '\u0164', '\u0164', '\u0162', '\u0162', '\u0422', '\u0422', '\U0001d517', 
'\U0001d517', '\u2234', '\u2234', '\u0398', '\u0398', 8381 '\u2009', '\u2009', '\u223c', '\u223c', '\u2243', '\u2243', '\u2245', '\u2245', '\u2248', '\u2248', '\U0001d54b', '\U0001d54b', '\u20db', '\u20db', '\U0001d4af', '\U0001d4af', '\u0166', '\u0166', '\u00da', '\u00da', '\u219f', '\u219f', '\u2949', '\u2949', '\u040e', '\u040e', '\u016c', '\u016c', '\u00db', 8382 '\u00db', '\u0423', '\u0423', '\u0170', '\u0170', '\U0001d518', '\U0001d518', '\u00d9', '\u00d9', '\u016a', '\u016a', '\u005f', '\u005f', '\u23df', '\u23df', '\u23b5', '\u23b5', '\u23dd', '\u23dd', '\u22c3', '\u22c3', '\u228e', '\u228e', '\u0172', '\u0172', '\U0001d54c', '\U0001d54c', '\u2191', '\u2191', '\u2912', 8383 '\u2912', '\u21c5', '\u21c5', '\u2195', '\u2195', '\u296e', '\u296e', '\u22a5', '\u22a5', '\u21a5', '\u21a5', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2196', '\u2196', '\u2197', '\u2197', '\u03d2', '\u03d2', '\u03a5', '\u03a5', 8384 '\u016e', '\u016e', '\U0001d4b0', '\U0001d4b0', '\u0168', '\u0168', '\u00dc', '\u00dc', '\u22ab', '\u22ab', '\u2aeb', '\u2aeb', '\u0412', '\u0412', '\u22a9', '\u22a9', '\u2ae6', '\u2ae6', '\u22c1', '\u22c1', '\u2016', '\u2016', '\u2016', '\u2016', '\u2223', '\u2223', '\u007c', '\u007c', '\u2758', '\u2758', '\u2240', 8385 '\u2240', '\u200a', '\u200a', '\U0001d519', '\U0001d519', '\U0001d54d', '\U0001d54d', '\U0001d4b1', '\U0001d4b1', '\u22aa', '\u22aa', '\u0174', '\u0174', '\u22c0', '\u22c0', '\U0001d51a', '\U0001d51a', '\U0001d54e', '\U0001d54e', '\U0001d4b2', '\U0001d4b2', '\U0001d51b', '\U0001d51b', '\u039e', '\u039e', '\U0001d54f', '\U0001d54f', '\U0001d4b3', '\U0001d4b3', '\u042f', '\u042f', '\u0407', '\u0407', '\u042e', '\u042e', '\u00dd', '\u00dd', 8386 '\u0176', '\u0176', '\u042b', '\u042b', '\U0001d51c', '\U0001d51c', '\U0001d550', '\U0001d550', '\U0001d4b4', '\U0001d4b4', '\u0178', '\u0178', '\u0416', '\u0416', '\u0179', '\u0179', '\u017d', '\u017d', '\u0417', '\u0417', '\u017b', '\u017b', '\u200b', '\u200b', '\u0396', '\u0396', '\u2128', '\u2128', 
'\u2124', '\u2124', '\U0001d4b5', '\U0001d4b5', '\u00e1', '\u00e1', '\u0103', '\u0103', '\u223e', 8387 '\u223e', '\u223f', '\u223f', '\u00e2', '\u00e2', '\u00b4', '\u00b4', '\u0430', '\u0430', '\u00e6', '\u00e6', '\u2061', '\u2061', '\U0001d51e', '\U0001d51e', '\u00e0', '\u00e0', '\u2135', '\u2135', '\u2135', '\u2135', '\u03b1', '\u03b1', '\u0101', '\u0101', '\u2a3f', '\u2a3f', '\u2227', '\u2227', '\u2a55', '\u2a55', '\u2a5c', '\u2a5c', '\u2a58', '\u2a58', '\u2a5a', '\u2a5a', '\u2220', 8388 '\u2220', '\u29a4', '\u29a4', '\u2220', '\u2220', '\u2221', '\u2221', '\u29a8', '\u29a8', '\u29a9', '\u29a9', '\u29aa', '\u29aa', '\u29ab', '\u29ab', '\u29ac', '\u29ac', '\u29ad', '\u29ad', '\u29ae', '\u29ae', '\u29af', '\u29af', '\u221f', '\u221f', '\u22be', '\u22be', '\u299d', '\u299d', '\u2222', 8389 '\u2222', '\u00c5', '\u00c5', '\u237c', '\u237c', '\u0105', '\u0105', '\U0001d552', '\U0001d552', '\u2248', '\u2248', '\u2a70', '\u2a70', '\u2a6f', '\u2a6f', '\u224a', '\u224a', '\u224b', '\u224b', '\u2248', '\u2248', '\u224a', '\u224a', '\u00e5', '\u00e5', '\U0001d4b6', '\U0001d4b6', '\u002a', '\u002a', '\u2248', '\u2248', '\u224d', '\u224d', '\u00e3', '\u00e3', '\u00e4', 8390 '\u00e4', '\u2233', '\u2233', '\u2a11', '\u2a11', '\u2aed', '\u2aed', '\u224c', '\u224c', '\u03f6', '\u03f6', '\u2035', '\u2035', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u22bd', '\u22bd', '\u2305', '\u2305', '\u2305', '\u2305', '\u23b5', '\u23b5', '\u23b6', '\u23b6', '\u224c', '\u224c', '\u0431', 8391 '\u0431', '\u201e', '\u201e', '\u2235', '\u2235', '\u2235', '\u2235', '\u29b0', '\u29b0', '\u03f6', '\u03f6', '\u212c', '\u212c', '\u03b2', '\u03b2', '\u2136', '\u2136', '\u226c', '\u226c', '\U0001d51f', '\U0001d51f', '\u22c2', '\u22c2', '\u25ef', '\u25ef', '\u22c3', '\u22c3', '\u2a00', '\u2a00', '\u2a01', '\u2a01', '\u2a02', '\u2a02', 8392 '\u2a06', '\u2a06', '\u2605', '\u2605', '\u25bd', '\u25bd', '\u25b3', '\u25b3', '\u2a04', '\u2a04', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u290d', '\u290d', '\u29eb', 
'\u29eb', '\u25aa', '\u25aa', '\u25b4', '\u25b4', '\u25be', 8393 '\u25be', '\u25c2', '\u25c2', '\u25b8', '\u25b8', '\u2423', '\u2423', '\u2592', '\u2592', '\u2591', '\u2591', '\u2593', '\u2593', '\u2588', '\u2588', '\u2310', '\u2310', '\U0001d553', '\U0001d553', '\u22a5', '\u22a5', '\u22a5', '\u22a5', '\u22c8', '\u22c8', '\u2557', '\u2557', '\u2554', '\u2554', '\u2556', 8394 '\u2556', '\u2553', '\u2553', '\u2550', '\u2550', '\u2566', '\u2566', '\u2569', '\u2569', '\u2564', '\u2564', '\u2567', '\u2567', '\u255d', '\u255d', '\u255a', '\u255a', '\u255c', '\u255c', '\u2559', '\u2559', '\u2551', '\u2551', '\u256c', '\u256c', '\u2563', '\u2563', '\u2560', '\u2560', '\u256b', '\u256b', '\u2562', '\u2562', '\u255f', '\u255f', '\u29c9', 8395 '\u29c9', '\u2555', '\u2555', '\u2552', '\u2552', '\u2510', '\u2510', '\u250c', '\u250c', '\u2500', '\u2500', '\u2565', '\u2565', '\u2568', '\u2568', '\u252c', '\u252c', '\u2534', '\u2534', '\u229f', '\u229f', '\u229e', '\u229e', '\u22a0', '\u22a0', '\u255b', '\u255b', '\u2558', '\u2558', '\u2518', '\u2518', '\u2514', '\u2514', '\u2502', 8396 '\u2502', '\u256a', '\u256a', '\u2561', '\u2561', '\u255e', '\u255e', '\u253c', '\u253c', '\u2524', '\u2524', '\u251c', '\u251c', '\u2035', '\u2035', '\u02d8', '\u02d8', '\u00a6', '\u00a6', '\U0001d4b7', '\U0001d4b7', '\u204f', '\u204f', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u005c', '\u005c', '\u29c5', '\u29c5', '\u27c8', '\u27c8', '\u2022', '\u2022', '\u2022', 8397 '\u2022', '\u224e', '\u224e', '\u2aae', '\u2aae', '\u224f', '\u224f', '\u224f', '\u224f', '\u0107', '\u0107', '\u2229', '\u2229', '\u2a44', '\u2a44', '\u2a49', '\u2a49', '\u2a4b', '\u2a4b', '\u2a47', '\u2a47', '\u2a40', '\u2a40', '\u2041', '\u2041', '\u02c7', '\u02c7', '\u2a4d', '\u2a4d', '\u010d', '\u010d', '\u00e7', '\u00e7', '\u0109', 8398 '\u0109', '\u2a4c', '\u2a4c', '\u2a50', '\u2a50', '\u010b', '\u010b', '\u00b8', '\u00b8', '\u29b2', '\u29b2', '\u00a2', '\u00a2', '\u00b7', '\u00b7', '\U0001d520', '\U0001d520', '\u0447', 
'\u0447', '\u2713', '\u2713', '\u2713', '\u2713', '\u03c7', '\u03c7', '\u25cb', '\u25cb', '\u29c3', '\u29c3', '\u02c6', '\u02c6', '\u2257', '\u2257', '\u21ba', 8399 '\u21ba', '\u21bb', '\u21bb', '\u00ae', '\u00ae', '\u24c8', '\u24c8', '\u229b', '\u229b', '\u229a', '\u229a', '\u229d', '\u229d', '\u2257', '\u2257', '\u2a10', '\u2a10', '\u2aef', '\u2aef', '\u29c2', '\u29c2', '\u2663', '\u2663', '\u2663', '\u2663', '\u003a', 8400 '\u003a', '\u2254', '\u2254', '\u2254', '\u2254', '\u002c', '\u002c', '\u0040', '\u0040', '\u2201', '\u2201', '\u2218', '\u2218', '\u2201', '\u2201', '\u2102', '\u2102', '\u2245', '\u2245', '\u2a6d', '\u2a6d', '\u222e', '\u222e', '\U0001d554', '\U0001d554', '\u2210', '\u2210', '\u00a9', '\u00a9', '\u2117', '\u2117', '\u21b5', '\u21b5', 8401 '\u2717', '\u2717', '\U0001d4b8', '\U0001d4b8', '\u2acf', '\u2acf', '\u2ad1', '\u2ad1', '\u2ad0', '\u2ad0', '\u2ad2', '\u2ad2', '\u22ef', '\u22ef', '\u2938', '\u2938', '\u2935', '\u2935', '\u22de', '\u22de', '\u22df', '\u22df', '\u21b6', '\u21b6', '\u293d', '\u293d', '\u222a', '\u222a', '\u2a48', '\u2a48', '\u2a46', '\u2a46', '\u2a4a', '\u2a4a', 8402 '\u228d', '\u228d', '\u2a45', '\u2a45', '\u21b7', '\u21b7', '\u293c', '\u293c', '\u22de', '\u22de', '\u22df', '\u22df', '\u22ce', '\u22ce', '\u22cf', '\u22cf', '\u00a4', '\u00a4', '\u21b6', '\u21b6', '\u21b7', '\u21b7', '\u22ce', '\u22ce', '\u22cf', '\u22cf', 8403 '\u2232', '\u2232', '\u2231', '\u2231', '\u232d', '\u232d', '\u21d3', '\u21d3', '\u2965', '\u2965', '\u2020', '\u2020', '\u2138', '\u2138', '\u2193', '\u2193', '\u2010', '\u2010', '\u22a3', '\u22a3', '\u290f', '\u290f', '\u02dd', '\u02dd', '\u010f', '\u010f', '\u0434', '\u0434', '\u2146', '\u2146', '\u2021', '\u2021', '\u21ca', '\u21ca', '\u2a77', 8404 '\u2a77', '\u00b0', '\u00b0', '\u03b4', '\u03b4', '\u29b1', '\u29b1', '\u297f', '\u297f', '\U0001d521', '\U0001d521', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u22c4', '\u22c4', '\u22c4', '\u22c4', '\u2666', '\u2666', '\u2666', '\u2666', '\u00a8', 
'\u00a8', '\u03dd', '\u03dd', '\u22f2', '\u22f2', '\u00f7', '\u00f7', '\u00f7', '\u00f7', '\u22c7', 8405 '\u22c7', '\u22c7', '\u22c7', '\u0452', '\u0452', '\u231e', '\u231e', '\u230d', '\u230d', '\u0024', '\u0024', '\U0001d555', '\U0001d555', '\u02d9', '\u02d9', '\u2250', '\u2250', '\u2251', '\u2251', '\u2238', '\u2238', '\u2214', '\u2214', '\u22a1', '\u22a1', '\u2306', '\u2306', '\u2193', '\u2193', '\u21ca', 8406 '\u21ca', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u2910', '\u2910', '\u231f', '\u231f', '\u230c', '\u230c', '\U0001d4b9', '\U0001d4b9', '\u0455', '\u0455', '\u29f6', '\u29f6', '\u0111', '\u0111', '\u22f1', '\u22f1', '\u25bf', '\u25bf', '\u25be', '\u25be', '\u21f5', '\u21f5', '\u296f', '\u296f', '\u29a6', 8407 '\u29a6', '\u045f', '\u045f', '\u27ff', '\u27ff', '\u2a77', '\u2a77', '\u2251', '\u2251', '\u00e9', '\u00e9', '\u2a6e', '\u2a6e', '\u011b', '\u011b', '\u2256', '\u2256', '\u00ea', '\u00ea', '\u2255', '\u2255', '\u044d', '\u044d', '\u0117', '\u0117', '\u2147', '\u2147', '\u2252', '\u2252', '\U0001d522', '\U0001d522', '\u2a9a', '\u2a9a', '\u00e8', '\u00e8', '\u2a96', '\u2a96', '\u2a98', 8408 '\u2a98', '\u2a99', '\u2a99', '\u23e7', '\u23e7', '\u2113', '\u2113', '\u2a95', '\u2a95', '\u2a97', '\u2a97', '\u0113', '\u0113', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2003', '\u2003', '\u2004', '\u2004', '\u2005', '\u2005', '\u014b', '\u014b', '\u2002', '\u2002', '\u0119', '\u0119', '\U0001d556', '\U0001d556', '\u22d5', '\u22d5', '\u29e3', 8409 '\u29e3', '\u2a71', '\u2a71', '\u03b5', '\u03b5', '\u03b5', '\u03b5', '\u03f5', '\u03f5', '\u2256', '\u2256', '\u2255', '\u2255', '\u2242', '\u2242', '\u2a96', '\u2a96', '\u2a95', '\u2a95', '\u003d', '\u003d', '\u225f', '\u225f', '\u2261', '\u2261', '\u2a78', '\u2a78', '\u29e5', '\u29e5', '\u2253', '\u2253', 8410 '\u2971', '\u2971', '\u212f', '\u212f', '\u2250', '\u2250', '\u2242', '\u2242', '\u03b7', '\u03b7', '\u00f0', '\u00f0', '\u00eb', '\u00eb', '\u20ac', '\u20ac', '\u0021', '\u0021', 
'\u2203', '\u2203', '\u2130', '\u2130', '\u2147', '\u2147', '\u2252', '\u2252', '\u0444', '\u0444', '\u2640', '\u2640', '\ufb03', '\ufb03', '\ufb00', 8411 '\ufb00', '\ufb04', '\ufb04', '\U0001d523', '\U0001d523', '\ufb01', '\ufb01', '\u266d', '\u266d', '\ufb02', '\ufb02', '\u25b1', '\u25b1', '\u0192', '\u0192', '\U0001d557', '\U0001d557', '\u2200', '\u2200', '\u22d4', '\u22d4', '\u2ad9', '\u2ad9', '\u2a0d', '\u2a0d', '\u00bd', '\u00bd', '\u2153', '\u2153', '\u00bc', '\u00bc', '\u2155', '\u2155', '\u2159', '\u2159', 8412 '\u215b', '\u215b', '\u2154', '\u2154', '\u2156', '\u2156', '\u00be', '\u00be', '\u2157', '\u2157', '\u215c', '\u215c', '\u2158', '\u2158', '\u215a', '\u215a', '\u215d', '\u215d', '\u215e', '\u215e', '\u2044', '\u2044', '\u2322', '\u2322', '\U0001d4bb', '\U0001d4bb', '\u2267', '\u2267', '\u2a8c', '\u2a8c', '\u01f5', '\u01f5', '\u03b3', '\u03b3', '\u03dd', 8413 '\u03dd', '\u2a86', '\u2a86', '\u011f', '\u011f', '\u011d', '\u011d', '\u0433', '\u0433', '\u0121', '\u0121', '\u2265', '\u2265', '\u22db', '\u22db', '\u2265', '\u2265', '\u2267', '\u2267', '\u2a7e', '\u2a7e', '\u2a7e', '\u2a7e', '\u2aa9', '\u2aa9', '\u2a80', '\u2a80', '\u2a82', '\u2a82', '\u2a84', '\u2a84', '\u2a94', '\u2a94', '\U0001d524', '\U0001d524', '\u226b', '\u226b', '\u22d9', 8414 '\u22d9', '\u2137', '\u2137', '\u0453', '\u0453', '\u2277', '\u2277', '\u2a92', '\u2a92', '\u2aa5', '\u2aa5', '\u2aa4', '\u2aa4', '\u2269', '\u2269', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a88', '\u2a88', '\u2a88', '\u2a88', '\u2269', '\u2269', '\u22e7', '\u22e7', '\U0001d558', '\U0001d558', '\u0060', '\u0060', '\u210a', '\u210a', '\u2273', '\u2273', '\u2a8e', '\u2a8e', '\u2a90', '\u2a90', '\u2aa7', 8415 '\u2aa7', '\u2a7a', '\u2a7a', '\u22d7', '\u22d7', '\u2995', '\u2995', '\u2a7c', '\u2a7c', '\u2a86', '\u2a86', '\u2978', '\u2978', '\u22d7', '\u22d7', '\u22db', '\u22db', '\u2a8c', '\u2a8c', '\u2277', '\u2277', '\u2273', '\u2273', '\u21d4', '\u21d4', '\u200a', '\u200a', '\u00bd', '\u00bd', '\u210b', 
'\u210b', 8416 '\u044a', '\u044a', '\u2194', '\u2194', '\u2948', '\u2948', '\u21ad', '\u21ad', '\u210f', '\u210f', '\u0125', '\u0125', '\u2665', '\u2665', '\u2665', '\u2665', '\u2026', '\u2026', '\u22b9', '\u22b9', '\U0001d525', '\U0001d525', '\u2925', '\u2925', '\u2926', '\u2926', '\u21ff', '\u21ff', '\u223b', '\u223b', '\u21a9', '\u21a9', 8417 '\u21aa', '\u21aa', '\U0001d559', '\U0001d559', '\u2015', '\u2015', '\U0001d4bd', '\U0001d4bd', '\u210f', '\u210f', '\u0127', '\u0127', '\u2043', '\u2043', '\u2010', '\u2010', '\u00ed', '\u00ed', '\u2063', '\u2063', '\u00ee', '\u00ee', '\u0438', '\u0438', '\u0435', '\u0435', '\u00a1', '\u00a1', '\u21d4', '\u21d4', '\U0001d526', '\U0001d526', '\u00ec', '\u00ec', '\u2148', 8418 '\u2148', '\u2a0c', '\u2a0c', '\u222d', '\u222d', '\u29dc', '\u29dc', '\u2129', '\u2129', '\u0133', '\u0133', '\u012b', '\u012b', '\u2111', '\u2111', '\u2110', '\u2110', '\u2111', '\u2111', '\u0131', '\u0131', '\u22b7', '\u22b7', '\u01b5', '\u01b5', '\u2208', '\u2208', '\u2105', '\u2105', '\u221e', '\u221e', '\u29dd', '\u29dd', '\u0131', 8419 '\u0131', '\u222b', '\u222b', '\u22ba', '\u22ba', '\u2124', '\u2124', '\u22ba', '\u22ba', '\u2a17', '\u2a17', '\u2a3c', '\u2a3c', '\u0451', '\u0451', '\u012f', '\u012f', '\U0001d55a', '\U0001d55a', '\u03b9', '\u03b9', '\u2a3c', '\u2a3c', '\u00bf', '\u00bf', '\U0001d4be', '\U0001d4be', '\u2208', '\u2208', '\u22f9', '\u22f9', '\u22f5', '\u22f5', '\u22f4', 8420 '\u22f4', '\u22f3', '\u22f3', '\u2208', '\u2208', '\u2062', '\u2062', '\u0129', '\u0129', '\u0456', '\u0456', '\u00ef', '\u00ef', '\u0135', '\u0135', '\u0439', '\u0439', '\U0001d527', '\U0001d527', '\u0237', '\u0237', '\U0001d55b', '\U0001d55b', '\U0001d4bf', '\U0001d4bf', '\u0458', '\u0458', '\u0454', '\u0454', '\u03ba', '\u03ba', '\u03f0', '\u03f0', '\u0137', '\u0137', '\u043a', '\u043a', '\U0001d528', 8421 '\U0001d528', '\u0138', '\u0138', '\u0445', '\u0445', '\u045c', '\u045c', '\U0001d55c', '\U0001d55c', '\U0001d4c0', '\U0001d4c0', '\u21da', '\u21da', 
'\u21d0', '\u21d0', '\u291b', '\u291b', '\u290e', '\u290e', '\u2266', '\u2266', '\u2a8b', '\u2a8b', '\u2962', '\u2962', '\u013a', '\u013a', '\u29b4', '\u29b4', '\u2112', '\u2112', '\u03bb', '\u03bb', '\u27e8', '\u27e8', '\u2991', '\u2991', 8422 '\u27e8', '\u27e8', '\u2a85', '\u2a85', '\u00ab', '\u00ab', '\u2190', '\u2190', '\u21e4', '\u21e4', '\u291f', '\u291f', '\u291d', '\u291d', '\u21a9', '\u21a9', '\u21ab', '\u21ab', '\u2939', '\u2939', '\u2973', '\u2973', '\u21a2', '\u21a2', '\u2aab', '\u2aab', '\u2919', '\u2919', '\u2aad', '\u2aad', '\u290c', '\u290c', '\u2772', '\u2772', '\u007b', 8423 '\u007b', '\u005b', '\u005b', '\u298b', '\u298b', '\u298f', '\u298f', '\u298d', '\u298d', '\u013e', '\u013e', '\u013c', '\u013c', '\u2308', '\u2308', '\u007b', '\u007b', '\u043b', '\u043b', '\u2936', '\u2936', '\u201c', '\u201c', '\u201e', '\u201e', '\u2967', '\u2967', '\u294b', '\u294b', '\u21b2', '\u21b2', '\u2264', '\u2264', '\u2190', 8424 '\u2190', '\u21a2', '\u21a2', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u21c7', '\u21c7', '\u2194', '\u2194', '\u21c6', '\u21c6', '\u21cb', '\u21cb', '\u21ad', '\u21ad', '\u22cb', 8425 '\u22cb', '\u22da', '\u22da', '\u2264', '\u2264', '\u2266', '\u2266', '\u2a7d', '\u2a7d', '\u2a7d', '\u2a7d', '\u2aa8', '\u2aa8', '\u2a7f', '\u2a7f', '\u2a81', '\u2a81', '\u2a83', '\u2a83', '\u2a93', '\u2a93', '\u2a85', '\u2a85', '\u22d6', '\u22d6', '\u22da', '\u22da', '\u2a8b', '\u2a8b', '\u2276', '\u2276', 8426 '\u2272', '\u2272', '\u297c', '\u297c', '\u230a', '\u230a', '\U0001d529', '\U0001d529', '\u2276', '\u2276', '\u2a91', '\u2a91', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u296a', '\u296a', '\u2584', '\u2584', '\u0459', '\u0459', '\u226a', '\u226a', '\u21c7', '\u21c7', '\u231e', '\u231e', '\u296b', '\u296b', '\u25fa', '\u25fa', '\u0140', '\u0140', '\u23b0', '\u23b0', 8427 '\u23b0', '\u23b0', '\u2268', '\u2268', '\u2a89', '\u2a89', '\u2a89', '\u2a89', '\u2a87', '\u2a87', '\u2a87', '\u2a87', '\u2268', '\u2268', '\u22e6', '\u22e6', '\u27ec', '\u27ec', 
'\u21fd', '\u21fd', '\u27e6', '\u27e6', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27fc', '\u27fc', '\u27f6', 8428 '\u27f6', '\u21ab', '\u21ab', '\u21ac', '\u21ac', '\u2985', '\u2985', '\U0001d55d', '\U0001d55d', '\u2a2d', '\u2a2d', '\u2a34', '\u2a34', '\u2217', '\u2217', '\u005f', '\u005f', '\u25ca', '\u25ca', '\u25ca', '\u25ca', '\u29eb', '\u29eb', '\u0028', '\u0028', '\u2993', '\u2993', '\u21c6', '\u21c6', '\u231f', 8429 '\u231f', '\u21cb', '\u21cb', '\u296d', '\u296d', '\u200e', '\u200e', '\u22bf', '\u22bf', '\u2039', '\u2039', '\U0001d4c1', '\U0001d4c1', '\u21b0', '\u21b0', '\u2272', '\u2272', '\u2a8d', '\u2a8d', '\u2a8f', '\u2a8f', '\u005b', '\u005b', '\u2018', '\u2018', '\u201a', '\u201a', '\u0142', '\u0142', '\u2aa6', '\u2aa6', '\u2a79', '\u2a79', '\u22d6', '\u22d6', '\u22cb', 8430 '\u22cb', '\u22c9', '\u22c9', '\u2976', '\u2976', '\u2a7b', '\u2a7b', '\u2996', '\u2996', '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u25c2', '\u25c2', '\u294a', '\u294a', '\u2966', '\u2966', '\u223a', '\u223a', '\u00af', '\u00af', '\u2642', '\u2642', '\u2720', '\u2720', '\u2720', '\u2720', '\u21a6', '\u21a6', '\u21a6', '\u21a6', '\u21a7', 8431 '\u21a7', '\u21a4', '\u21a4', '\u21a5', '\u21a5', '\u25ae', '\u25ae', '\u2a29', '\u2a29', '\u043c', '\u043c', '\u2014', '\u2014', '\u2221', '\u2221', '\U0001d52a', '\U0001d52a', '\u2127', '\u2127', '\u00b5', '\u00b5', '\u2223', '\u2223', '\u002a', '\u002a', '\u2af0', '\u2af0', '\u00b7', '\u00b7', '\u2212', '\u2212', '\u229f', 8432 '\u229f', '\u2238', '\u2238', '\u2a2a', '\u2a2a', '\u2adb', '\u2adb', '\u2026', '\u2026', '\u2213', '\u2213', '\u22a7', '\u22a7', '\U0001d55e', '\U0001d55e', '\u2213', '\u2213', '\U0001d4c2', '\U0001d4c2', '\u223e', '\u223e', '\u03bc', '\u03bc', '\u22b8', '\u22b8', '\u22b8', '\u22b8', '\u21cd', '\u21cd', '\u21ce', '\u21ce', '\u21cf', 8433 '\u21cf', '\u22af', '\u22af', '\u22ae', '\u22ae', '\u2207', '\u2207', '\u0144', '\u0144', '\u2249', '\u2249', '\u0149', '\u0149', '\u2249', '\u2249', '\u266e', '\u266e', '\u266e', 
'\u266e', '\u2115', '\u2115', '\u00a0', '\u00a0', '\u2a43', '\u2a43', '\u0148', '\u0148', '\u0146', '\u0146', '\u2247', '\u2247', '\u2a42', '\u2a42', '\u043d', 8434 '\u043d', '\u2013', '\u2013', '\u2260', '\u2260', '\u21d7', '\u21d7', '\u2924', '\u2924', '\u2197', '\u2197', '\u2197', '\u2197', '\u2262', '\u2262', '\u2928', '\u2928', '\u2204', '\u2204', '\u2204', '\u2204', '\U0001d52b', '\U0001d52b', '\u2271', '\u2271', '\u2271', '\u2271', '\u2275', '\u2275', '\u226f', '\u226f', '\u226f', '\u226f', '\u21ce', '\u21ce', '\u21ae', '\u21ae', 8435 '\u2af2', '\u2af2', '\u220b', '\u220b', '\u22fc', '\u22fc', '\u22fa', '\u22fa', '\u220b', '\u220b', '\u045a', '\u045a', '\u21cd', '\u21cd', '\u219a', '\u219a', '\u2025', '\u2025', '\u2270', '\u2270', '\u219a', '\u219a', '\u21ae', '\u21ae', '\u2270', '\u2270', '\u226e', '\u226e', '\u2274', '\u2274', '\u226e', '\u226e', '\u22ea', '\u22ea', '\u22ec', '\u22ec', 8436 '\u2224', '\u2224', '\U0001d55f', '\U0001d55f', '\u00ac', '\u00ac', '\u2209', '\u2209', '\u2209', '\u2209', '\u22f7', '\u22f7', '\u22f6', '\u22f6', '\u220c', '\u220c', '\u220c', '\u220c', '\u22fe', '\u22fe', '\u22fd', '\u22fd', '\u2226', '\u2226', '\u2226', '\u2226', '\u2a14', '\u2a14', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u2280', 8437 '\u2280', '\u21cf', '\u21cf', '\u219b', '\u219b', '\u219b', '\u219b', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u2281', '\u2281', '\u22e1', '\u22e1', '\U0001d4c3', '\U0001d4c3', '\u2224', '\u2224', '\u2226', '\u2226', '\u2241', '\u2241', '\u2244', '\u2244', '\u2244', '\u2244', '\u2224', '\u2224', '\u2226', '\u2226', '\u22e2', 8438 '\u22e2', '\u22e3', '\u22e3', '\u2284', '\u2284', '\u2288', '\u2288', '\u2288', '\u2288', '\u2281', '\u2281', '\u2285', '\u2285', '\u2289', '\u2289', '\u2289', '\u2289', '\u2279', '\u2279', '\u00f1', '\u00f1', '\u2278', '\u2278', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u22eb', '\u22eb', 8439 '\u22ed', '\u22ed', '\u03bd', '\u03bd', '\u0023', '\u0023', '\u2116', '\u2116', '\u2007', '\u2007', '\u22ad', 
'\u22ad', '\u2904', '\u2904', '\u22ac', '\u22ac', '\u29de', '\u29de', '\u2902', '\u2902', '\u2903', '\u2903', '\u21d6', '\u21d6', '\u2923', '\u2923', '\u2196', '\u2196', '\u2196', '\u2196', '\u2927', '\u2927', 8440 '\u24c8', '\u24c8', '\u00f3', '\u00f3', '\u229b', '\u229b', '\u229a', '\u229a', '\u00f4', '\u00f4', '\u043e', '\u043e', '\u229d', '\u229d', '\u0151', '\u0151', '\u2a38', '\u2a38', '\u2299', '\u2299', '\u29bc', '\u29bc', '\u0153', '\u0153', '\u29bf', '\u29bf', '\U0001d52c', '\U0001d52c', '\u02db', '\u02db', '\u00f2', '\u00f2', '\u29c1', '\u29c1', '\u29b5', '\u29b5', '\u03a9', '\u03a9', '\u222e', 8441 '\u222e', '\u21ba', '\u21ba', '\u29be', '\u29be', '\u29bb', '\u29bb', '\u203e', '\u203e', '\u29c0', '\u29c0', '\u014d', '\u014d', '\u03c9', '\u03c9', '\u03bf', '\u03bf', '\u29b6', '\u29b6', '\u2296', '\u2296', '\U0001d560', '\U0001d560', '\u29b7', '\u29b7', '\u29b9', '\u29b9', '\u2295', '\u2295', '\u2228', '\u2228', '\u21bb', '\u21bb', '\u2a5d', '\u2a5d', '\u2134', '\u2134', 8442 '\u2134', '\u2134', '\u00aa', '\u00aa', '\u00ba', '\u00ba', '\u22b6', '\u22b6', '\u2a56', '\u2a56', '\u2a57', '\u2a57', '\u2a5b', '\u2a5b', '\u2134', '\u2134', '\u00f8', '\u00f8', '\u2298', '\u2298', '\u00f5', '\u00f5', '\u2297', '\u2297', '\u2a36', '\u2a36', '\u00f6', '\u00f6', '\u233d', '\u233d', '\u2225', '\u2225', '\u00b6', '\u00b6', '\u2225', '\u2225', 8443 '\u2af3', '\u2af3', '\u2afd', '\u2afd', '\u2202', '\u2202', '\u043f', '\u043f', '\u0025', '\u0025', '\u002e', '\u002e', '\u2030', '\u2030', '\u22a5', '\u22a5', '\u2031', '\u2031', '\U0001d52d', '\U0001d52d', '\u03c6', '\u03c6', '\u03d5', '\u03d5', '\u2133', '\u2133', '\u260e', '\u260e', '\u03c0', '\u03c0', '\u22d4', '\u22d4', '\u03d6', '\u03d6', '\u210f', '\u210f', 8444 '\u210e', '\u210e', '\u210f', '\u210f', '\u002b', '\u002b', '\u2a23', '\u2a23', '\u229e', '\u229e', '\u2a22', '\u2a22', '\u2214', '\u2214', '\u2a25', '\u2a25', '\u2a72', '\u2a72', '\u00b1', '\u00b1', '\u2a26', '\u2a26', '\u2a27', '\u2a27', '\u00b1', '\u00b1', 
'\u2a15', '\u2a15', '\U0001d561', '\U0001d561', '\u00a3', '\u00a3', '\u227a', 8445 '\u227a', '\u2ab3', '\u2ab3', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u227a', '\u227a', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u2ab9', '\u2ab9', '\u2ab5', '\u2ab5', '\u22e8', '\u22e8', '\u227e', '\u227e', '\u2032', '\u2032', '\u2119', '\u2119', '\u2ab5', '\u2ab5', '\u2ab9', 8446 '\u2ab9', '\u22e8', '\u22e8', '\u220f', '\u220f', '\u232e', '\u232e', '\u2312', '\u2312', '\u2313', '\u2313', '\u221d', '\u221d', '\u221d', '\u221d', '\u227e', '\u227e', '\u22b0', '\u22b0', '\U0001d4c5', '\U0001d4c5', '\u03c8', '\u03c8', '\u2008', '\u2008', '\U0001d52e', '\U0001d52e', '\u2a0c', '\u2a0c', '\U0001d562', '\U0001d562', '\u2057', '\u2057', '\U0001d4c6', '\U0001d4c6', 8447 '\u210d', '\u210d', '\u2a16', '\u2a16', '\u003f', '\u003f', '\u225f', '\u225f', '\u21db', '\u21db', '\u21d2', '\u21d2', '\u291c', '\u291c', '\u290f', '\u290f', '\u2964', '\u2964', '\u0155', '\u0155', '\u221a', '\u221a', '\u29b3', '\u29b3', '\u27e9', '\u27e9', '\u2992', '\u2992', '\u29a5', '\u29a5', '\u27e9', '\u27e9', '\u00bb', 8448 '\u00bb', '\u2192', '\u2192', '\u2975', '\u2975', '\u21e5', '\u21e5', '\u2920', '\u2920', '\u2933', '\u2933', '\u291e', '\u291e', '\u21aa', '\u21aa', '\u21ac', '\u21ac', '\u2945', '\u2945', '\u2974', '\u2974', '\u21a3', '\u21a3', '\u219d', '\u219d', '\u291a', '\u291a', '\u2236', '\u2236', '\u211a', '\u211a', '\u290d', '\u290d', 8449 '\u2773', '\u2773', '\u007d', '\u007d', '\u005d', '\u005d', '\u298c', '\u298c', '\u298e', '\u298e', '\u2990', '\u2990', '\u0159', '\u0159', '\u0157', '\u0157', '\u2309', '\u2309', '\u007d', '\u007d', '\u0440', '\u0440', '\u2937', '\u2937', '\u2969', '\u2969', '\u201d', '\u201d', '\u201d', '\u201d', '\u21b3', '\u21b3', '\u211c', '\u211c', '\u211b', 8450 '\u211b', '\u211c', '\u211c', '\u211d', '\u211d', '\u25ad', '\u25ad', '\u00ae', '\u00ae', '\u297d', '\u297d', '\u230b', '\u230b', '\U0001d52f', '\U0001d52f', '\u21c1', '\u21c1', 
'\u21c0', '\u21c0', '\u296c', '\u296c', '\u03c1', '\u03c1', '\u03f1', '\u03f1', '\u2192', '\u2192', '\u21a3', '\u21a3', '\u21c1', '\u21c1', 8451 '\u21c0', '\u21c0', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u21c9', '\u21c9', '\u219d', '\u219d', '\u22cc', '\u22cc', '\u02da', '\u02da', '\u2253', '\u2253', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u200f', 8452 '\u200f', '\u23b1', '\u23b1', '\u23b1', '\u23b1', '\u2aee', '\u2aee', '\u27ed', '\u27ed', '\u21fe', '\u21fe', '\u27e7', '\u27e7', '\u2986', '\u2986', '\U0001d563', '\U0001d563', '\u2a2e', '\u2a2e', '\u2a35', '\u2a35', '\u0029', '\u0029', '\u2994', '\u2994', '\u2a12', '\u2a12', '\u21c9', '\u21c9', '\u203a', '\u203a', '\U0001d4c7', '\U0001d4c7', '\u21b1', 8453 '\u21b1', '\u005d', '\u005d', '\u2019', '\u2019', '\u2019', '\u2019', '\u22cc', '\u22cc', '\u22ca', '\u22ca', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25b8', '\u25b8', '\u29ce', '\u29ce', '\u2968', '\u2968', '\u211e', '\u211e', '\u015b', '\u015b', '\u201a', '\u201a', '\u227b', '\u227b', '\u2ab4', '\u2ab4', '\u2ab8', '\u2ab8', '\u0161', '\u0161', '\u227d', 8454 '\u227d', '\u2ab0', '\u2ab0', '\u015f', '\u015f', '\u015d', '\u015d', '\u2ab6', '\u2ab6', '\u2aba', '\u2aba', '\u22e9', '\u22e9', '\u2a13', '\u2a13', '\u227f', '\u227f', '\u0441', '\u0441', '\u22c5', '\u22c5', '\u22a1', '\u22a1', '\u2a66', '\u2a66', '\u21d8', '\u21d8', '\u2925', '\u2925', '\u2198', '\u2198', '\u2198', '\u2198', '\u00a7', '\u00a7', '\u003b', 8455 '\u003b', '\u2929', '\u2929', '\u2216', '\u2216', '\u2216', '\u2216', '\u2736', '\u2736', '\U0001d530', '\U0001d530', '\u2322', '\u2322', '\u266f', '\u266f', '\u0449', '\u0449', '\u0448', '\u0448', '\u2223', '\u2223', '\u2225', '\u2225', '\u00ad', '\u00ad', '\u03c3', '\u03c3', '\u03c2', '\u03c2', '\u03c2', '\u03c2', '\u223c', '\u223c', '\u2a6a', 8456 '\u2a6a', '\u2243', '\u2243', '\u2243', '\u2243', '\u2a9e', '\u2a9e', '\u2aa0', '\u2aa0', '\u2a9d', '\u2a9d', '\u2a9f', '\u2a9f', '\u2246', '\u2246', '\u2a24', '\u2a24', '\u2972', '\u2972', 
'\u2190', '\u2190', '\u2216', '\u2216', '\u2a33', '\u2a33', '\u29e4', '\u29e4', '\u2223', '\u2223', '\u2323', '\u2323', '\u2aaa', '\u2aaa', '\u2aac', 8457 '\u2aac', '\u044c', '\u044c', '\u002f', '\u002f', '\u29c4', '\u29c4', '\u233f', '\u233f', '\U0001d564', '\U0001d564', '\u2660', '\u2660', '\u2660', '\u2660', '\u2225', '\u2225', '\u2293', '\u2293', '\u2294', '\u2294', '\u228f', '\u228f', '\u2291', '\u2291', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', '\u2292', '\u2292', 8458 '\u2290', '\u2290', '\u2292', '\u2292', '\u25a1', '\u25a1', '\u25a1', '\u25a1', '\u25aa', '\u25aa', '\u25aa', '\u25aa', '\u2192', '\u2192', '\U0001d4c8', '\U0001d4c8', '\u2216', '\u2216', '\u2323', '\u2323', '\u22c6', '\u22c6', '\u2606', '\u2606', '\u2605', '\u2605', '\u03f5', '\u03f5', '\u03d5', '\u03d5', '\u00af', 8459 '\u00af', '\u2282', '\u2282', '\u2ac5', '\u2ac5', '\u2abd', '\u2abd', '\u2286', '\u2286', '\u2ac3', '\u2ac3', '\u2ac1', '\u2ac1', '\u2acb', '\u2acb', '\u228a', '\u228a', '\u2abf', '\u2abf', '\u2979', '\u2979', '\u2282', '\u2282', '\u2286', '\u2286', '\u2ac5', '\u2ac5', '\u228a', '\u228a', '\u2acb', '\u2acb', 8460 '\u2ac7', '\u2ac7', '\u2ad5', '\u2ad5', '\u2ad3', '\u2ad3', '\u227b', '\u227b', '\u2ab8', '\u2ab8', '\u227d', '\u227d', '\u2ab0', '\u2ab0', '\u2aba', '\u2aba', '\u2ab6', '\u2ab6', '\u22e9', '\u22e9', '\u227f', '\u227f', '\u2211', '\u2211', '\u266a', '\u266a', '\u2283', '\u2283', '\u00b9', '\u00b9', '\u00b2', 8461 '\u00b2', '\u00b3', '\u00b3', '\u2ac6', '\u2ac6', '\u2abe', '\u2abe', '\u2ad8', '\u2ad8', '\u2287', '\u2287', '\u2ac4', '\u2ac4', '\u27c9', '\u27c9', '\u2ad7', '\u2ad7', '\u297b', '\u297b', '\u2ac2', '\u2ac2', '\u2acc', '\u2acc', '\u228b', '\u228b', '\u2ac0', '\u2ac0', '\u2283', '\u2283', '\u2287', '\u2287', '\u2ac6', 8462 '\u2ac6', '\u228b', '\u228b', '\u2acc', '\u2acc', '\u2ac8', '\u2ac8', '\u2ad4', '\u2ad4', '\u2ad6', '\u2ad6', '\u21d9', '\u21d9', '\u2926', '\u2926', '\u2199', '\u2199', '\u2199', '\u2199', '\u292a', '\u292a', '\u00df', 
'\u00df', '\u2316', '\u2316', '\u03c4', '\u03c4', '\u23b4', '\u23b4', '\u0165', '\u0165', '\u0163', 8463 '\u0163', '\u0442', '\u0442', '\u20db', '\u20db', '\u2315', '\u2315', '\U0001d531', '\U0001d531', '\u2234', '\u2234', '\u2234', '\u2234', '\u03b8', '\u03b8', '\u03d1', '\u03d1', '\u03d1', '\u03d1', '\u2248', '\u2248', '\u223c', '\u223c', '\u2009', '\u2009', '\u2248', '\u2248', '\u223c', '\u223c', '\u00fe', '\u00fe', '\u02dc', 8464 '\u02dc', '\u00d7', '\u00d7', '\u22a0', '\u22a0', '\u2a31', '\u2a31', '\u2a30', '\u2a30', '\u222d', '\u222d', '\u2928', '\u2928', '\u22a4', '\u22a4', '\u2336', '\u2336', '\u2af1', '\u2af1', '\U0001d565', '\U0001d565', '\u2ada', '\u2ada', '\u2929', '\u2929', '\u2034', '\u2034', '\u2122', '\u2122', '\u25b5', '\u25b5', '\u25bf', '\u25bf', 8465 '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u225c', '\u225c', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25ec', '\u25ec', '\u225c', '\u225c', '\u2a3a', '\u2a3a', '\u2a39', '\u2a39', '\u29cd', '\u29cd', '\u2a3b', '\u2a3b', '\u23e2', '\u23e2', '\U0001d4c9', 8466 '\U0001d4c9', '\u0446', '\u0446', '\u045b', '\u045b', '\u0167', '\u0167', '\u226c', '\u226c', '\u219e', '\u219e', '\u21a0', '\u21a0', '\u21d1', '\u21d1', '\u2963', '\u2963', '\u00fa', '\u00fa', '\u2191', '\u2191', '\u045e', '\u045e', '\u016d', '\u016d', '\u00fb', '\u00fb', '\u0443', '\u0443', '\u21c5', '\u21c5', '\u0171', 8467 '\u0171', '\u296e', '\u296e', '\u297e', '\u297e', '\U0001d532', '\U0001d532', '\u00f9', '\u00f9', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u2580', '\u2580', '\u231c', '\u231c', '\u231c', '\u231c', '\u230f', '\u230f', '\u25f8', '\u25f8', '\u016b', '\u016b', '\u00a8', '\u00a8', '\u0173', '\u0173', '\U0001d566', '\U0001d566', '\u2191', '\u2191', '\u2195', 8468 '\u2195', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u228e', '\u228e', '\u03c5', '\u03c5', '\u03d2', '\u03d2', '\u03c5', '\u03c5', '\u21c8', '\u21c8', '\u231d', '\u231d', '\u231d', '\u231d', '\u230e', '\u230e', '\u016f', '\u016f', '\u25f9', '\u25f9', '\U0001d4ca', 
'\U0001d4ca', '\u22f0', '\u22f0', 8469 '\u0169', '\u0169', '\u25b5', '\u25b5', '\u25b4', '\u25b4', '\u21c8', '\u21c8', '\u00fc', '\u00fc', '\u29a7', '\u29a7', '\u21d5', '\u21d5', '\u2ae8', '\u2ae8', '\u2ae9', '\u2ae9', '\u22a8', '\u22a8', '\u299c', '\u299c', '\u03f5', '\u03f5', '\u03f0', '\u03f0', '\u2205', '\u2205', '\u03d5', '\u03d5', '\u03d6', '\u03d6', '\u221d', 8470 '\u221d', '\u2195', '\u2195', '\u03f1', '\u03f1', '\u03c2', '\u03c2', '\u03d1', '\u03d1', '\u22b2', '\u22b2', '\u22b3', '\u22b3', '\u0432', '\u0432', '\u22a2', '\u22a2', '\u2228', '\u2228', '\u22bb', '\u22bb', '\u225a', '\u225a', '\u22ee', '\u22ee', '\u007c', '\u007c', '\u007c', '\u007c', '\U0001d533', 8471 '\U0001d533', '\u22b2', '\u22b2', '\U0001d567', '\U0001d567', '\u221d', '\u221d', '\u22b3', '\u22b3', '\U0001d4cb', '\U0001d4cb', '\u299a', '\u299a', '\u0175', '\u0175', '\u2a5f', '\u2a5f', '\u2227', '\u2227', '\u2259', '\u2259', '\u2118', '\u2118', '\U0001d534', '\U0001d534', '\U0001d568', '\U0001d568', '\u2118', '\u2118', '\u2240', '\u2240', '\u2240', '\u2240', '\U0001d4cc', '\U0001d4cc', '\u22c2', '\u22c2', '\u25ef', 8472 '\u25ef', '\u22c3', '\u22c3', '\u25bd', '\u25bd', '\U0001d535', '\U0001d535', '\u27fa', '\u27fa', '\u27f7', '\u27f7', '\u03be', '\u03be', '\u27f8', '\u27f8', '\u27f5', '\u27f5', '\u27fc', '\u27fc', '\u22fb', '\u22fb', '\u2a00', '\u2a00', '\U0001d569', '\U0001d569', '\u2a01', '\u2a01', '\u2a02', '\u2a02', '\u27f9', '\u27f9', '\u27f6', '\u27f6', '\U0001d4cd', '\U0001d4cd', '\u2a06', '\u2a06', '\u2a04', 8473 '\u2a04', '\u25b3', '\u25b3', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u00fd', '\u00fd', '\u044f', '\u044f', '\u0177', '\u0177', '\u044b', '\u044b', '\u00a5', '\u00a5', '\U0001d536', '\U0001d536', '\u0457', '\u0457', '\U0001d56a', '\U0001d56a', '\U0001d4ce', '\U0001d4ce', '\u044e', '\u044e', '\u00ff', '\u00ff', '\u017a', '\u017a', '\u017e', '\u017e', '\u0437', '\u0437', '\u017c', '\u017c', '\u2128', 8474 '\u2128', '\u03b6', '\u03b6', '\U0001d537', '\U0001d537', '\u0436', 
'\u0436', '\u21dd', '\u21dd', '\U0001d56b', '\U0001d56b', '\U0001d4cf', '\U0001d4cf', '\u200d', '\u200d', '\u200c', '\u200c', ];



// Optional DOM event support. The alias and the class below only exist under version(dom_with_events).

/// Signature of an event listener: it is handed the element the listener is attached to and the event being delivered.
version(dom_with_events)
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// A DOM event, like in javascript. Note that this library never fires events on its own - the class is only here for you to use if you want it.
version(dom_with_events)
class Event {
	/// Creates an event called `eventName` aimed at `target` (stored as `srcElement`).
	this(string eventName, Element target) {
		this.srcElement = target;
		this.eventName = eventName;
	}

	/// Marks the event so that `send` and `dispatch` skip the default handlers.
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Marks the event so that `dispatch` stops walking the capture/bubble path after the current element.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented; // set by preventDefault()
	bool propagationStopped; // set by stopPropagation()
	string eventName; // key used to look up the handler lists on each element

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling; // false during the capture pass of dispatch(), true from the bubble pass on

	/// Delivers the event to the target alone, with no propagation. If you want capture and bubbling, use dispatch() instead.
	void send() {
		auto dest = srcElement;
		if(dest is null)
			return;

		runBubblingHandlers(dest);

		if(defaultPrevented)
			return;

		if(eventName in dest.defaultEventHandlers)
			dest.defaultEventHandlers[eventName](dest, this);
	}

	/// Dispatches the event using the capture -> target -> bubble process, then runs the default handlers.
	void dispatch() {
		if(srcElement is null)
			return;

		// path[0] is the target itself, path[$ - 1] is its outermost ancestor
		Element[] path;
		for(Element node = srcElement; node; node = node.parentNode)
			path ~= node;

		// capture pass: outermost ancestor first, target last.
		// default handlers are deliberately not run while capturing.
		isBubbling = false;
		foreach_reverse(node; path) {
			if(eventName in node.capturingEventHandlers)
				foreach(listener; node.capturingEventHandlers[eventName])
					listener(node, this);

			if(propagationStopped)
				break;
		}

		// bubble pass: target first, outermost ancestor last
		isBubbling = true;
		if(!propagationStopped) {
			foreach(node; path) {
				runBubblingHandlers(node);

				if(propagationStopped)
					break;
			}
		}

		// defaultPrevented is sampled once here, before any default handler runs
		if(defaultPrevented)
			return;

		foreach(node; path) {
			if(eventName in node.defaultEventHandlers)
				node.defaultEventHandlers[eventName](node, this);
		}
	}

	// Calls every bubbling handler registered on `on` for this event's name.
	private void runBubblingHandlers(Element on) {
		if(eventName !in on.bubblingEventHandlers)
			return;

		foreach(listener; on.bubblingEventHandlers[eventName])
			listener(on, this);
	}
}

/// Options describing a form field, matching the `pattern`, `required` and `placeholder` attributes read and written below.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// All members left at their defaults.
	@property static FormFieldOptions none() {
		return FormFieldOptions.init;
	}

	/// Defaults, except that `isRequired` is set.
	static FormFieldOptions required() {
		auto opts = FormFieldOptions.init;
		opts.isRequired = true;
		return opts;
	}

	/// Options carrying the given validation `pattern`; `required` is copied into `isRequired`.
	static FormFieldOptions regex(string pattern, bool required = false) {
		auto opts = FormFieldOptions.init;
		opts.isRequired = required;
		opts.pattern = pattern;
		return opts;
	}

	/// Reads the options back from the `required`, `pattern` and `placeholder` attributes of `e`, where present.
	static FormFieldOptions fromElement(Element e) {
		auto opts = FormFieldOptions.init;

		if(e.hasAttribute("required"))
			opts.isRequired = true;

		if(e.hasAttribute("pattern"))
			opts.pattern = e.pattern;

		if(e.hasAttribute("placeholder"))
			opts.placeholder = e.placeholder;

		return opts;
	}

	/// Writes the options that are set onto `e` as attributes and returns `e`. Unset options leave the element untouched.
	Element applyToElement(Element e) {
		if(isRequired)
			e.required = "required";

		if(pattern.length != 0)
			e.pattern = pattern;

		if(placeholder.length != 0)
			e.placeholder = placeholder;

		return e;
	}
}

// To the parser this has to look just like a string, but it can grow on demand
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// subclasses can override these two to feed the stream themselves;
		// by default they forward to the delegates given to the constructor, if any

		string getMore() {
			return getMoreHelper is null ? null : getMoreHelper();
		}

		bool hasMore() {
			return hasMoreHelper !is null && hasMoreHelper();
		}

		// nothing below should need overriding

	public:
		/// Wraps a complete, in-memory string.
		this(string d) {
			this.data = d;
		}

		/// Builds a stream fed by two delegates; the first piece is fetched right away if one is available.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.hasMoreHelper = hasMoreHelper;
			this.getMoreHelper = getMoreHelper;

			if(hasMore())
				this.data ~= getMore();
		}

		/// Length of the data buffered so far; may fetch one more piece first.
		@property final size_t length() {
			// the parser mostly asks for the length right before it touches the next
			// character, which makes this the natural place to pull in more data
			const nearEnd = lastIdx + 1 >= data.length;
			if(nearEnd && hasMore())
				data ~= getMore();

			return data.length;
		}

		/// Returns the code unit at `idx` and remembers the highest index seen.
		final char opIndex(size_t idx) {
			lastIdx = idx > lastIdx ? idx : lastIdx;
			return data[idx];
		}

		/// Returns `data[start .. end]` and remembers `end` if it is the furthest position seen.
		final string opSlice(size_t start, size_t end) {
			lastIdx = end > lastIdx ? end : lastIdx;
			return data[start .. end];
		}

		final size_t opDollar() {
			return length;
		}

		// NOTE(review): `stream ~ s` appends in place and hands back this same object
		// rather than building a new stream - callers may depend on that, so it is kept.
		final Utf8Stream opBinary(string op : "~")(string s) {
			data ~= s;
			return this;
		}

		final Utf8Stream opOpAssign(string op : "~")(string s) {
			data ~= s;
			return this;
		}

		/// Replaces the buffered data with `rhs`.
		final Utf8Stream opAssign(string rhs) {
			data = rhs;
			return this;
		}
	private:
		string data;

		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

// Hands `obj` to arsd.database's fillData, storing each key/value pair it produces via form.setValue.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((key, value) => form.setValue(key, value), obj, name);
}

/++
	Normalizes the whitespace in the given text according to HTML rules.

	History:
		Added March 25, 2022 (dub v10.8)

		The `stripLeadingAndTrailing` argument was added September 13, 2024 (dub v11.6).
+/
string normalizeWhitespace(string text, bool stripLeadingAndTrailing = true) {
	// Params:
	//   text = the raw text to normalize
	//   stripLeadingAndTrailing = when true, whitespace at the very start and very end is
	//     dropped entirely; when false, each such run is kept as a single space
	// Returns: the text with every run of space, tab, \n and \r replaced by one space.
	//
	// Only those four characters count as whitespace here. In particular a non-breaking
	// space (U+00A0) is ordinary content and is never collapsed or stripped, wherever it
	// appears. The previous implementation finished with std.string.stripRight, which
	// strips *any* Unicode whitespace and therefore silently ate trailing non-breaking
	// spaces (and \f, \v, ...) even though the same characters survived everywhere else.

	string ret;
	ret.reserve(text.length);

	// starting out in the "just saw whitespace" state makes leading whitespace disappear
	bool lastWasWhite = stripLeadingAndTrailing;

	// iterating by code unit is fine: the four characters tested are ASCII and can
	// never occur inside a multi-byte UTF-8 sequence
	foreach(char ch; text) {
		if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
			if(lastWasWhite)
				continue;
			lastWasWhite = true;
			ch = ' ';
		} else {
			lastWasWhite = false;
		}

		ret ~= ch;
	}

	// the loop can leave at most one collapsed space at the end; that is the only
	// thing trailing-strip has to remove
	if(stripLeadingAndTrailing && ret.length && ret[$ - 1] == ' ')
		ret = ret[0 .. $ - 1];

	return ret;
}

unittest {
	assert(normalizeWhitespace(" foo ") == "foo");
	assert(normalizeWhitespace("   foo   ") == "foo");
	assert(normalizeWhitespace(" f\n \t oo ") == "f oo");
	assert(normalizeWhitespace(" foo ", false) == " foo ");
	assert(normalizeWhitespace("   foo   ", false) == " foo ");
	assert(normalizeWhitespace("\nfoo", false) == " foo");

	// nothing but whitespace, or nothing at all
	assert(normalizeWhitespace("") == "");
	assert(normalizeWhitespace(" \t\r\n ") == "");

	// a non-breaking space is content, not collapsible whitespace, at either end
	assert(normalizeWhitespace("foo\u00a0 ") == "foo\u00a0");
	assert(normalizeWhitespace("\u00a0foo\u00a0") == "\u00a0foo\u00a0");
}

unittest {
	Document document;

	document = new Document("<test> foo \r </test>");
	assert(document.root.visibleText == "foo");

	document = new Document("<test> foo \r <br>hi</test>");
	assert(document.root.visibleText == "foo\nhi");

	document = new Document("<test> foo \r <br>hi<pre>hi\nthere\n indent<br />line</pre></test>");
	assert(document.root.visibleText == "foo\nhihi\nthere\n indent\nline", document.root.visibleText);
}

/+
/+
	Syntax:

		Tag: tagname#id.class
		Tree: Tag(Children, comma, separated...)
		Children: Tree or Variable
		Variable: $varname with optional |funcname following.
8826 8827 If a variable has a tree after it, it breaks the variable down: 8828 * if array, foreach it does the tree 8829 * if struct, it breaks down the member variables 8830 8831 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 8832 +/ 8833 struct Stringplate { 8834 /++ 8835 8836 +/ 8837 this(string s) { 8838 8839 } 8840 8841 /++ 8842 8843 +/ 8844 Element expand(T...)(T vars) { 8845 return null; 8846 } 8847 } 8848 /// 8849 unittest { 8850 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 8851 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 8852 } 8853 +/ 8854 8855 bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) { 8856 foreach(child; children) { 8857 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 8858 // cool 8859 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) { 8860 // cool, this is an inline element and none of its children contradict that 8861 } else { 8862 // prolly block 8863 return false; 8864 } 8865 } 8866 return true; 8867 } 8868 8869 private bool isSimpleWhite(dchar c) { 8870 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 8871 } 8872 8873 unittest { 8874 // Test for issue #120 8875 string s = `<html> 8876 <body> 8877 <P>AN 8878 <P>bubbles</P> 8879 <P>giggles</P> 8880 </body> 8881 </html>`; 8882 auto doc = new Document(); 8883 doc.parseUtf8(s, false, false); 8884 auto s2 = doc.toString(); 8885 assert( 8886 s2.indexOf("bubbles") < s2.indexOf("giggles"), 8887 "paragraph order incorrect:\n" ~ s2); 8888 } 8889 8890 unittest { 8891 // test for suncarpet email dec 24 2019 8892 // arbitrary id asduiwh 8893 auto document = new Document("<html> 8894 <head> 8895 <meta charset=\"utf-8\"></meta> 8896 <title>Element.querySelector Test</title> 8897 </head> 8898 <body> 8899 <div id=\"foo\"> 8900 <div>Foo</div> 8901 <div>Bar</div> 8902 </div> 8903 
<div id=\"empty\"></div> 8904 <div id=\"empty-but-text\">test</div> 8905 </body> 8906 </html>"); 8907 8908 auto doc = document; 8909 8910 { 8911 auto empty = doc.requireElementById("empty"); 8912 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 8913 } 8914 { 8915 auto empty = doc.requireElementById("empty-but-text"); 8916 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 8917 } 8918 8919 assert(doc.querySelectorAll("div div").length == 2); 8920 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 8921 assert(doc.querySelectorAll("> html").length == 0); 8922 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 8923 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 8924 8925 8926 assert(doc.root.matches("html")); 8927 assert(!doc.root.matches("nothtml")); 8928 assert(doc.querySelector("#foo > div").matches("div")); 8929 assert(doc.querySelector("body > #foo").matches("#foo")); 8930 8931 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 8932 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 8933 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 8934 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 8935 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 8936 8937 // also confirming the querySelector works via the mdn definition 8938 auto foo = doc.requireSelector("#foo"); 8939 assert(foo.querySelector("#foo > div") !is null); 8940 assert(foo.querySelector("body #foo > div") !is null); 8941 8942 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 8943 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 
8944 //assert(foo.querySelectorAll("#foo > div").length == 2); 8945 } 8946 8947 unittest { 8948 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 8949 auto document = new Document(`<article> 8950 <div id="div-01">Here is div-01 8951 <div id="div-02">Here is div-02 8952 <div id="div-03">Here is div-03</div> 8953 </div> 8954 </div> 8955 </article>`, true, true); 8956 8957 auto el = document.getElementById("div-03"); 8958 assert(el.closest("#div-02").id == "div-02"); 8959 assert(el.closest("div div").id == "div-03"); 8960 assert(el.closest("article > div").id == "div-01"); 8961 assert(el.closest(":not(div)").tagName == "article"); 8962 8963 assert(el.closest("p") is null); 8964 assert(el.closest("p, div") is el); 8965 } 8966 8967 unittest { 8968 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 8969 auto document = new Document(`<test> 8970 <div class="foo"><p>cool</p><span>bar</span></div> 8971 <main><p>two</p></main> 8972 </test>`); 8973 8974 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 8975 assert(document.querySelector("div:where(.foo)") !is null); 8976 } 8977 8978 unittest { 8979 immutable string html = q{ 8980 <root> 8981 <div class="roundedbox"> 8982 <table> 8983 <caption class="boxheader">Recent Reviews</caption> 8984 <tr> 8985 <th>Game</th> 8986 <th>User</th> 8987 <th>Rating</th> 8988 <th>Created</th> 8989 </tr> 8990 8991 <tr> 8992 <td>June 13, 2020 15:10</td> 8993 <td><a href="/reviews/8833">[Show]</a></td> 8994 </tr> 8995 8996 <tr> 8997 <td>June 13, 2020 15:02</td> 8998 <td><a href="/reviews/8832">[Show]</a></td> 8999 </tr> 9000 9001 <tr> 9002 <td>June 13, 2020 14:41</td> 9003 <td><a href="/reviews/8831">[Show]</a></td> 9004 </tr> 9005 </table> 9006 </div> 9007 </root> 9008 }; 9009 9010 auto doc = new Document(cast(string)html); 9011 // this should select the second table row, but... 
9012 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9013 assert(rd !is null); 9014 assert(rd.href == "/reviews/8832"); 9015 9016 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9017 assert(rd !is null); 9018 assert(rd.href == "/reviews/8832"); 9019 } 9020 9021 unittest { 9022 try { 9023 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 9024 assert(0); 9025 } catch(Exception e) { 9026 // good; it should throw an exception, not an error. 9027 } 9028 } 9029 9030 unittest { 9031 // toPrettyString is not stable, but these are some best-effort attempts 9032 // despite these being in a test, I might change these anyway! 9033 assert(Element.make("a").toPrettyString == "<a></a>"); 9034 assert(Element.make("a", "").toPrettyString(false, 0, " ") == "<a></a>"); 9035 assert(Element.make("a", " ").toPrettyString(false, 0, " ") == "<a> </a>");//, Element.make("a", " ").toPrettyString(false, 0, " ")); 9036 assert(Element.make("a", "b").toPrettyString == "<a>b</a>"); 9037 assert(Element.make("a", "b").toPrettyString(false, 0, "") == "<a>b</a>"); 9038 9039 { 9040 auto document = new Document("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9041 auto pretty = document.toPrettyString(false, 0, " "); 9042 assert(pretty == 9043 `<!DOCTYPE html> 9044 <html> 9045 <body> 9046 <p>hello <a href="world">world</a></p> 9047 </body> 9048 </html>`, pretty); 9049 } 9050 9051 { 9052 auto document = new XmlDocument("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9053 assert(document.toPrettyString(false, 0, " ") == 9054 `<?xml version="1.0" encoding="UTF-8"?> 9055 <html> 9056 <body> 9057 <p> 9058 hello 9059 <a href="world">world</a> 9060 </p> 9061 </body> 9062 </html>`); 9063 } 9064 9065 foreach(test; [ 9066 "<a att=\"http://ele\"><b><ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n 
<ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>", 9067 "<a att=\"http://ele\"><b><ele1>Hello</ele1><c><d><ele2>How are you?</ele2></d><e><ele3>Good & you?</ele3></e></c></b></a>", 9068 ] ) 9069 { 9070 auto document = new XmlDocument(test); 9071 assert(document.root.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9072 assert(document.toPrettyString(false, 0, " ") == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9073 auto omg = document.root; 9074 omg.parent_ = null; 9075 assert(omg.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9076 } 9077 9078 { 9079 auto document = new XmlDocument(`<a><b>toto</b><c></c></a>`); 9080 assert(document.root.toPrettyString(false, 0, null) == `<a><b>toto</b><c></c></a>`); 9081 assert(document.root.toPrettyString(false, 0, " ") == `<a> 9082 <b>toto</b> 9083 <c></c> 9084 </a>`); 9085 } 9086 9087 { 9088 auto str = `<!DOCTYPE html> 9089 <html> 9090 <head> 9091 <title>Test</title> 9092 </head> 9093 <body> 9094 <p>Hello there</p> 9095 <p>I like <a href="">Links</a></p> 9096 <div> 9097 this is indented since there's a block inside 9098 <p>this is the block</p> 9099 and this gets its own line 9100 </div> 9101 </body> 9102 </html>`; 9103 auto doc = new Document(str, true, true); 9104 assert(doc.toPrettyString == str); 9105 } 9106 } 9107 9108 unittest { 9109 auto document = new Document("<foo><items><item><title>test</title><desc>desc</desc></item></items></foo>"); 9110 auto items = document.root.requireSelector("> items"); 9111 auto item = items.requireSelector("> item"); 9112 auto title = 
item.requireSelector("> title"); 9113 9114 // this not actually implemented at this point but i might want to later. it prolly should work as an extension of the standard behavior 9115 // assert(title.requireSelector("~ desc").innerText == "desc"); 9116 9117 assert(item.requireSelector("title ~ desc").innerText == "desc"); 9118 9119 assert(items.querySelector("item:has(title)") !is null); 9120 assert(items.querySelector("item:has(nothing)") is null); 9121 9122 assert(title.innerText == "test"); 9123 } 9124 9125 unittest { 9126 auto document = new Document("broken"); // just ensuring it doesn't crash 9127 } 9128 9129 9130 /* 9131 Copyright: Adam D. Ruppe, 2010 - 2023 9132 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 9133 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 9134 */