// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary


// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;
	import std.stdio; // for writeln

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

static import arsd.core;
import arsd.core : encodeUriComponent, decodeUriComponent;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes that may be accessed directly, without going through .attr.
// Returns true only for an exact (case-sensitive) match against the list below.
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/++
	The main document interface, including a html or xml parser.

	There are three main ways to create a Document:

	If you want to parse something and inspect the tags, you can use the [this|constructor]:
	---
		// create and parse some HTML in one call
		auto document = new Document("<html></html>");

		// or some XML
		auto document = new Document("<xml></xml>", true, true); // strict mode enabled

		// or better yet:
		auto document = new XmlDocument("<xml></xml>"); // specialized subclass
	---

	If you want to download something and parse it in one call, the [fromUrl] static function can help:
	---
		auto document = Document.fromUrl("http://dlang.org/");
	---
	(note that this requires my [arsd.characterencodings] and [arsd.http2] libraries)

	And, if you need to inspect things like `<%= foo %>` tags and comments, you can add them to the dom like this, with the [enableAddingSpecialTagsToDom]
	and [parseUtf8] or [parseGarbage] functions:
	---
		auto document = new Document();
		document.enableAddingSpecialTagsToDom();
		document.parseUtf8("<example></example>", true, true); // change the trues to false to switch from xml to html mode
	---

	You can also modify things like [selfClosedElements] and [rawSourceElements] before calling the `parse` family of functions to do further advanced tasks.

	However you parse it, it will put a few things into special variables.

	[root] contains the root element.
	[prolog] contains the instructions before the root (like `<!DOCTYPE html>`). To keep the original things, you will need to [enableAddingSpecialTagsToDom] first, otherwise the library will return generic strings in there. [piecesBeforeRoot] will have other parsed instructions, if [enableAddingSpecialTagsToDom] is called.
	[piecesAfterRoot] will contain any xml-looking data after the root tag is closed.

	Most often though, you will not need to look at any of that data, since `Document` itself has methods like [querySelector], [appendChild], and more which will forward to the root [Element] for you.
+/
/// Group: core_functionality
class Document : FileResource, DomParent {
	// A Document is its own document, so this simply hands back `this`.
	// NOTE(review): presumably part of the DomParent interface, which is declared outside this view - confirm.
	inout(Document) asDocument() inout { return this; }
	// A Document is never an Element, so this always yields null.
	inout(Element) asElement() inout { return null; }

	/++
		These three functions, `processTagOpen`, `processTagClose`, and `processNodeWhileParsing`, allow you to process elements as they are parsed and choose to not append them to the dom tree.


		`processTagOpen` is called as soon as it reads the tag name and attributes into the passed `Element` structure, in order
		of appearance in the file. `processTagClose` is called similarly, when that tag has been closed. In between, all descendant
		nodes - including tags as well as text and other nodes - are passed to `processNodeWhileParsing`. Finally, after `processTagClose`,
		the node itself is passed to `processNodeWhileParsing` only after its children.

		So, given:

		```xml
		<thing>
			<child>
				<grandchild></grandchild>
			</child>
		</thing>
		```

		It would call:

		$(NUMBERED_LIST
			* processTagOpen(thing)
			* processNodeWhileParsing(thing, whitespace text) // the newlines, spaces, and tabs between the thing tag and child tag
			* processTagOpen(child)
			* processNodeWhileParsing(child, whitespace text)
			* processTagOpen(grandchild)
			* processTagClose(grandchild)
			* processNodeWhileParsing(child, grandchild)
			* processNodeWhileParsing(child, whitespace text) // whitespace after the grandchild
			* processTagClose(child)
			* processNodeWhileParsing(thing, child)
			* processNodeWhileParsing(thing, whitespace text)
			* processTagClose(thing)
		)

		The Element objects passed to those functions are the same ones you'd see; the tag open and tag close calls receive the same
		object, so you can compare them with the `is` operator if you want.

		The default behavior of each function is that `processTagOpen` and `processTagClose` do nothing.
		`processNodeWhileParsing`'s default behavior is to call `parent.appendChild(child)`, in order to
		build the dom tree. If you do not want the dom tree, you can override this function to do nothing.

		If you do not choose to append child to parent in `processNodeWhileParsing`, the garbage collector is free to clean up
		the node even as the document is not finished parsing, allowing memory use to stay lower. Memory use will tend to scale
		approximately with the max depth in the element tree rather than the entire document size.

		To cancel processing before the end of a document, you'll have to throw an exception and catch it at your call to parse.
		There is no other way to stop early and there are no concrete plans to add one.

		There are several ways to use this: you might use `processTagOpen` and `processTagClose` to keep a stack or
		other state variables to process nodes as they come and never add them to the actual tree. You might also build partial
		subtrees to use all the convenient methods in `processTagClose`, but then not add that particular node to the rest of the
		tree to keep memory usage down.

		Examples:

		Suppose you have a large array of items under the root element you'd like to process individually, without
		taking all the items into memory at once. You can do that with code like this:
		---
		import arsd.dom;
		class MyStream : XmlDocument {
			this(string s) { super(s); } // need to forward the constructor we use

			override void processNodeWhileParsing(Element parent, Element child) {
				// don't append anything to the root node, since we don't need them
				// all in the tree - that'd take too much memory -
				// but still build any subtree for each individual item for ease of processing
				if(parent is root)
					return;
				else
					super.processNodeWhileParsing(parent, child);
			}

			int count;
			override void processTagClose(Element element) {
				if(element.tagName == "item") {
					// process the element here with all the regular dom functions on `element`
					count++;
					// can still use dom functions on the subtree we built
					assert(element.requireSelector("name").textContent == "sample");
				}
			}
		}

		void main() {
			// generate an example file with a million items
			string xml = "<list>";
			foreach(i; 0 .. 1_000_000) {
				xml ~= "<item><name>sample</name><type>example</type></item>";
			}
			xml ~= "</list>";

			auto document = new MyStream(xml);
			assert(document.count == 1_000_000);
		}
		---

		This example runs in about 1/10th of the memory and 2/3 of the time on my computer relative to a default [XmlDocument] full tree dom.

	By overriding these three functions to fit the specific document and processing requirements you have, you might realize even bigger
	gains over the normal full document tree while still getting most of the benefits of the convenient dom functions.

	Tip: if you use a [Utf8Stream] instead of a string, you might be able to bring the memory use further down. The easiest way to do that
	is something like this when loading from a file:

	---
	import std.stdio;
	auto file = File("filename.xml", "rb");
	auto textStream = new Utf8Stream(() {
		// get more
		auto buffer = new char[](32 * 1024);
		return cast(string) file.rawRead(buffer);
	}, () {
		// has more
		return !file.eof;
	});

	auto document = new XmlDocument(textStream);
	---

	You'll need to forward a constructor in your subclasses that takes `Utf8Stream` too if you want to subclass to override the streaming parsing functions.

	Note that if you do save parts of the document strings or objects, it might prevent the GC from freeing that string block anyway, since dom.d will often slice into its buffer while parsing instead of copying strings. It will depend on your specific case to know if this actually saves memory or not for you.

	Bugs:
		Even if you use a [Utf8Stream] to feed data and decline to append to the tree, the entire xml text is likely to
		end up in memory anyway.

	See_Also:
		[Document#examples]'s high level streaming example.

	History:
		`processNodeWhileParsing` was added January 6, 2023.

		`processTagOpen` and `processTagClose` were added February 21, 2025.
+/
void processTagOpen(Element what) {
	// intentionally empty: the default hook does nothing; override it to observe opening tags
}

/// ditto
void processTagClose(Element what) {
	// intentionally empty: the default hook does nothing; override it to observe closing tags
}

/// ditto
void processNodeWhileParsing(Element parent, Element child) {
	// default behavior: attach the node to its parent, which is what builds the dom tree
	parent.appendChild(child);
}

/++
	Convenience method for web scraping.
Requires [arsd.http2] to be
	included in the build as well as [arsd.characterencodings].

	This downloads the given url with a GET request and builds a document
	from the response body. When `strictMode` is true the body goes through
	[parse] in case-sensitive, strict mode, using the charset reported by the
	response; otherwise it goes through [parseGarbage].
+/
static Document fromUrl()(string url, bool strictMode = false) {
	import arsd.http2;

	// fetch the resource and wait for the whole response
	auto client = new HttpClient();
	auto response = client.navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();
	auto source = cast(string) response.content;

	auto parsed = new Document();
	if(!strictMode)
		parsed.parseGarbage(source);
	else
		parsed.parse(source, true, true, response.contentTypeCharset);

	return parsed;
}

/++
	Creates a document with the given source data. If you want HTML behavior, use `caseSensitive` and `strict` set to `false`. For XML mode, set them to `true`.

	Please note that anything after the root element will be found in [piecesAfterRoot]. Comments, processing instructions, and other special tags will be stripped out by default. You can customize this by using the zero-argument constructor and setting callbacks on the [parseSawComment], [parseSawBangInstruction], [parseSawAspCode], [parseSawPhpCode], and [parseSawQuestionInstruction] members, then calling one of the [parseUtf8], [parseGarbage], or [parse] functions. Calling the convenience method, [enableAddingSpecialTagsToDom], will enable all those things at once.

	See_Also:
		[parseGarbage]
		[parseUtf8]
		[fromUrl]
+/
this(string data, bool caseSensitive = false, bool strict = false) {
	parseUtf8(data, caseSensitive, strict);
}

/**
	Creates an empty document. It has *nothing* in it at all, ready.
*/
this() {

}

/++
	This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
It returns a struct that forwards calls to all elements it holds, and returns itself so you
	can chain it.

	Example: document["p"].innerText("hello").addClass("modified");

	Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }

	Note: always use function calls (not property syntax) and don't use toString in there for best results.

	You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
	you could put in some kind of custom filter function tho.
+/
ElementCollection opIndex(string selector) {
	// start from a collection holding just the root, then narrow it with the selector
	auto e = ElementCollection(this.root);
	return e[selector];
}

// backing storage for the contentType property pair below
string _contentType = "text/html; charset=utf-8";

/// If you're using this for some other kind of XML, you can
/// set the content type here.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. Otherwise,
/// if you don't set it, it assumes text/html; charset=utf-8.
///
/// Returns: the value that was just stored.
@property string contentType(string mimeType) {
	_contentType = mimeType;
	return _contentType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically. A Document has no file name, so this is always null.
@property string filename() const { return null; }

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return _contentType;
}

/// implementing the FileResource interface; it calls toString.
override immutable(ubyte)[] getData() const {
	// the serialized text is handed back as bytes via a cast
	return cast(immutable(ubyte)[]) this.toString();
}


/*
/// Concatenates any consecutive text nodes
void normalize() {

}
*/

/++
	Adds objects to the dom representing things normally stripped out during the default parse, like comments, `<!instructions>`, `<% code%>`, and `<? code?>` all at once.

	It does this by pointing every `parseSaw*` delegate at a function that always returns true. This overwrites
	anything you set on them before, and anything you set on them afterward overwrites this.
	Call this before calling parse().

	Note this will also preserve the prolog and doctype from the original file, if there was one.

	See_Also:
		[parseSawComment]
		[parseSawAspCode]
		[parseSawPhpCode]
		[parseSawQuestionInstruction]
		[parseSawBangInstruction]
+/
void enableAddingSpecialTagsToDom() {
	parseSawComment = (string) => true;
	parseSawAspCode = (string) => true;
	parseSawPhpCode = (string) => true;
	parseSawQuestionInstruction = (string) => true;
	parseSawBangInstruction = (string) => true;
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document. It will be in a [HtmlComment] object.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document. It will be in an [AspCode] object.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document. It will be in a [PhpCode] object.
bool delegate(string) parseSawPhpCode;

/// If the parser sees a <?xxx> that is not php or asp,
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document. It will be in a [QuestionInstruction] object.
bool delegate(string) parseSawQuestionInstruction;

/// If the parser sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document. It will be in a [BangInstruction] object.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)
///
/// NOTE: this makes no attempt at added security, but it will try to recover from anything instead of throwing.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	parse(data, false, false, null);
}

/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
/// Will throw exceptions on things like unclosed tags.
///
/// `pureXmlMode` is passed straight through to [parseStream].
void parseStrict(string data, bool pureXmlMode = false) {
	parseStream(toUtf8Stream(data), true, true, pureXmlMode);
}

/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// Those will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	// no encoding detection or conversion: the text is wrapped and handed to the parser as-is
	parseStream(toUtf8Stream(data), caseSensitive, strict);
}

// Works out the encoding of `rawdata`, converts the data to UTF-8 when the
// encoding name is not exactly "UTF-8", and wraps the result for the parser.
//
// rawdata      = the document bytes, typed as a string
// dataEncoding = encoding name if the caller knows it; null means "sniff it":
//                first tryToDetermineEncoding, then an `encoding="` within
//                roughly the first 100 bytes, then the first `charset=`
// strict       = true: throw MarkupException if the encoding stays unknown and
//                let conversion errors propagate.
//                false: fall back to "Windows 1252" when unknown, when data
//                labeled utf8 fails validation, or when conversion throws.
//
// this is a template so we get lazy import behavior
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;
	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// the input may end right after "charset=" (e.g. a truncated download),
					// so make sure there is a byte to look at before peeking for the quote
					if(idx < dataAsBytes.length && dataAsBytes[idx] == '"')
						idx++;
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// read till a quote or about 12 chars, whichever comes first...
				// NOTE(review): an unquoted or single-quoted value (`<meta charset=utf-8>`)
				// is not terminated here, so up to 12 bytes of whatever follows end up
				// in the name - confirm whether that should be tightened.
				auto end = idx;
				while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12)
					end++;

				dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
		// if we really don't know by here, it means we already tried UTF-8,
		// looked for utf 16 and 32 byte order marks, and looked for xml or meta
		// tags... let's assume it's Windows-1252, since that's probably the most
		// common aside from utf that wouldn't be labeled.

		dataEncoding = "Windows 1252";
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// NOTE(review): after the normalization above the name is never the literal
	// "UTF-8" in non-strict mode, so that path always goes through convertToUtf8.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or broken label: retry as the most likely 8 bit encoding
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	return toUtf8Stream(data);
}

// Wraps already-UTF-8 text in whatever Utf8Stream is: returned as-is when
// Utf8Stream is an alias of string, otherwise passed to the Utf8Stream constructor.
private
Utf8Stream toUtf8Stream(in string rawdata) {
	string data = rawdata;
	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/++
	List of elements that can be assumed to be self-closed
	in this document.
The default for a Document is a hard-coded
	list of ones appropriate for HTML. For [XmlDocument], it defaults
	to empty. You can modify this after construction but before parsing.

	History:
		Added February 8, 2021 (included in dub release 9.2)

		Changed from `string[]` to `immutable(string)[]` on
		February 4, 2024 (dub v11.5) to plug a hole discovered
		by the OpenD compiler's diagnostics.
+/
immutable(string)[] selfClosedElements = htmlSelfClosedElements;

/++
	List of elements that contain raw CDATA content for this
	document, e.g. `<script>` and `<style>` for HTML. The parser
	will read until the closing string and put everything else
	in a [RawSource] object for future processing, not trying to
	do any further child nodes or attributes, etc.

	History:
		Added February 4, 2024 (dub v11.5)

+/
immutable(string)[] rawSourceElements = htmlRawSourceElements;

/++
	List of elements that are considered inline for pretty printing.
	The default for a Document is hard-coded to something appropriate
	for HTML. For [XmlDocument], it defaults to empty. You can modify
	this after construction but before parsing.

	History:
		Added June 21, 2021 (included in dub release 10.1)

		Changed from `string[]` to `immutable(string)[]` on
		February 4, 2024 (dub v11.5) to plug a hole discovered
		by the OpenD compiler's diagnostics.
+/
immutable(string)[] inlineElements = htmlInlineElements;

/**
	Take XMLish data and try to make the DOM tree out of it.

	The goal isn't to be perfect, but to just be good enough to
	approximate Javascript's behavior.

	If strict, it throws on something that doesn't make sense.
	(Examples: mismatched tags. It doesn't validate!)
	If not strict, it tries to recover anyway, and only throws
	when something is REALLY unworkable.

	If strict is false, it uses a magic list of tags that needn't
	be closed. If you are writing a document specifically for this,
	try to avoid such - use self closed tags at least. Easier to parse.

	The dataEncoding argument can be used to pass a specific
	charset encoding for automatic conversion. If null (which is NOT
	the default!), it tries to determine the encoding from the data itself,
	using the xml prolog or meta tags. If it still cannot tell, strict mode
	throws a MarkupException and non-strict mode falls back to Windows-1252.

	If you leave dataEncoding at its default of UTF-8 and that assumption
	is wrong, it can throw on non-ascii characters!


	Note that it previously assumed the data was encoded as UTF-8, which
	is why the dataEncoding argument defaults to that.

	So it shouldn't break backward compatibility.

	But, if you want the best behavior on wild data - figuring it out from the document
	instead of assuming - you'll probably want to change that argument to null.

	This is a template so it lazily imports arsd.characterencodings, which is required
	to fix up data encodings.

	If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
	dependency. If it is data from the Internet though, a random website, the encoding
	is often a lie. This function, if dataEncoding == null, can correct for that, or
	you can try parseGarbage. In those cases, arsd.characterencodings is required to
	compile.
*/
void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
	// step 1: detect/convert the encoding; step 2: run the actual parser on the result
	auto data = handleDataEncoding(rawdata, dataEncoding, strict);
	parseStream(data, caseSensitive, strict);
}

// note: this works best in strict mode, unless data is just a simple string wrapper
void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false, bool pureXmlMode = false) {
	// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
	// of my big app.
670 671 assert(data !is null); 672 673 // go through character by character. 674 // if you see a <, consider it a tag. 675 // name goes until the first non tagname character 676 // then see if it self closes or has an attribute 677 678 // if not in a tag, anything not a tag is a big text 679 // node child. It ends as soon as it sees a < 680 681 // Whitespace in text or attributes is preserved, but not between attributes 682 683 // & and friends are converted when I know them, left the same otherwise 684 685 686 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 687 //validate(data); // it *must* be UTF-8 for this to work correctly 688 689 sizediff_t pos = 0; 690 691 clear(); 692 693 loose = !caseSensitive; 694 695 bool sawImproperNesting = false; 696 bool nonNestableHackRequired = false; 697 698 int getLineNumber(sizediff_t p) { 699 return data.getLineNumber(p); 700 } 701 702 void parseError(string message) { 703 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 704 } 705 706 bool eatWhitespace() { 707 bool ateAny = false; 708 while(pos < data.length && data[pos].isSimpleWhite) { 709 pos++; 710 ateAny = true; 711 } 712 return ateAny; 713 } 714 715 string readTagName() { 716 717 data.markDataDiscardable(pos); 718 719 // remember to include : for namespaces 720 // basically just keep going until >, /, or whitespace 721 auto start = pos; 722 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 723 { 724 pos++; 725 if(pos == data.length) { 726 if(strict) 727 throw new Exception("tag name incomplete when file ended"); 728 else 729 break; 730 } 731 } 732 733 if(!caseSensitive) 734 return toLower(data[start..pos]); 735 else 736 return data[start..pos]; 737 } 738 739 string readAttributeName() { 740 // remember to include : for namespaces 741 // basically just keep going until >, /, or whitespace 742 auto start = pos; 743 
while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite)
{
	if(data[pos] == '<') {
		if(strict)
			throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos)));
		else
			break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there
	}
	pos++;
	if(pos == data.length) {
		if(strict)
			throw new Exception("unterminated attribute name");
		else
			break;
	}
}

if(!caseSensitive)
	return toLower(data[start..pos]);
else
	return data[start..pos];
}

// Reads the attribute value starting at `pos` and returns it with entities decoded.
//
// Quoted values (single or double quotes) run to the matching quote, which is consumed.
// Unquoted values are rejected through parseError in strict mode; otherwise they run
// to whitespace, `>` or `/>`, and the terminator is left for the caller.
//
// At end of input this throws in strict mode and returns null in loose mode.
string readAttributeValue() {
	if(pos >= data.length) {
		if(strict)
			throw new Exception("no attribute value before end of file");
		else
			return null;
	}
	switch(data[pos]) {
		case '\'':
		case '"':
			auto started = pos;
			char end = data[pos];
			pos++;
			auto start = pos;
			while(pos < data.length && data[pos] != end)
				pos++;
			if(strict && pos == data.length)
				throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started));
			string v = htmlEntitiesDecode(data[start..pos], strict);
			// skip over the closing quote - but only if there is one. In loose mode
			// an unterminated value stops at the end of the input, and stepping
			// further would leave pos beyond data.length.
			if(pos < data.length)
				pos++;
			return v;
		default:
			if(strict)
				parseError("Attributes must be quoted");
			// read until whitespace or terminator (/> or >)
			auto start = pos;
			while(
				pos < data.length &&
				data[pos] != '>' &&
				// unquoted attributes might be urls, so gotta be careful with them and self-closed elements
				!(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') &&
				!data[pos].isSimpleWhite)
					pos++;

			string v = htmlEntitiesDecode(data[start..pos], strict);
			// don't skip the end - we'll need it later
			return v;
	}
}

// Consumes everything up to (not including) the next '<' or the end of the input
// and wraps it in a TextNode built from the still-encoded source text.
TextNode readTextNode() {
	auto start = pos;
	while(pos < data.length && data[pos] != '<') {
		pos++;
	}

	return TextNode.fromUndecodedString(this, data[start..pos]);
}

// this is obsolete!
// Same scan as readTextNode, but the text is wrapped in a RawSource node.
RawSource readCDataNode() {
	auto start = pos;
	while(pos < data.length && data[pos] != '<') {
		pos++;
	}

	return new RawSource(this, data[start..pos]);
}


// Result of one readElement call; `type` says which of the other fields are meaningful.
struct Ele {
	int type; // element or closing tag or nothing
		/*
			type == 0 means regular node, self-closed (element is valid)
			type == 1 means closing tag (payload is the tag name, element may be valid)
			type == 2 means you should ignore it completely
			type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
			type == 4 means the document was totally empty
		*/
	Element element; // for type == 0 or type == 3
	string payload; // for type == 1
}
// recursively read a tag
//
// Reads one node starting at `pos` and reports it as an Ele (see Ele.type for the codes).
//
// `parentChain` is only maintained in loose mode: each recursion appends the tag being
// read, so parentChain[0] is the outermost open tag. It is used to fake a closing tag
// when the input ends early and to decide whether a mismatched closing tag actually
// closes one of the ancestors.
Ele readElement(string[] parentChain = null) {
	// FIXME: this is the slowest function in this module, by far, even in strict mode.
	// Loose mode should perform decently, but strict mode is the important one.
	if(!strict && parentChain is null)
		parentChain = [];

	static string[] recentAutoClosedTags;

	if(pos >= data.length)
	{
		if(strict) {
			throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
		} else {
			if(parentChain.length)
				return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
			else
				return Ele(4); // signal emptiness upstream
		}
	}

	if(data[pos] != '<') {
		return Ele(0, readTextNode(), null);
	}

	enforce(data[pos] == '<');
	pos++;
	// from here on, pos points at the character AFTER the '<'
	if(pos == data.length) {
		if(strict)
			throw new MarkupException("Found trailing < at end of file");
		// if not strict, we'll just skip the switch
	} else
	switch(data[pos]) {
		// I don't care about these, so I just want to skip them
		case '!': // might be a comment, a doctype, or a special instruction
			pos++;

			// FIXME: we should store these in the tree too
			// though I like having it stripped out tbh.

			if(pos == data.length) {
				if(strict)
					throw new MarkupException("<! opened at end of file");
			} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
				// comment
				pos += 2;

				// FIXME: technically, a comment is anything
				// between -- and -- inside a <!> block.
				// so in <!-- test -- lol> , the " lol" is NOT a comment
				// and should probably be handled differently in here, but for now
				// I'll just keep running until --> since that's the common way

				auto commentStart = pos;
				// `<=` so the final three characters of the input are examined too;
				// with `<` a comment whose --> ends the file was reported as unclosed
				// (and, in loose mode, kept the --> as part of its text).
				while(pos + 3 <= data.length && data[pos .. pos + 3] != "-->")
					pos++;

				auto end = commentStart;

				if(pos + 3 > data.length) {
					// ran out of input without finding the terminator
					if(strict)
						throw new MarkupException("unclosed comment");
					end = data.length;
					pos = data.length;
				} else {
					end = pos;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '>');
					pos++;
				}

				if(parseSawComment !is null)
					if(parseSawComment(data[commentStart .. end])) {
						return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
					}
			} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
				pos += 7;

				auto cdataStart = pos;

				ptrdiff_t end = -1;
				typeof(end) cdataEnd;

				if(pos < data.length) {
					// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
					end = data[pos .. $].indexOf("]]>");
				}

				if(end == -1) {
					if(strict)
						throw new MarkupException("Unclosed CDATA section");
					end = pos;
					cdataEnd = pos;
				} else {
					cdataEnd = pos + end;
					pos = cdataEnd + 3;
				}

				return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
			} else {
				auto start = pos;
				while(pos < data.length && data[pos] != '>')
					pos++;

				auto bangEnds = pos;
				if(pos == data.length) {
					if(strict)
						throw new MarkupException("unclosed processing instruction (<!xxx>)");
				} else pos++; // skipping the >

				if(parseSawBangInstruction !is null)
					if(parseSawBangInstruction(data[start .. bangEnds])) {
						// FIXME: these should be able to modify the parser state,
						// doing things like adding entities, somehow.

						return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
					}
			}

			/*
			if(pos < data.length && data[pos] == '>')
				pos++; // skip the >
			else
				assert(!strict);
			*/
		break;
		case '%':
		case '?':
			/*
				Here's what we want to support:

				<% asp code %>
				<%= asp code %>
				<?php php code ?>
				<?= php code ?>

				The contents don't really matter, just if it opens with
				one of the above for, it ends on the two char terminator.

				<?something>
					this is NOT php code
					because I've seen this in the wild: <?EM-dummyText>

					This could be php with shorttags which would be cut off
					prematurely because if(a >) - that > counts as the close
					of the tag, but since dom.d can't tell the difference
					between that and the <?EM> real world example, it will
					not try to look for the ?> ending.

				The difference between this and the asp/php stuff is that it
				ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
				on >.
			*/

			char end = data[pos];
			auto started = pos;
			bool isAsp = end == '%';
			int currentIndex = 0;
			bool isPhp = false;
			bool isEqualTag = false;
			int phpCount = 0;

			more:
			pos++; // skip the start
			if(pos == data.length) {
				if(strict)
					throw new MarkupException("Unclosed <"~end~" by end of file");
			} else {
				currentIndex++;
				if(currentIndex == 1 && data[pos] == '=') {
					if(!isAsp)
						isPhp = true;
					isEqualTag = true;
					goto more;
				}
				if(currentIndex == 1 && data[pos] == 'p')
					phpCount++;
				if(currentIndex == 2 && data[pos] == 'h')
					phpCount++;
				if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
					isPhp = true;

				if(data[pos] == '>') {
					if((isAsp || isPhp) && data[pos - 1] != end)
						goto more;
					// otherwise we're done
				} else
					goto more;
			}

			//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
			auto code = data[started .. pos];


			assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
			if(pos < data.length)
				pos++; // get past the >

			if(isAsp && parseSawAspCode !is null) {
				if(parseSawAspCode(code)) {
					return Ele(3, new AspCode(this, code), null);
				}
			} else if(isPhp && parseSawPhpCode !is null) {
				if(parseSawPhpCode(code)) {
					return Ele(3, new PhpCode(this, code), null);
				}
			} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
				if(parseSawQuestionInstruction(code)) {
					return Ele(3, new QuestionInstruction(this, code), null);
				}
			}
		break;
		case '/': // closing an element
			pos++; // skip the start
			auto p = pos;
			while(pos < data.length && data[pos] != '>')
				pos++;
			//writefln("</%s>", data[p..pos]);
			if(pos == data.length && data[pos-1] != '>') {
				if(strict)
					throw new MarkupException("File ended before closing tag had a required >");
				else
					data ~= ">"; // just hack it in
			}
			pos++; // skip the '>'

			string tname = data[p..pos-1];
			if(!strict)
				tname = tname.strip;
			if(!caseSensitive)
				tname = tname.toLower();

			return Ele(1, null, tname); // closing tag reports itself here
		case ' ': // assume it isn't a real element...
			if(strict) {
				parseError("bad markup - improperly placed <");
				assert(0); // parseError always throws
			} else
				return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
		default:

			if(!strict) {
				// what about something that kinda looks like a tag, but isn't?
				auto nextTag = data[pos .. $].indexOf("<");
				auto closeTag = data[pos .. $].indexOf(">");
				if(closeTag != -1 && nextTag != -1)
					if(nextTag < closeTag) {
						// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically

						auto equal = data[pos .. $].indexOf("=\"");
						if(equal != -1 && equal < closeTag) {
							// this MIGHT be ok, soldier on
						} else {
							// definitely no good, this must be a (horribly distorted) text node.
							//
							// pos is already past the '<' (it was skipped before the switch),
							// so the text is read from right here. Advancing once more, as
							// this branch used to, dropped the first character after the '<'.
							auto node = readTextNode();
							node.contents = "<" ~ node.contents; // put this back
							return Ele(0, node, null);
						}
					}
			}

			string tagName = readTagName();
			AttributesHolder attributes;

			// Finishes the opening tag at pos, creates the element, and - unless it is
			// self-closed or a raw-source element - reads children until the tag closes.
			Ele addTag(bool selfClosed) {
				if(selfClosed)
					pos++;
				else {
					if(!strict)
						if(tagName.isInArray(selfClosedElements))
							// these are de-facto self closed
							selfClosed = true;
				}

				import std.algorithm.comparison;

				if(strict) {
					enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - data.contextToKeep) .. min(data.length, pos + data.contextToKeep)]));
				} else {
					// if we got here, it's probably because a slash was in an
					// unquoted attribute - don't trust the selfClosed value
					if(!selfClosed)
						selfClosed = tagName.isInArray(selfClosedElements);

					while(pos < data.length && data[pos] != '>')
						pos++;

					if(pos >= data.length) {
						// the tag never closed
						assert(data.length != 0);
						pos = data.length - 1; // rewinding so it hits the end at the bottom..
					}
				}

				auto whereThisTagStarted = pos; // for better error messages

				pos++;

				auto e = createElement(tagName);
				e.attributes = attributes;
				version(dom_node_indexes) {
					if(e.dataset.nodeIndex.length == 0)
						e.dataset.nodeIndex = to!string(&(e.attributes));
				}
				e.selfClosed = selfClosed;
				e.parseAttributes();

				// might temporarily set root to the first element we encounter,
				// then the final root element assignment will be at the end of the parse,
				// when the recursive work is complete.
				if(this.root is null)
					this.root = e;
				this.processTagOpen(e);
				scope(exit)
					this.processTagClose(e);


				// HACK to handle script and style as a raw data section as it is in HTML browsers
				if(!pureXmlMode && tagName.isInArray(rawSourceElements)) {
					if(!selfClosed) {
						string closer = "</" ~ tagName ~ ">";
						// A single search for the closer. Previously a guarded case-sensitive
						// search ran first and its result was unconditionally overwritten
						// by this one.
						ptrdiff_t ending = -1;
						if(pos < data.length)
							ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes));
						/*
						if(loose && ending == -1 && pos < data.length)
							ending = indexOf(data[pos..$], closer.toUpper());
						*/
						if(ending == -1) {
							if(strict)
								throw new Exception("tag " ~ tagName ~ " never closed");
							else {
								// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
								if(pos < data.length) {
									e = new TextNode(this, data[pos .. $]);
									pos = data.length;
								}
							}
						} else {
							ending += pos;
							e.innerRawSource = data[pos..ending];
							pos = ending + closer.length;
						}
					}
					return Ele(0, e, null);
				}

				bool closed = selfClosed;

				void considerHtmlNonNestableElementHack(Element n) {
					assert(!strict);
					if(!canNestElementsInHtml(e.tagName, n.tagName)) {
						// html lets you write <p> para 1 <p> para 1
						// but in the dom tree, they should be siblings, not children.
						nonNestableHackRequired = true;
					}
				}

				//writef("<%s>", tagName);
				while(!closed) {
					Ele n;
					if(strict)
						n = readElement();
					else
						n = readElement(parentChain ~ tagName);

					if(n.type == 4) return n; // the document is empty

					if(n.type == 3 && n.element !is null) {
						// special node, append if possible
						if(e !is null)
							processNodeWhileParsing(e, n.element);
						else
							piecesBeforeRoot ~= n.element;
					} else if(n.type == 0) {
						if(!strict)
							considerHtmlNonNestableElementHack(n.element);
						processNodeWhileParsing(e, n.element);
					} else if(n.type == 1) {
						bool found = false;
						if(n.payload != tagName) {
							if(strict)
								parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
							else {
								sawImproperNesting = true;
								// this is so we don't drop several levels of awful markup
								if(n.element) {
									if(!strict)
										considerHtmlNonNestableElementHack(n.element);
									processNodeWhileParsing(e, n.element);
									n.element = null;
								}

								// is the element open somewhere up the chain?
								foreach(i, parent; parentChain)
									if(parent == n.payload) {
										recentAutoClosedTags ~= tagName;
										// just rotating it so we don't inadvertently break stuff with vile crap
										if(recentAutoClosedTags.length > 4)
											recentAutoClosedTags = recentAutoClosedTags[1 .. $];

										n.element = e;
										return n;
									}

								/+
									// COMMENTED OUT BLOCK
									// dom.d used to replace improper close tags with their
									// text so they'd be visible in the output. the html
									// spec says to just ignore them, and browsers do indeed
									// seem to jsut ignore them, even checking back on IE6.
									// so i guess i was wrong to do this (tho tbh i find it kinda
									// useful to call out an obvious mistake in the source...
									// but for calling out obvious mistakes, just use strict
									// mode.)

								// if not, this is a text node; we can't fix it up...

								// If it's already in the tree somewhere, assume it is closed by algorithm
								// and we shouldn't output it - odds are the user just flipped a couple tags
								foreach(ele; e.tree) {
									if(ele.tagName == n.payload) {
										found = true;
										break;
									}
								}

								foreach(ele; recentAutoClosedTags) {
									if(ele == n.payload) {
										found = true;
										break;
									}
								}

								if(!found) // if not found in the tree though, it's probably just text
								processNodeWhileParsing(e, TextNode.fromUndecodedString(this, "</"~n.payload~">"));

								+/
							}
						} else {
							if(n.element) {
								if(!strict)
									considerHtmlNonNestableElementHack(n.element);
								processNodeWhileParsing(e, n.element);
							}
						}

						if(n.payload == tagName) // in strict mode, this is always true
							closed = true;
					} else { /*throw new Exception("wtf " ~ tagName);*/ }
				}
				//writef("</%s>\n", tagName);
				return Ele(0, e, null);
			}

			// if a tag was opened but not closed by end of file, we can arrive here
			if(!strict && pos >= data.length)
				return addTag(false);
			//else if(strict) assert(0); // should be caught before

			switch(data[pos]) {
				default: assert(0);
				case '/': // self closing tag
					return addTag(true);
				case '>':
					return addTag(false);
				case ' ':
				case '\t':
				case '\n':
				case '\r':
					// there might be attributes...
					moreAttributes:
					eatWhitespace();

					// same deal as above the switch....
					if(!strict && pos >= data.length)
						return addTag(false);

					if(strict && pos >= data.length)
						throw new MarkupException("tag open, didn't find > before end of file");

					switch(data[pos]) {
						case '/': // self closing tag
							return addTag(true);
						case '>': // closed tag; open -- we now read the contents
							return addTag(false);
						default: // it is an attribute
							string attrName = readAttributeName();
							string attrValue = attrName;

							bool ateAny = eatWhitespace();
							// the spec allows this too, sigh https://www.w3.org/TR/REC-xml/#NT-Eq
							//if(strict && ateAny)
								//throw new MarkupException("inappropriate whitespace after attribute name");

							if(pos >= data.length) {
								if(strict)
									assert(0, "this should have thrown in readAttributeName");
								else {
									data ~= ">";
									goto blankValue;
								}
							}
							if(data[pos] == '=') {
								pos++;

								ateAny = eatWhitespace();
								// the spec actually allows this!
								//if(strict && ateAny)
									//throw new MarkupException("inappropriate whitespace after attribute equals");

								attrValue = readAttributeValue();

								eatWhitespace();
							}

							blankValue:

							if(strict && attrName in attributes)
								throw new MarkupException("Repeated attribute: " ~ attrName);

							if(attrName.strip().length)
								attributes[attrName] = attrValue;
							else if(strict) throw new MarkupException("wtf, zero length attribute name");

							if(!strict && pos < data.length && data[pos] == '<') {
								// this is the broken tag that doesn't have a > at the end
								data = data[0 .. pos] ~ ">" ~ data[pos.. $];
								// let's insert one as a hack
								goto case '>';
							}

							goto moreAttributes;
					}
			}
	}

	return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
	//assert(0);
}

eatWhitespace();
Ele r;
do {
	r = readElement(); // there SHOULD only be one element...

	if(r.type == 3 && r.element !is null)
		piecesBeforeRoot ~= r.element;

	if(r.type == 4)
		break; // the document is completely empty...
} while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node

root = r.element;
if(root !is null)
	root.parent_ = this;

if(!strict) // in strict mode, we'll just ignore stuff after the xml
while(r.type != 4) {
	r = readElement();
	if(r.type != 4 && r.type != 2) { // if not empty and not ignored
		if(r.element !is null)
			piecesAfterRoot ~= r.element;
	}
}

if(root is null)
{
	if(strict)
		assert(0, "empty document should be impossible in strict mode");
	else
		parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
}

if(nonNestableHackRequired) {
	assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag...

	// in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml).
	// It's hard to handle above though because my code sucks. So, we'll fix it here.

	// Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120
	// Kind of inefficient because we can't detect when we recurse back out of a node.
	Element[Element] insertLocations;
	auto iterator = root.tree;
	foreach(ele; iterator) {
		if(ele.parentNode is null)
			continue;

		if(!canNestElementsInHtml(ele.parentNode.tagName, ele.tagName)) {
			auto shouldBePreviousSibling = ele.parentNode;
			auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder...
			if (auto p = holder in insertLocations) {
				shouldBePreviousSibling = *p;
				assert(shouldBePreviousSibling.parentNode is holder);
			}
			ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
			insertLocations[holder] = ele;
			iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
		}
	}
}
}

/* end massive parse function */

/// Gets the <title> element's innerText, if one exists. Returns an empty string otherwise.
@property string title() {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);
	if(e)
		return e.innerText();
	return "";
}

/// Sets the title of the page, creating a <title> element if needed.
/// A newly created element is only attached when the document has a <head>;
/// without one, the new element stays detached from the tree.
@property void title(string t) {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);

	if(!e) {
		e = createElement("title");
		auto heads = getElementsByTagName("head");
		if(heads.length)
			heads[0].appendChild(e);
	}

	if(e)
		e.innerText = t;
}

// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
	// a document that has not been parsed yet (or was clear()ed) has no root;
	// report "not found" instead of dereferencing null, like findFirst does
	if(root is null)
		return null;
	return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	auto e = cast(SomeElementType) querySelector(selector);
	if(e is null)
		throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
	return e;
}

/// ditto
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	auto e = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(e);
}

/// ditto
@scriptable
Element querySelector(string selector) {
	// see comment below on Document.querySelectorAll
	auto s = Selector(selector);//, !loose);
	foreach(ref comp; s.components)
		if(comp.parts.length && comp.parts[0].separation == 0)
			comp.parts[0].separation = -1;
	// the selector is still parsed above, so a malformed one is reported
	// even when there is nothing to search
	if(this.root is null)
		return null;
	foreach(e; s.getMatchingElementsLazy(this.root))
		return e;
	return null;

}

/// ditto
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto s = Selector(selector);//, !loose);
	foreach(ref comp; s.components)
		if(comp.parts.length && comp.parts[0].separation == 0)
			comp.parts[0].separation = -1;
	// no root means nothing can match
	if(this.root is null)
		return null;
	return s.getMatchingElements(this.root, null);
}

/// ditto
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	if(root is null)
		return null;
	return root.getElementsBySelector(selector);
}

/// ditto
@scriptable
Element[] getElementsByTagName(string tag) {
	if(root is null)
		return null;
	return root.getElementsByTagName(tag);
}

/// ditto
@scriptable
Element[] getElementsByClassName(string tag) {
	if(root is null)
		return null;
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
Element getFirstElementByTagName(string tag) {
	if(loose)
		tag = tag.toLower();
	bool doesItMatch(Element e) {
		return e.tagName == tag;
	}
	return findFirst(&doesItMatch);
}

/++
	This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body used to be a keyword in D.)

	History:
		`body` alias added February 26, 2024
+/
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// ditto
alias body = mainBody;

/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon
///
/// Returns null when no matching meta element is found under head.
string getMeta(string name) {
	string thing = name.indexOf(":") == -1 ? "name" : "property";
	auto e = querySelector("head meta["~thing~"="~name~"]");
	if(e is null)
		return null;
	return e.content;
}

/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
/// An existing matching tag is updated; otherwise one is added to head (which must exist).
void setMeta(string name, string value) {
	string thing = name.indexOf(":") == -1 ? "name" : "property";
	auto e = querySelector("head meta["~thing~"="~name~"]");
	if(e is null) {
		e = requireSelector("head").addChild("meta");
		e.setAttribute(thing, name);
	}

	e.content = value;
}

/// Returns the result of `getElementsByTagName("form")`, cast to `Form[]`.
Form[] forms() {
	return cast(Form[]) getElementsByTagName("form");
}

/// Creates a new, unattached form element.
Form createForm()
	out(ret) {
		assert(ret !is null);
	}
do {
	return cast(Form) createElement("form");
}

/// Creates a new, unattached element with the given tag name. The name is lowercased when the document is in loose mode.
Element createElement(string name) {
	if(loose)
		name = name.toLower();

	auto e = Element.make(name, null, null, selfClosedElements);

	return e;

//	return new Element(this, name, null, selfClosed);
}

/// Creates a new, empty `DocumentFragment` for this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new text node holding `content`.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Depth-first, pre-order search from the root for the first element `doesItMatch` accepts.
/// Returns null when nothing matches or when the document has no root.
Element findFirst(bool delegate(Element) doesItMatch) {
	if(root is null)
		return null;
	Element result;

	bool goThroughElement(Element e) {
		if(doesItMatch(e)) {
			result = e;
			return true;
		}

		foreach(child; e.children) {
			if(goThroughElement(child))
				return true;
		}

		return false;
	}

	goThroughElement(root);

	return result;
}

///.
void clear() {
	// only these two are reset; the prolog and the pieces before/after the root are kept
	root = null;
	loose = false;
}

private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/++
	Returns or sets the string before the root element. This is, for example,
	`<!DOCTYPE html>\n` or similar.
+/
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

/// ditto
void setProlog(string d) {
	_prolog = d;
	prologWasSet = true;
}

/++
	Returns the document as string form. Please note that if there is anything in [piecesAfterRoot],
	they are discarded. If you want to add them to the file, loop over that and append it yourself
	(but remember xml isn't supposed to have anything after the root element).

	If the document has no root (e.g. after [clear]), only the prolog is returned.
+/
override string toString() const {
	// clear() nulls the root, so it cannot be dereferenced unconditionally
	if(root is null)
		return prolog;
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	import std.string;
	string s = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	// same null-root guard as toString
	if(root !is null)
		s ~= root.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
	foreach(a; piecesAfterRoot)
		s ~= a.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
	return s;
}

/// The root element, like `<html>`. Most of the methods on Document forward to this object.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

// calls every registered observer, in registration order, with the event
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(o; eventObservers)
		o(e);
}
}

/++
	Basic parsing of HTML tag soup

	If you simply make a `new Document("some string")` or use [Document.fromUrl] to automatically
	download a page (that function is shorthand for `new Document(arsd.http2.get(your_given_url).contentText)`),
	the Document parser will assume it is broken HTML. It will try to fix up things like charset messes, missing
	closing tags, flipped tags, inconsistent letter cases, and other forms of commonly found HTML on the web.

	It isn't exactly the same as what a HTML5 web browser does in all cases, but it usually is, and where it
	disagrees, it is still usually good enough (but sometimes a bug).
+/
unittest {
	auto document = new Document(`<html><body><p>hello <P>there`);
	// this will automatically try to normalize the html and fix up broken tags, etc
	// so notice how it added the missing closing tags here and made them all lower case
	assert(document.toString() == "<!DOCTYPE html>\n<html><body><p>hello </p><p>there</p></body></html>", document.toString());
}

/++
	Stricter parsing of HTML

	When you are writing the HTML yourself, you can remove most ambiguity by making it throw exceptions instead
	of trying to automatically fix up things basic parsing tries to do. Using strict mode accomplishes this.

	This will help guarantee that you have well-formed HTML, which means it is going to parse a lot more reliably
	by all users - browsers, dom.d, other libraries, all behave better with well-formed input... people too!

	(note it is not a full *validator*, just a well-formedness checker. Full validation is a lot more work for very
	little benefit in my experience, so I stopped here.)
+/
unittest {
	try {
		auto document = new Document(`<html><body><p>hello <P>there`, true, true); // turns on strict and case sensitive mode to ctor
		assert(0); // never reached, the constructor will throw because strict mode is turned on
	} catch(Exception e) {

	}

	// you can also create the object first, then use the [parseStrict] method
	auto document = new Document;
	document.parseStrict(`<foo></foo>`); // this is invalid html - no such foo tag - but it is well-formed, since it is opened and closed properly, so it passes

}

/++
	Custom HTML extensions

	dom.d is a custom HTML parser, which means you can add custom HTML extensions to it too. It normally reads
	and discards things like ASP style `<% ... %>` code as well as XML processing instruction / PHP style embeds `<? ...
?>`
	but you can keep this data if you call a function to opt into it in before parsing.

	Additionally, you can add special tags to be read like `<script>` to preserve its insides for future processing
	via the `.innerRawSource` member.
+/
unittest {
	auto document = new Document; // construct an empty thing first
	document.enableAddingSpecialTagsToDom(); // add the special tags like <% ... %> etc
	document.rawSourceElements ~= "embedded-plaintext"; // tell it we want a custom

	document.parseStrict(`<html>
		<% some asp code %>
		<script>embedded && javascript</script>
		<embedded-plaintext>my <custom> plaintext & stuff</embedded-plaintext>
	</html>`);

	// please note that if we did `document.toString()` right now, the original source - almost your same
	// string you passed to parseStrict - would be spit back out. Meaning the embedded-plaintext still has its
	// special text inside it. Another parser won't understand how to use this! So if you want to pass this
	// document somewhere else, you need to do some transformations.
	//
	// This differs from cases like CDATA sections, which dom.d will automatically convert into plain html entities
	// on the output that can be read by anyone.

	assert(document.root.tagName == "html"); // the root element is normal

	int foundCount;
	// now let's loop through the whole tree
	foreach(element; document.root.tree) {
		// the asp thing will be in
		if(auto asp = cast(AspCode) element) {
			// you use the `asp.source` member to get the code for these
			assert(asp.source == "% some asp code %");
			foundCount++;
		} else if(element.tagName == "script") {
			// and for raw source elements - script, style, or the ones you add,
			// you use the innerHTML method to get the code inside
			assert(element.innerHTML == "embedded && javascript");
			foundCount++;
		} else if(element.tagName == "embedded-plaintext") {
			// and innerHTML again
			assert(element.innerHTML == "my <custom> plaintext & stuff");
			foundCount++;
		}

	}

	assert(foundCount == 3);

	// writeln(document.toString());
}

// FIXME: <textarea> contents are treated kinda special in html5 as well...

/++
	Demoing CDATA, entities, and non-ascii characters.

	The previous example mentioned CDATA, let's show you what that does too. These are all read in as plain strings accessible in the DOM - there is no CDATA, no entities once you get inside the object model - but when you convert back into a string, it will normalize them in a particular way.

	This is not exactly standards compliant completely in and out thanks to it doing some transformations... but I find it more useful - it reads the data in consistently and writes it out consistently, both in ways that work well for interop. Take a look:
+/
unittest {
	// The second paragraph must spell the character as the named entity `&curren;`;
	// writing the literal character there would make it identical to the first paragraph
	// and the example would no longer demonstrate entity decoding.
	auto document = new Document(`<html>
		<p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p>
		<p>&curren; is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p>
		<p><![CDATA[xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.]]></p>
	</html>`, true, true); // strict mode turned on

	// Inside the object model, things are simplified to D strings.
	auto paragraphs = document.querySelectorAll("p");
	// no surprise on the first paragraph, we wrote it with the character, and it is still there in the D string
	assert(paragraphs[0].textContent == "¤ is a non-ascii character. It will be converted to a numbered entity in string output.");
	// but note on the second paragraph, the entity has been converted to the appropriate *character* in the object
	assert(paragraphs[1].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.");
	// and the CDATA bit is completely gone from the DOM; it just read it in as a text node. The txt content shows the text as a plain string:
	assert(paragraphs[2].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.");
	// and the dom node beneath it is just a single text node; no trace of the original CDATA detail is left after parsing.
	assert(paragraphs[2].childNodes.length == 1 && paragraphs[2].childNodes[0].nodeType == NodeType.Text);

	// And now, in the output string, we can see they are normalized thusly:
	// the non-ascii character becomes the numbered entity `&#164;` in both paragraphs, and the
	// `<tag>` that was protected by CDATA comes out with its angle brackets entity-encoded.
	// (The whitespace between the tags has to mirror the input literal above exactly.)
	assert(document.toString() == "<!DOCTYPE html>\n<html>
		<p>&#164; is a non-ascii character. It will be converted to a numbered entity in string output.</p>
		<p>&#164; is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p>
		<p>xml cdata segments, which can contain &lt;tag&gt; looking things, are converted to encode the embedded special-to-xml characters to entities too.</p>
	</html>");
}

/++
	Streaming parsing

	dom.d normally takes a big string and returns a big DOM object tree - hence its name. This is usually the simplest
	code to read and write, so I prefer to stick to that, but if you wanna jump through a few hoops, you can still make
	dom.d work with streams.

	It is awkward - again, dom.d's whole design is based on building the dom tree, but you can do it if you're willing to
	subclass a little and trust the garbage collector. Here's how.
+/
unittest {
	bool encountered;
	class StreamDocument : Document {
		// the normal behavior for this function is to `parent.appendChild(child)`
		// but we can override to read it as it is processed and not append it
		override void processNodeWhileParsing(Element parent, Element child) {
			if(child.tagName == "bar")
				encountered = true;
			// note that each element's object is created but then discarded as garbage.
			// the GC will take care of it, even with a large document, whereas the normal
			// object tree could become quite large.
		}

		this() {
			super("<foo><bar></bar></foo>");
		}
	}

	auto test = new StreamDocument();
	assert(encountered); // it should have been seen
	assert(test.querySelector("bar") is null); // but not appended to the dom node, since we didn't append it
}

/++
	Basic parsing of XML.

	dom.d is not technically a standards-compliant xml parser and doesn't implement all xml features,
	but its stricter parse options together with turning off HTML's special tag handling (e.g.
treating
	`<script>` and `<style>` the same as any other tag) gets close enough to work fine for a great many
	use cases.

	For more information, see [XmlDocument].
+/
unittest {
	// constructing the document is the whole example: it parses the string as xml
	auto xml = new XmlDocument(`<my-stuff>hello</my-stuff>`);
}

// Answers whether a `childTagName` tag may sit inside a `parentTagName` tag.
// Paragraphs and headings refuse paragraphs, headings and definition-list tags;
// `dt` / `dd` refuse each other; every other combination is allowed.
bool canNestElementsInHtml(string parentTagName, string childTagName) {
	static bool isParagraphOrHeading(string tag) {
		switch(tag) {
			case "p", "h1", "h2", "h3", "h4", "h5", "h6":
				return true;
			default:
				return false;
		}
	}

	static bool isDefinitionItem(string tag) {
		return tag == "dt" || tag == "dd";
	}

	if(isParagraphOrHeading(parentTagName)) {
		// only should include "phrasing content"
		const blocked = isParagraphOrHeading(childTagName)
			|| isDefinitionItem(childTagName)
			|| childTagName == "dl";
		return !blocked;
	}

	if(isDefinitionItem(parentTagName))
		return !isDefinitionItem(childTagName);

	return true;
}

// Common view over the things an element's parent link can point to.
interface DomParent {
	inout(Document) asDocument() inout;
	inout(Element) asElement() inout;
}

/++
	This represents almost everything in the DOM and offers a lot of inspection and manipulation functions. Element, or its subclasses, are what makes the dom tree.
+/
/// Group: core_functionality
class Element : DomParent {
	inout(Document) asDocument() inout { return null; }
	inout(Element) asElement() inout { return this; }

	/// Returns a collection of elements by selector.
	/// See: [Document.opIndex]
	ElementCollection opIndex(string selector) {
		auto startingPoint = ElementCollection(this);
		return startingPoint[selector];
	}

	/++
		Returns the child node with the particular index.

		Be aware that child nodes include text nodes, including
		whitespace-only nodes.
	+/
	Element opIndex(size_t index) {
		return index < children.length ? this.children[index] : null;
	}

	/// Calls getElementById, but throws instead of returning null if the element is not found.
You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 1998 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1999 if( 2000 is(SomeElementType : Element) 2001 ) 2002 out(ret) { 2003 assert(ret !is null); 2004 } 2005 do { 2006 auto e = cast(SomeElementType) getElementById(id); 2007 if(e is null) 2008 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line); 2009 return e; 2010 } 2011 2012 /// ditto but with selectors instead of ids 2013 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 2014 if( 2015 is(SomeElementType : Element) 2016 ) 2017 out(ret) { 2018 assert(ret !is null); 2019 } 2020 do { 2021 auto e = cast(SomeElementType) querySelector(selector); 2022 if(e is null) 2023 throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line); 2024 return e; 2025 } 2026 2027 2028 /++ 2029 If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods. 2030 +/ 2031 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 2032 if(is(SomeElementType : Element)) 2033 { 2034 auto e = cast(SomeElementType) querySelector(selector); 2035 return MaybeNullElement!SomeElementType(e); 2036 } 2037 2038 2039 2040 /// get all the classes on this element 2041 @property string[] classes() const { 2042 // FIXME: remove blank names 2043 auto cs = split(className, " "); 2044 foreach(ref c; cs) 2045 c = c.strip(); 2046 return cs; 2047 } 2048 2049 /++ 2050 The object [classList] returns. 
2051 +/ 2052 static struct ClassListHelper { 2053 Element this_; 2054 this(inout(Element) this_) inout { 2055 this.this_ = this_; 2056 } 2057 2058 /// 2059 bool contains(string cn) const { 2060 return this_.hasClass(cn); 2061 } 2062 2063 /// 2064 void add(string cn) { 2065 this_.addClass(cn); 2066 } 2067 2068 /// 2069 void remove(string cn) { 2070 this_.removeClass(cn); 2071 } 2072 2073 /// 2074 void toggle(string cn) { 2075 if(contains(cn)) 2076 remove(cn); 2077 else 2078 add(cn); 2079 } 2080 2081 // this thing supposed to be iterable in javascript but idk how i want to do it in D. meh 2082 /+ 2083 string[] opIndex() const { 2084 return this_.classes; 2085 } 2086 +/ 2087 } 2088 2089 /++ 2090 Returns a helper object to work with classes, just like javascript. 2091 2092 History: 2093 Added August 25, 2022 2094 +/ 2095 @property inout(ClassListHelper) classList() inout { 2096 return inout(ClassListHelper)(this); 2097 } 2098 // FIXME: classList is supposed to whitespace and duplicates when you use it. need to test. 2099 2100 unittest { 2101 Element element = Element.make("div"); 2102 element.classList.add("foo"); 2103 assert(element.classList.contains("foo")); 2104 element.classList.remove("foo"); 2105 assert(!element.classList.contains("foo")); 2106 element.classList.toggle("bar"); 2107 assert(element.classList.contains("bar")); 2108 } 2109 2110 /// ditto 2111 alias classNames = classes; 2112 2113 2114 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 2115 @scriptable 2116 Element addClass(string c) { 2117 if(hasClass(c)) 2118 return this; // don't add it twice 2119 2120 string cn = getAttribute("class"); 2121 if(cn.length == 0) { 2122 setAttribute("class", c); 2123 return this; 2124 } else { 2125 setAttribute("class", cn ~ " " ~ c); 2126 } 2127 2128 return this; 2129 } 2130 2131 /// Removes a particular class name. 
2132 @scriptable 2133 Element removeClass(string c) { 2134 if(!hasClass(c)) 2135 return this; 2136 string n; 2137 foreach(name; classes) { 2138 if(c == name) 2139 continue; // cut it out 2140 if(n.length) 2141 n ~= " "; 2142 n ~= name; 2143 } 2144 2145 className = n.strip(); 2146 2147 return this; 2148 } 2149 2150 /// Returns whether the given class appears in this element. 2151 bool hasClass(string c) const { 2152 string cn = className; 2153 2154 auto idx = cn.indexOf(c); 2155 if(idx == -1) 2156 return false; 2157 2158 foreach(cla; cn.split(" ")) 2159 if(cla.strip == c) 2160 return true; 2161 return false; 2162 2163 /* 2164 int rightSide = idx + c.length; 2165 2166 bool checkRight() { 2167 if(rightSide == cn.length) 2168 return true; // it's the only class 2169 else if(iswhite(cn[rightSide])) 2170 return true; 2171 return false; // this is a substring of something else.. 2172 } 2173 2174 if(idx == 0) { 2175 return checkRight(); 2176 } else { 2177 if(!iswhite(cn[idx - 1])) 2178 return false; // substring 2179 return checkRight(); 2180 } 2181 2182 assert(0); 2183 */ 2184 } 2185 2186 2187 /* ******************************* 2188 DOM Mutation 2189 *********************************/ 2190 /++ 2191 Family of convenience functions to quickly add a tag with some text or 2192 other relevant info (for example, it's a src for an <img> element 2193 instead of inner text). They forward to [Element.make] then calls [appendChild]. 
2194 2195 --- 2196 div.addChild("span", "hello there"); 2197 div.addChild("div", Html("<p>children of the div</p>")); 2198 --- 2199 +/ 2200 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 2201 in { 2202 assert(tagName !is null); 2203 } 2204 out(e) { 2205 //assert(e.parentNode is this); 2206 //assert(e.parentDocument is this.parentDocument); 2207 } 2208 do { 2209 auto e = Element.make(tagName, childInfo, childInfo2); 2210 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 2211 // return the parent. That will break existing code though. 2212 return appendChild(e); 2213 } 2214 2215 /// ditto 2216 Element addChild(Element e) { 2217 return this.appendChild(e); 2218 } 2219 2220 /// ditto 2221 Element addChild(string tagName, Element firstChild, string info2 = null) 2222 in { 2223 assert(firstChild !is null); 2224 } 2225 out(ret) { 2226 assert(ret !is null); 2227 assert(ret.parentNode is this); 2228 assert(firstChild.parentNode is ret); 2229 2230 assert(ret.parentDocument is this.parentDocument); 2231 //assert(firstChild.parentDocument is this.parentDocument); 2232 } 2233 do { 2234 auto e = Element.make(tagName, "", info2); 2235 e.appendChild(firstChild); 2236 this.appendChild(e); 2237 return e; 2238 } 2239 2240 /// ditto 2241 Element addChild(string tagName, in Html innerHtml, string info2 = null) 2242 in { 2243 } 2244 out(ret) { 2245 assert(ret !is null); 2246 assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null"); 2247 assert(ret.parentDocument is this.parentDocument); 2248 } 2249 do { 2250 auto e = Element.make(tagName, "", info2); 2251 this.appendChild(e); 2252 e.innerHTML = innerHtml.source; 2253 return e; 2254 } 2255 2256 2257 /// Another convenience function. Adds a child directly after the current one, returning 2258 /// the new child. 
2259 /// 2260 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 2261 /// See_Also: [addChild] 2262 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 2263 in { 2264 assert(tagName !is null); 2265 assert(parentNode !is null); 2266 } 2267 out(e) { 2268 assert(e.parentNode is this.parentNode); 2269 assert(e.parentDocument is this.parentDocument); 2270 } 2271 do { 2272 auto e = Element.make(tagName, childInfo, childInfo2); 2273 return parentNode.insertAfter(this, e); 2274 } 2275 2276 /// ditto 2277 Element addSibling(Element e) { 2278 return parentNode.insertAfter(this, e); 2279 } 2280 2281 /// Convenience function to append text intermixed with other children. 2282 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 2283 /// or div.addChildren("Hello, ", user.name, "!"); 2284 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 2285 void addChildren(T...)(T t) { 2286 foreach(item; t) { 2287 static if(is(item : Element)) 2288 appendChild(item); 2289 else static if (is(isSomeString!(item))) 2290 appendText(to!string(item)); 2291 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 2292 } 2293 } 2294 2295 /// Appends the list of children to this element. 2296 void appendChildren(Element[] children) { 2297 foreach(ele; children) 2298 appendChild(ele); 2299 } 2300 2301 /// Removes this element form its current parent and appends it to the given `newParent`. 
2302 void reparent(Element newParent) 2303 in { 2304 assert(newParent !is null); 2305 assert(parentNode !is null); 2306 } 2307 out { 2308 assert(this.parentNode is newParent); 2309 //assert(isInArray(this, newParent.children)); 2310 } 2311 do { 2312 parentNode.removeChild(this); 2313 newParent.appendChild(this); 2314 } 2315 2316 /** 2317 Strips this tag out of the document, putting its inner html 2318 as children of the parent. 2319 2320 For example, given: `<p>hello <b>there</b></p>`, if you 2321 call `stripOut` on the `b` element, you'll be left with 2322 `<p>hello there<p>`. 2323 2324 The idea here is to make it easy to get rid of garbage 2325 markup you aren't interested in. 2326 */ 2327 void stripOut() 2328 in { 2329 assert(parentNode !is null); 2330 } 2331 out { 2332 assert(parentNode is null); 2333 assert(children.length == 0); 2334 } 2335 do { 2336 foreach(c; children) 2337 c.parentNode = null; // remove the parent 2338 if(children.length) 2339 parentNode.replaceChild(this, this.children); 2340 else 2341 parentNode.removeChild(this); 2342 this.children.length = 0; // we reparented them all above 2343 } 2344 2345 /// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check 2346 /// if the element already isn't in a tree, it does nothing. 2347 Element removeFromTree() 2348 in { 2349 2350 } 2351 out(var) { 2352 assert(this.parentNode is null); 2353 assert(var is this); 2354 } 2355 do { 2356 if(this.parentNode is null) 2357 return this; 2358 2359 this.parentNode.removeChild(this); 2360 2361 return this; 2362 } 2363 2364 /++ 2365 Wraps this element inside the given element. 2366 It's like `this.replaceWith(what); what.appendchild(this);` 2367 2368 Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));` 2369 you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`. 
2370 +/ 2371 Element wrapIn(Element what) 2372 in { 2373 assert(what !is null); 2374 } 2375 out(ret) { 2376 assert(this.parentNode is what); 2377 assert(ret is what); 2378 } 2379 do { 2380 this.replaceWith(what); 2381 what.appendChild(this); 2382 2383 return what; 2384 } 2385 2386 /// Replaces this element with something else in the tree. 2387 Element replaceWith(Element e) 2388 in { 2389 assert(this.parentNode !is null); 2390 } 2391 do { 2392 e.removeFromTree(); 2393 this.parentNode.replaceChild(this, e); 2394 return e; 2395 } 2396 2397 /** 2398 Fetches the first consecutive text nodes concatenated together. 2399 2400 2401 `firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered. 2402 2403 See_also: [directText], [innerText] 2404 */ 2405 string firstInnerText() const { 2406 string s; 2407 foreach(child; children) { 2408 if(child.nodeType != NodeType.Text) 2409 break; 2410 2411 s ~= child.nodeValue(); 2412 } 2413 return s; 2414 } 2415 2416 2417 /** 2418 Returns the text directly under this element. 2419 2420 2421 Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues 2422 past child tags. So, `<example>some <b>bold</b> text</example>` 2423 will return `some text` because it only gets the text, skipping non-text children. 2424 2425 See_also: [firstInnerText], [innerText] 2426 */ 2427 @property string directText() { 2428 string ret; 2429 foreach(e; children) { 2430 if(e.nodeType == NodeType.Text) 2431 ret ~= e.nodeValue(); 2432 } 2433 2434 return ret; 2435 } 2436 2437 /** 2438 Sets the direct text, without modifying other child nodes. 2439 2440 2441 Unlike [innerText], this does *not* remove existing elements in the element. 2442 2443 It only replaces the first text node it sees. 2444 2445 If there are no text nodes, it calls [appendText]. 2446 2447 So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`. 
2448 */ 2449 @property void directText(string text) { 2450 foreach(e; children) { 2451 if(e.nodeType == NodeType.Text) { 2452 auto it = cast(TextNode) e; 2453 it.contents = text; 2454 return; 2455 } 2456 } 2457 2458 appendText(text); 2459 } 2460 2461 // do nothing, this is primarily a virtual hook 2462 // for links and forms 2463 void setValue(string field, string value) { } 2464 void setValue(string field, string[] value) { } 2465 2466 2467 // this is a thing so i can remove observer support if it gets slow 2468 // I have not implemented all these yet 2469 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 2470 if(parentDocument is null) return; 2471 DomMutationEvent me; 2472 me.operation = operation; 2473 me.target = this; 2474 me.relatedString = s1; 2475 me.relatedString2 = s2; 2476 me.related = r; 2477 me.related2 = r2; 2478 parentDocument.dispatchMutationEvent(me); 2479 } 2480 2481 // putting all the members up front 2482 2483 // this ought to be private. don't use it directly. 2484 Element[] children; 2485 2486 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 2487 string tagName; 2488 2489 /++ 2490 This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 2491 2492 History: 2493 `AttributesHolder` replaced `string[string]` on August 22, 2024 2494 +/ 2495 AttributesHolder attributes; 2496 2497 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 2498 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 2499 private bool selfClosed; 2500 2501 private DomParent parent_; 2502 2503 /// Get the parent Document object that contains this element. 2504 /// It may be null, so remember to check for that. 
2505 @property inout(Document) parentDocument() inout { 2506 if(this.parent_ is null) 2507 return null; 2508 auto p = cast() this.parent_.asElement; 2509 auto prev = cast() this; 2510 while(p) { 2511 prev = p; 2512 if(p.parent_ is null) 2513 return null; 2514 p = cast() p.parent_.asElement; 2515 } 2516 return cast(inout) prev.parent_.asDocument; 2517 } 2518 2519 /*deprecated*/ @property void parentDocument(Document doc) { 2520 parent_ = doc; 2521 } 2522 2523 /// Returns the parent node in the tree this element is attached to. 2524 inout(Element) parentNode() inout { 2525 if(parent_ is null) 2526 return null; 2527 2528 auto p = parent_.asElement; 2529 2530 if(cast(DocumentFragment) p) { 2531 if(p.parent_ is null) 2532 return null; 2533 else 2534 return p.parent_.asElement; 2535 } 2536 2537 return p; 2538 } 2539 2540 //protected 2541 Element parentNode(Element e) { 2542 parent_ = e; 2543 return e; 2544 } 2545 2546 // these are here for event handlers. Don't forget that this library never fires events. 2547 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 2548 2549 version(dom_with_events) { 2550 EventHandler[][string] bubblingEventHandlers; 2551 EventHandler[][string] capturingEventHandlers; 2552 EventHandler[string] defaultEventHandlers; 2553 2554 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 2555 if(event.length > 2 && event[0..2] == "on") 2556 event = event[2 .. $]; 2557 2558 if(useCapture) 2559 capturingEventHandlers[event] ~= handler; 2560 else 2561 bubblingEventHandlers[event] ~= handler; 2562 } 2563 } 2564 2565 2566 // and now methods 2567 2568 /++ 2569 Convenience function to try to do the right thing for HTML. This is the main way I create elements. 2570 2571 History: 2572 On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private 2573 immutable global list for HTML. 
It still defaults to the same list, but you can change it now via 2574 the parameter. 2575 See_Also: 2576 [addChild], [addSibling] 2577 +/ 2578 static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2579 bool selfClosed = tagName.isInArray(selfClosedElements); 2580 2581 Element e; 2582 // want to create the right kind of object for the given tag... 2583 switch(tagName) { 2584 case "#text": 2585 e = new TextNode(null, childInfo); 2586 return e; 2587 // break; 2588 case "table": 2589 e = new Table(null); 2590 break; 2591 case "a": 2592 e = new Link(null); 2593 break; 2594 case "form": 2595 e = new Form(null); 2596 break; 2597 case "tr": 2598 e = new TableRow(null); 2599 break; 2600 case "td", "th": 2601 e = new TableCell(null, tagName); 2602 break; 2603 default: 2604 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 2605 } 2606 2607 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 2608 e.tagName = tagName; 2609 e.selfClosed = selfClosed; 2610 2611 if(childInfo !is null) 2612 switch(tagName) { 2613 /* html5 convenience tags */ 2614 case "audio": 2615 if(childInfo.length) 2616 e.addChild("source", childInfo); 2617 if(childInfo2 !is null) 2618 e.appendText(childInfo2); 2619 break; 2620 case "source": 2621 e.src = childInfo; 2622 if(childInfo2 !is null) 2623 e.type = childInfo2; 2624 break; 2625 /* regular html 4 stuff */ 2626 case "img": 2627 e.src = childInfo; 2628 if(childInfo2 !is null) 2629 e.alt = childInfo2; 2630 break; 2631 case "link": 2632 e.href = childInfo; 2633 if(childInfo2 !is null) 2634 e.rel = childInfo2; 2635 break; 2636 case "option": 2637 e.innerText = childInfo; 2638 if(childInfo2 !is null) 2639 e.value = childInfo2; 2640 break; 2641 case "input": 2642 e.type = "hidden"; 2643 e.name = childInfo; 2644 if(childInfo2 !is null) 2645 e.value = childInfo2; 2646 
break; 2647 case "button": 2648 e.innerText = childInfo; 2649 if(childInfo2 !is null) 2650 e.type = childInfo2; 2651 break; 2652 case "a": 2653 e.innerText = childInfo; 2654 if(childInfo2 !is null) 2655 e.href = childInfo2; 2656 break; 2657 case "script": 2658 case "style": 2659 e.innerRawSource = childInfo; 2660 break; 2661 case "meta": 2662 e.name = childInfo; 2663 if(childInfo2 !is null) 2664 e.content = childInfo2; 2665 break; 2666 /* generically, assume we were passed text and perhaps class */ 2667 default: 2668 e.innerText = childInfo; 2669 if(childInfo2.length) 2670 e.className = childInfo2; 2671 } 2672 2673 return e; 2674 } 2675 2676 /// ditto 2677 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2678 // FIXME: childInfo2 is ignored when info1 is null 2679 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2680 m.innerHTML = innerHtml.source; 2681 return m; 2682 } 2683 2684 /// ditto 2685 static Element make(string tagName, Element child, string childInfo2 = null) { 2686 auto m = Element.make(tagName, cast(string) null, childInfo2); 2687 m.appendChild(child); 2688 return m; 2689 } 2690 2691 2692 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2693 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2694 tagName = _tagName; 2695 foreach(k, v; _attributes) 2696 attributes[k] = v; 2697 selfClosed = _selfClosed; 2698 2699 version(dom_node_indexes) 2700 this.dataset.nodeIndex = to!string(&(this.attributes)); 2701 2702 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2703 } 2704 2705 /++ 2706 Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2707 Note also that without a parent document, elements are always in strict, case-sensitive mode. 
2708 2709 History: 2710 On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as 2711 before: using the hard-coded list of HTML elements, but it can now be overridden. If you use 2712 [Document.createElement], it will use the list set for the current document. Otherwise, you can pass 2713 something here if you like. 2714 +/ 2715 this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2716 tagName = _tagName; 2717 foreach(k, v; _attributes) 2718 attributes[k] = v; 2719 selfClosed = tagName.isInArray(selfClosedElements); 2720 2721 // this is meant to reserve some memory. It makes a small, but consistent improvement. 2722 //children.length = 8; 2723 //children.length = 0; 2724 2725 version(dom_node_indexes) 2726 this.dataset.nodeIndex = to!string(&(this.attributes)); 2727 } 2728 2729 private this(Document _parentDocument) { 2730 version(dom_node_indexes) 2731 this.dataset.nodeIndex = to!string(&(this.attributes)); 2732 } 2733 2734 2735 /* ******************************* 2736 Navigating the DOM 2737 *********************************/ 2738 2739 /// Returns the first child of this element. If it has no children, returns null. 2740 /// Remember, text nodes are children too. 2741 @property Element firstChild() { 2742 return children.length ? children[0] : null; 2743 } 2744 2745 /// Returns the last child of the element, or null if it has no children. Remember, text nodes are children too. 2746 @property Element lastChild() { 2747 return children.length ? children[$ - 1] : null; 2748 } 2749 2750 // FIXME UNTESTED 2751 /// the next or previous element you would encounter if you were reading it in the source. May be a text node or other special non-tag object if you enabled them. 
2752 Element nextInSource() { 2753 auto n = firstChild; 2754 if(n is null) 2755 n = nextSibling(); 2756 if(n is null) { 2757 auto p = this.parentNode; 2758 while(p !is null && n is null) { 2759 n = p.nextSibling; 2760 } 2761 } 2762 2763 return n; 2764 } 2765 2766 /// ditto 2767 Element previousInSource() { 2768 auto p = previousSibling; 2769 if(p is null) { 2770 auto par = parentNode; 2771 if(par) 2772 p = par.lastChild; 2773 if(p is null) 2774 p = par; 2775 } 2776 return p; 2777 } 2778 2779 /++ 2780 Returns the next or previous sibling that is not a text node. Please note: the behavior with comments is subject to change. Currently, it will return a comment or other nodes if it is in the tree (if you enabled it with [Document.enableAddingSpecialTagsToDom] or [Document.parseSawComment]) and not if you didn't, but the implementation will probably change at some point to skip them regardless. 2781 2782 Equivalent to [previousSibling]/[nextSibling]("*"). 2783 2784 Please note it may return `null`. 2785 +/ 2786 @property Element previousElementSibling() { 2787 return previousSibling("*"); 2788 } 2789 2790 /// ditto 2791 @property Element nextElementSibling() { 2792 return nextSibling("*"); 2793 } 2794 2795 /++ 2796 Returns the next or previous sibling matching the `tagName` filter. The default filter of `null` will return the first sibling it sees, even if it is a comment or text node, or anything else. A filter of `"*"` will match any tag with a name. Otherwise, the string must match the [tagName] of the sibling you want to find. 
2797 +/ 2798 @property Element previousSibling(string tagName = null) { 2799 if(this.parentNode is null) 2800 return null; 2801 Element ps = null; 2802 foreach(e; this.parentNode.childNodes) { 2803 if(e is this) 2804 break; 2805 if(tagName == "*" && e.nodeType != NodeType.Text) { 2806 ps = e; 2807 } else if(tagName is null || e.tagName == tagName) 2808 ps = e; 2809 } 2810 2811 return ps; 2812 } 2813 2814 /// ditto 2815 @property Element nextSibling(string tagName = null) { 2816 if(this.parentNode is null) 2817 return null; 2818 Element ns = null; 2819 bool mightBe = false; 2820 foreach(e; this.parentNode.childNodes) { 2821 if(e is this) { 2822 mightBe = true; 2823 continue; 2824 } 2825 if(mightBe) { 2826 if(tagName == "*" && e.nodeType != NodeType.Text) { 2827 ns = e; 2828 break; 2829 } 2830 if(tagName is null || e.tagName == tagName) { 2831 ns = e; 2832 break; 2833 } 2834 } 2835 } 2836 2837 return ns; 2838 } 2839 2840 2841 /++ 2842 Gets the nearest node, going up the chain, with the given tagName 2843 May return null or throw. The type `T` will specify a subclass like 2844 [Form], [Table], or [Link], which it will cast for you when found. 2845 +/ 2846 T getParent(T = Element)(string tagName = null) if(is(T : Element)) { 2847 if(tagName is null) { 2848 static if(is(T == Form)) 2849 tagName = "form"; 2850 else static if(is(T == Table)) 2851 tagName = "table"; 2852 else static if(is(T == Link)) 2853 tagName == "a"; 2854 } 2855 2856 auto par = this.parentNode; 2857 while(par !is null) { 2858 if(tagName is null || par.tagName == tagName) 2859 break; 2860 par = par.parentNode; 2861 } 2862 2863 static if(!is(T == Element)) { 2864 auto t = cast(T) par; 2865 if(t is null) 2866 throw new ElementNotFoundException("", tagName ~ " parent not found", this); 2867 } else 2868 auto t = par; 2869 2870 return t; 2871 } 2872 2873 /++ 2874 Searches this element and the tree of elements under it for one matching the given `id` attribute. 
+/
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(candidate; tree) {
		if(candidate.id == id)
			return candidate;
	}
	// nothing in `tree` carried that id
	return null;
}

/++
	Returns the first element found in `tree` that matches the given `selector`,
	or `null` if nothing matches.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.

	Tip: to use namespaces, escape the colon in the name:

	---
	element.querySelector(`ns\:tag`); // the backticks are raw strings then the backslash is interpreted by querySelector
	---
+/
@scriptable
Element querySelector(string selector) {
	Selector parsed = Selector(selector);

	foreach(ref component; parsed.components) {
		// only components that begin with a combinator need the extra part
		if(component.parts.length == 0 || !(component.parts[0].separation > 0))
			continue;

		// this is illegal in standard dom, but i use it a lot
		// gonna insert a :scope thing
		SelectorPart scopePart;
		scopePart.separation = -1;
		scopePart.scopeElement = true;
		component.parts = scopePart ~ component.parts;
	}

	foreach(candidate; tree) {
		if(parsed.matchesElement(candidate, this))
			return candidate;
	}
	return null;
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	// parse the selector text, then test it against this element alone
	auto parsed = Selector(selector);
	return parsed.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	// start at this element and follow parentNode links upward
	for(Element cursor = this; cursor !is null; cursor = cursor.parentNode) {
		if(cursor.matches(selector))
			return cursor;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]              Foo is present as an attribute
	[foo="warning"]    Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"]  Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"]      Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector?
and given a selector, give me all the elements

	The name `getElementsBySelector` was the original name, written back before the name `querySelector` was standardized (this library is older than you might think!), but they do the same thing.
*/
@scriptable
Element[] querySelectorAll(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.


	// tag names are matched case-insensitively when the owning document is loose
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;

	// each comma separated selector contributes its own matches, concatenated in order
	Element[] ret;
	foreach(sel; parseSelectorString(selector, caseSensitiveTags))
		ret ~= sel.getElements(this, null);
	return ret;
}

/// ditto
alias getElementsBySelector = querySelectorAll;

/++
	Returns child elements that have the given class name or tag name.

	Please note the standard specifies this should return a live node list. This means, in Javascript for example, if you loop over the value returned by getElementsByTagName and getElementsByClassName and remove the elements, the length of the list will decrease. When I implemented this, I figured that was more trouble than it was worth and returned a plain array instead. By the time I had the infrastructure to make it simple, I didn't want to do the breaking change.

	So these are incompatible with Javascript in the face of live dom mutation and will likely remain so.
+/
Element[] getElementsByClassName(string cn) {
	// is this correct?
	return getElementsBySelector("." ~ cn);
}

/// ditto
Element[] getElementsByTagName(string tag) {
	if(parentDocument && parentDocument.loose)
		tag = tag.toLower();
	Element[] ret;
	foreach(e; tree)
		if(e.tagName == tag || tag == "*")
			ret ~= e;
	return ret;
}


/* *******************************
	Attributes
*********************************/

/**
	Gets the given attribute value, or null if the
	attribute is not set.

	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	return attributes.get(name, null);
}

/**
	Sets an attribute. Returns this for easy chaining

	For `href` and `src`, a leading `vbscript:` or `javascript:` scheme
	(compared case-insensitively, ignoring surrounding whitespace) is cut
	off the value before it is stored.
*/
@scriptable
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		// The scheme is detected on a trimmed, lower-cased copy, so the cut
		// has to be made on the same trimmed text. Slicing the untrimmed
		// value by a fixed offset removed the wrong bytes whenever the value
		// had leading whitespace, and enough of it left the scheme intact.
		auto trimmed = value.strip();
		auto v = trimmed.toLower();
		if(v.startsWith("vbscript:"))
			value = trimmed["vbscript:".length .. $];
		if(v.startsWith("javascript:"))
			value = trimmed["javascript:".length .. $];
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(name in attributes)
		return true;
	else
		return false;
}

/**
	Removes the given attribute from the element.
*/
@scriptable
Element removeAttribute(string name)
	out(ret) {
		assert(ret is this);
	}
do {
	// loose documents store attribute names lower-cased
	if(parentDocument && parentDocument.loose) {
		name = name.toLower();
	}
	if(name in attributes) {
		attributes.remove(name);
	}

	// observers are told regardless of whether the attribute was present
	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets or sets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto current = getAttribute("class");
	return current is null ? "" : current;
}

/// ditto
@property Element className(string c) {
	this.setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	Passing a non-null `v` sets the attribute first; the current value is
	returned in either case. Only names accepted by [isConvenientAttribute]
	are allowed here.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	if(v is null)
		return getAttribute(name);
	setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	// instantiating this overload is always a compile error
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children (the `children` array itself, not a copy).
*/
@property inout(Element[]) childNodes() inout {
	return this.children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	auto view = DataSet(this);
	return view;
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	auto view = AttributeSet(this);
	return view;
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	auto view = ElementStyle(this);
	return view;
}

/++
	This sets the style attribute with a string, then returns the same
	object-style view the getter gives.
+/
@property ElementStyle style(string s) {
	setAttribute("style", s);
	return this.style;
}

// Currently a no-op: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.

// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;

/// Don't use this. It can try to parse out the style element but it isn't complete and if I get back to it, it won't be for a while.
///
/// The result is created once via `computedStyleFactory(this)` and cached in `_computedStyle`.
@property CssStyle computedStyle() {
	// already built on an earlier call
	if(_computedStyle !is null)
		return _computedStyle;

	// NOTE(review): this string is assembled but never handed to the factory
	// below; kept as-is, confirm whether it was meant to be used.
	auto style = this.getAttribute("style");
	/* legacy presentational html attributes are treated as css here */
	if(this.hasAttribute("width"))
		style ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		style ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		style ~= "; background-color: " ~ this.attrs.bgcolor;
	if(this.tagName == "body" && this.hasAttribute("text"))
		style ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		style ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = computedStyleFactory(this);
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// own offset plus the offsets of every element along the offsetParent chain
		int total = offsetLeft;
		for(auto anc = offsetParent; anc; anc = anc.offsetParent)
			total += anc.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// own offset plus the offsets of every element along the offsetParent chain
		int total = offsetTop;
		for(auto anc = offsetParent; anc; anc = anc.offsetParent)
			total += anc.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
	DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
/// Every former child gets its `parentNode` reset to `null`.
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
do {
	foreach(orphan; children) {
		orphan.parentNode = null;
	}
	children = null;
}

/++
	Adds a sibling element before or after this one in the dom, by asking
	`parentNode` to insert `e` next to `this`. Returns `e`.

	History: added June 13, 2020
+/
Element appendSibling(Element e) {
	auto owner = parentNode;
	owner.insertAfter(this, e);
	return e;
}

/// ditto
Element prependSibling(Element e) {
	auto owner = parentNode;
	owner.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	When `e` is a [DocumentFragment], the fragment's children are appended
	rather than the fragment itself.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
		assert(e !is this);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
do {
	// detach from wherever it currently lives
	if(e.parentNode !is null) {
		e.parentNode.removeChild(e);
	}

	selfClosed = false;

	auto frag = cast(DocumentFragment) e;
	if(frag is null)
		children ~= e;
	else
		children ~= frag.children;

	e.parentNode = this;

	/+
	foreach(item; e.tree)
		item.parentDocument = this.parentDocument;
	+/

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param.
/// If `what` is a [DocumentFragment], its children are spliced in instead and re-parented to `this`.
/// Returns `what`; when `where` is not among the children nothing is inserted.
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		auto frag = cast(DocumentFragment) what;
		if(frag is null) {
			children = children[0..idx] ~ what ~ children[idx..$];
		} else {
			children = children[0..idx] ~ frag.children ~ children[idx..$];
			foreach(moved; frag.children)
				moved.parentNode = this;
		}
		what.parentNode = this;
		return what;
	}

	// `where` was not found; hand `what` back untouched
	return what;
}

/++
	Inserts the given element `what` as a child of `this`, directly after the existing child `where`.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		// everything up to and including `where` stays in front
		const cut = idx + 1;
		auto frag = cast(DocumentFragment) what;
		if(frag is null) {
			children = children[0 .. cut] ~ what ~ children[cut .. $];
		} else {
			// a fragment contributes its children, which are re-parented to us
			children = children[0 .. cut] ~ frag.children ~ children[cut .. $];
			foreach(moved; frag.children)
				moved.parentNode = this;
		}
		what.parentNode = this;
		return what;
	}

	// `where` was not found; hand `what` back untouched
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
/// The replacement takes the old child's slot and gets `this` as its parent.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
do {
	foreach(ref slot; this.children) {
		if(slot !is child)
			continue;
		slot.parentNode = null;
		slot = replacement;
		slot.parentNode = this;
		return child;
	}
	// the in-contract says `child` is ours, so not finding it is a logic error
	assert(0);
}


/++
	Appends the given text to the node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
3468 3469 See_Also: 3470 [firstInnerText], [directText], [innerText], [appendChild] 3471 +/ 3472 @scriptable 3473 Element appendText(string text) { 3474 Element e = new TextNode(parentDocument, text); 3475 appendChild(e); 3476 return this; 3477 } 3478 3479 /++ 3480 Returns child elements which are of a tag type (excludes text, comments, etc.). 3481 3482 3483 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 3484 3485 Params: 3486 tagName = filter results to only the child elements with the given tag name. 3487 +/ 3488 @property Element[] childElements(string tagName = null) { 3489 Element[] ret; 3490 foreach(c; children) 3491 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 3492 ret ~= c; 3493 return ret; 3494 } 3495 3496 /++ 3497 Appends the given html to the element, returning the elements appended 3498 3499 3500 This is similar to `element.innerHTML += "html string";` in Javascript. 3501 +/ 3502 @scriptable 3503 Element[] appendHtml(string html) { 3504 Document d = new Document("<root>" ~ html ~ "</root>"); 3505 return stealChildren(d.root); 3506 } 3507 3508 /++ 3509 Returns `this` for use inside `with` expressions. 3510 3511 History: 3512 Added December 20, 2024 3513 +/ 3514 inout(Element) self() inout pure @nogc nothrow @safe scope return { 3515 return this; 3516 } 3517 3518 /++ 3519 Inserts a child under this element after the element `where`. 
+/
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		// split right behind `where`
		const cut = idx + 1;
		if(cast(DocumentFragment) child !is null) {
			children = children[0..cut] ~ child.children ~ children[cut..$];
			// note: the fragment's children are not individually re-parented here
		} else {
			children = children[0..cut] ~ child ~ children[cut..$];
		}
		child.parentNode = this;
		break;
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.

	Returns:
		the children that were taken from `e`.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
do {
	// claim ownership first
	foreach(taken; e.children) {
		taken.parentNode = this;
	}

	if(position is null) {
		children ~= e.children;
	} else {
		foreach(idx, existing; children) {
			if(existing !is position)
				continue;
			children = children[0..idx] ~ e.children ~ children[idx..$];
			break;
		}
	}

	// keep a slice of the stolen list before emptying the donor
	auto stolen = e.children[];
	e.children.length = 0;

	return stolen;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
/// A [DocumentFragment] contributes its children, which are re-parented to `this`.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
do {
	auto frag = cast(DocumentFragment) e;
	if(frag is null) {
		children = e ~ children;
	} else {
		children = e.children ~ children;
		foreach(moved; frag.children)
			moved.parentNode = this;
	}
	e.parentNode = this;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Each child is written to the `where` appender; the return value is the
	portion of the appender's data written by this call.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output begins so only that part is returned
	const offset = where.data.length;

	foreach(kid; children) {
		assert(kid !is null);
		kid.writeToAppender(where);
	}

	return where.data[offset .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	// there is content coming, so this can no longer be a self-closed tag
	selfClosed = false;

	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// adopt the parsed nodes
	children = doc.root.children;
	foreach(adopted; children) {
		adopted.parentNode = this;
	}

	// and detach them from the temporary wrapper
	doc.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/**
	Replaces this node with the given html string, which is parsed

	The parsed nodes first become this element's children, then [stripOut]
	is called on this element.

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	children = doc.root.children;
	foreach(adopted; children) {
		adopted.parentNode = this;
	}

	stripOut();

	return doc.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// the existing child list is emptied and a single RawSource node takes its place
	children.length = 0;
	auto raw = new RawSource(parentDocument, rawSource);
	children ~= raw;
	raw.parentNode = this;
}

/++
	Replaces the element `find`, which must be a child of `this`, with the element `replace`, which must have no parent.

	Returns:
		`replace`

	Throws:
		`Exception` if `find` is not in the children list.
+/
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(find.parentNode is this);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
do {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(ref slot; children) {
		if(slot !is find)
			continue;
		replace.parentNode = this;
		slot.parentNode = null; // the old child is orphaned
		slot = replace;
		return replace;
	}

	throw new Exception("no such child ");// ~  find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString);
}

/**
	Replaces the given element with a whole group.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
do {
	// an empty group means plain removal
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);

	for(int i = 0; i < children.length; i++) {
		if(children[i] !is find)
			continue;

		children[i].parentNode = null; // this element should now be dead
		// the first replacement takes over the slot...
		children[i] = replace[0];
		foreach(incoming; replace) {
			incoming.parentNode = this;
		}

		// ...and the rest are handed to the module-level insertAfter helper
		children = .insertAfter(children, i, replace[1..$]);

		return;
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list and clears its `parentNode`.

	Returns the removed element.

	Throws `Exception` if `c` is not found among the children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is c)
			continue;
		children = children[0..idx] ~ children[idx+1..$];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
do {
	// work on a copy so the returned list survives clearing our own
	Element[] detached = children.dup;
	foreach(orphan; detached) {
		orphan.parentNode = null;
	}

	children.length = 0;

	return detached;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> &amp; code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like [textContent].

	See_Also:
		[visibleText], which is closer to what the real `innerText`
		does.
*/
@scriptable
@property string innerText() const {
	string collected;
	foreach(kid; children) {
		// text nodes give their value; everything else is descended into
		if(kid.nodeType == NodeType.Text)
			collected ~= kid.nodeValue();
		else
			collected ~= kid.innerText;
	}
	return collected;
}

/// ditto
alias textContent = innerText;

/++
	Gets the element's visible text, similar to how it would look assuming
	the document was HTML being displayed by a browser. This means it will
	attempt whitespace normalization (unless it is a `<pre>` tag), add `\n`
	characters for `<br>` tags, and I reserve the right to make it process
	additional css and tags in the future.

	If you need specific output, use the more stable [textContent] property
	or iterate yourself with [tree] or a recursive function with [children].

	History:
		Added March 25, 2022 (dub v10.8)
+/
string visibleText() const {
	const insidePre = this.tagName == "pre";
	return this.visibleTextHelper(insidePre);
}

// Recursive worker for visibleText; `pre` is true once a `pre` tag is on the path.
private string visibleTextHelper(bool pre) const {
	string result;
	foreach(thing; this.children) {
		if(thing.nodeType == NodeType.Text) {
			if(pre)
				result ~= thing.nodeValue;
			else
				result ~= normalizeWhitespace(thing.nodeValue);
		} else if(thing.tagName == "br") {
			result ~= "\n";
		} else {
			result ~= thing.visibleTextHelper(pre || thing.tagName == "pre");
		}
	}
	return result;
}

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;
	// a single TextNode becomes the only child
	Element node = new TextNode(parentDocument, text);
	children = [node];
	node.parentNode = this;
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return this.innerText;
}


/* *******************************
	Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
do {
+/
{
	return cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	// the copy gets the same tag name, a duplicate of the attributes, and the selfClosed flag
	auto copy = Element.make(this.tagName);
	copy.attributes = this.attributes.aadup;
	copy.selfClosed = this.selfClosed;

	if(!deepClone)
		return copy;

	foreach(kid; children) {
		copy.appendChild(kid.cloneNode(true));
	}

	return copy;
}

/// W3C DOM interface.
/// Only really meaningful on [TextNode] instances, but the interface is present on the base class.
/// This base implementation always returns an empty string.
string nodeValue() const {
	return "";
}

// should return int
/// The node type code; this base implementation always returns 1.
@property int nodeType() const {
	return 1;
}


// Class invariant: in debug builds, a tag name must not contain a space.
// The other checks below are disabled.
invariant () {
	debug assert(tagName.indexOf(" ") == -1);

	// commented cuz it gets into recursive pain and eff dat.
	/+
	if(children !is null)
	foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
		assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}
	+/

	/+
	// this isn't helping
	if(parent_ && parent_.asElement) {
		bool found = false;
		foreach(child; parent_.asElement.children)
			if(child is this)
				found = true;
		assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement)));
	}
	+/

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
	override string toString() const {
		return writeToAppender();
	}

	/++
		Returns if the node would be printed to string as `<tag />` or `<tag></tag>`. In other words, if it has no non-empty text nodes and no element nodes. Please note that whitespace text nodes are NOT considered empty; `Html("<tag> </tag>").isEmpty == false`.


		The value is undefined if there are comment or processing instruction nodes. The current implementation returns false if it sees those, assuming the nodes haven't been stripped out during parsing. But I'm not married to the current implementation and reserve the right to change it without notice.

		History:
			Added December 3, 2021 (dub v10.5)

	+/
	public bool isEmpty() const {
		foreach(child; this.children) {
			// any non-text node counts as content since that's a tag;
			// a text node only counts when its value has some length (null and "" are both empty)
			const isText = child.nodeType == NodeType.Text;
			if(!isText || child.nodeValue.length)
				return false;
		}

		return true;
	}

	/// Builds the line break plus indentation placed before a tag by the pretty printer.
	/// Returns null when `indentWith` is null. With `insertComments`, the whitespace is wrapped in `<!--` and `-->`.
	protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
		if(indentWith is null)
			return null;

		// at the top we don't have anything to really do
		//if(parent_ is null)
			//return null;

			// I've used isEmpty before but this other check seems better....
			//|| this.isEmpty())

		string s;

		if(insertComments)
			s ~= "<!--";

		s ~= "\n";
		for(int level = 0; level < indentationLevel; level++)
			s ~= indentWith;

		if(insertComments)
			s ~= "-->";

		return s;
	}

	/++
		Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
		for eyeball debugging.

		$(PITFALL
			This function is not stable. Its interface and output may change without
			notice.
The only promise I make is that it will continue to make a best-effort
			attempt at being useful for debugging by human eyes.

			I have used it in the past for diffing html documents, but even then, it
			might change between versions. If it is useful, great, but beware; this
			use is at your own risk.
		)

		History:
			On November 19, 2021, I changed this to `final`. If you were overriding it,
			change your override to `toPrettyStringImpl` instead. It now just calls
			`toPrettyStringImpl.strip` to be an entry point for a stand-alone call.

			If you are calling it as part of another implementation, you might want to
			change that call to `toPrettyStringImpl` as well.

			I am NOT considering this a breaking change since this function is documented
			to only be used for eyeball debugging anyway, which means the exact format is
			not specified and the override behavior can generally not be relied upon.

			(And I find it extremely unlikely anyone was subclassing anyway, but if you were,
			email me, and we'll see what we can do. I'd like to know at least.)

			I reserve the right to make future changes in the future without considering
			them breaking as well.
	+/
	final string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		auto formatted = toPrettyStringImpl(insertComments, indentationLevel, indentWith);
		return formatted.strip;
	}

	/// The overridable implementation behind [toPrettyString]. Unlike that entry point, the result is not stripped.
	string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {

		// first step is to concatenate any consecutive text nodes to simplify
		// the white space analysis. this changes the tree! but i'm allowed since
		// the comment always says it changes the comments
		//
		// actually i'm not allowed cuz it is const so i will cheat and lie
		/+
		TextNode lastTextChild = null;
		for(int a = 0; a < this.children.length; a++) {
			auto child = this.children[a];
			if(auto tn = cast(TextNode) child) {
				if(lastTextChild) {
					lastTextChild.contents ~= tn.contents;
					for(int b = a; b < this.children.length - 1; b++)
						this.children[b] = this.children[b + 1];
					this.children = this.children[0 .. $-1];
				} else {
					lastTextChild = tn;
				}
			} else {
				lastTextChild = null;
			}
		}
		+/

		auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements);

		// working copy of the child list in which each run of consecutive text nodes
		// is merged into one temporary TextNode; the real tree is left untouched
		const(Element)[] mergedChildren;

		TextNode runningText = null;
		foreach(child; this.children) {
			auto tn = cast(const(TextNode)) child;
			if(tn is null) {
				// a non-text child ends the current run of text
				runningText = null;
				mergedChildren ~= child;
				continue;
			}

			if(runningText is null) {
				runningText = new TextNode("");
				runningText.parentNode = cast(Element) this;
				mergedChildren ~= runningText;
			}
			runningText.contents ~= tn.contents;
		}

		string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

		s ~= "<";
		s ~= tagName;

		// i sort these for consistent output. might be more legible
		// but especially it keeps it the same for diff purposes.
		import std.algorithm : sort;
		foreach(attrName; sort(attributes.keys)) {
			s ~= " ";
			s ~= attrName;
			s ~= "=\"";
			s ~= htmlEntitiesEncode(attributes[attrName]);
			s ~= "\"";
		}

		if(selfClosed){
			s ~= " />";
			return s;
		}

		s ~= ">";

		// for simple `<collection><item>text</item><item>text</item></collection>`, let's
		// just keep them on the same line

		if(isEmpty) {
			// no work needed, this is empty so don't indent just for a blank line
		} else if(mergedChildren.length == 1 && mergedChildren[0].isEmpty) {
			// just one empty one, can put it inline too
			s ~= mergedChildren[0].toString();
		} else if(tagName.isInArray(inlineElements) || allAreInlineHtml(mergedChildren, inlineElements)) {
			foreach(child; mergedChildren) {
				s ~= child.toString();//toPrettyString(false, 0, null);
			}
		} else {
			foreach(child; mergedChildren) {
				assert(child !is null);

				s ~= child.toPrettyStringImpl(insertComments, indentationLevel + 1, indentWith);
			}

			s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
		}

		s ~= "</";
		s ~= tagName;
		s ~= ">";

		return s;
	}

	/+
	/// Writes out the opening tag only, if applicable.
	string writeTagOnly(Appender!string where = appender!string()) const {
	+/

	/++
		This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
		Note: the ordering of attributes in the string is undefined.
		Returns the string it creates.

		Implementation_Notes:
			The order of attributes printed by this function is undefined, as permitted by the XML spec. You should NOT rely on any implementation detail noted here.

			However, in practice, between June 14, 2019 and August 22, 2024, it actually did sort attributes by key name.
After August 22, 2024, it changed to track attribute append order and will print them back out in the order in which the keys were first seen.

			This is subject to change again at any time. Use [toPrettyString] if you want a defined output (toPrettyString always sorts by name for consistent diffing).
	+/
	string writeToAppender(Appender!string where = appender!string()) const {
		assert(tagName !is null);

		where.reserve((this.children.length + 1) * 512);

		// remember where this element begins so only its own text is returned,
		// even when the caller passed a buffer that already had content
		auto startOffset = where.data.length;

		where.put("<");
		where.put(tagName);

		/+
		import std.algorithm : sort;
		auto keys = sort(attributes.keys);
		foreach(n; keys) {
			auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later.
		+/
		foreach(n, v; attributes) {
			//assert(v !is null);
			where.put(" ");
			where.put(n);
			where.put("=\"");
			htmlEntitiesEncode(v, where);
			where.put("\"");
		}

		if(selfClosed) {
			where.put(" />");
		} else {
			where.put('>');

			innerHTML(where);

			where.put("</");
			where.put(tagName);
			where.put('>');
		}

		return where.data[startOffset .. $];
	}

	/**
		Returns a lazy range of all its children, recursively.
	*/
	@property ElementStream tree() {
		return new ElementStream(this);
	}

	// I moved these from Form because they are generally useful.
	// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
	// FIXME: add overloads for other label types...
	/++
		Adds a form field to this element, normally a `<input>` but `type` can also be `"textarea"`.

		This is fairly html specific and the label uses my style.
I recommend you view the source before you use it to better understand what it does.
	+/
	/// Tags: HTML, HTML5
	Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		// checkboxes and radios get their caption after the control, everything else before it
		const captionAfterControl = (type == "checkbox" || type == "radio");

		auto wrapper = this.addChild("label");

		if(!captionAfterControl)
			wrapper.addChild("span", label);

		Element input;
		if(type == "textarea") {
			input = wrapper.addChild("textarea").
				setAttribute("name", name).
				setAttribute("rows", "6");
		} else {
			input = wrapper.addChild("input").
				setAttribute("name", name).
				setAttribute("type", type);
		}

		if(captionAfterControl)
			wrapper.addChild("span", label);

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(input);
		return wrapper;
	}

	/// ditto
	Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto wrapper = this.addChild("label");
		wrapper.addChild(label);

		Element input;
		if(type == "textarea") {
			input = wrapper.addChild("textarea").
				setAttribute("name", name).
				setAttribute("rows", "6");
		} else {
			input = wrapper.addChild("input").
				setAttribute("name", name).
				setAttribute("type", type);
		}

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(input);
		return wrapper;
	}

	/// ditto
	Element addField(string label, string name, FormFieldOptions fieldOptions) {
		return addField(label, name, "text", fieldOptions);
	}

	/// ditto
	Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto wrapper = this.addChild("label");
		wrapper.addChild("span", label);
		auto sel = wrapper.addChild("select").setAttribute("name", name);

		// one <option> per entry of `options`
		foreach(k, opt; options)
			sel.addChild("option", opt, k);

		// FIXME: implement requirements somehow
		// (fieldOptions is currently not applied in this overload)

		return wrapper;
	}

	/// ditto
	Element addSubmitButton(string label = null) {
		auto holder = this.addChild("div");
		holder.addClass("submit-holder");
		auto button = holder.addChild("input");
		button.type = "submit";
		if(label.length)
			button.value = label;
		return holder;
	}

}

// computedStyle could arguably be removed to bring size down
//pragma(msg, __traits(classInstanceSize, Element));
//pragma(msg, Element.tupleof);

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/++
	Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)

	History:
		On December 16, 2022, it disabled the special case treatment of `<script>` and `<style>` that [Document]
		does for HTML. To get the old behavior back, add `, true` to your constructor call.
+/
/// Group: core_functionality
class XmlDocument : Document {
	/++
		Constructs a stricter-mode XML parser and parses the given data source.

		History:
			The `Utf8Stream` version of the constructor was added on February 22, 2025.
+/
	this(string data, bool enableHtmlHacks = false) {
		auto stream = new Utf8Stream(data);
		this(stream, enableHtmlHacks);
	}

	/// ditto
	this(Utf8Stream data, bool enableHtmlHacks = false) {
		// clear the element lists and switch the content type and prolog to XML before parsing
		selfClosedElements = null;
		inlineElements = null;
		rawSourceElements = null;
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStream(data, true, true, !enableHtmlHacks);
	}
}

unittest {
	// FIXME: i should also make XmlDocument do different entities than just html too.
	auto str = "<html><style>foo {}</style><script>void function() { a < b; }</script></html>";
	auto document = new Document(str, true, true);
	assert(document.requireSelector("style").children[0].tagName == "#raw");
	assert(document.requireSelector("script").children[0].tagName == "#raw");
	try {
		auto xml = new XmlDocument(str);
		assert(0);
	} catch(MarkupException e) {
		// failure expected, script special case is not valid XML without a dtd (which isn't here)
	}
	//assert(xml.requireSelector("style").children[0].tagName == "#raw");
	//assert(xml.requireSelector("script").children[0].tagName == "#raw");
}



import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// both sides are stripped and lower-cased before comparing
	const wanted = txt.strip().toLower();
	Element[] matches;

	foreach(comment; element.getElementsByTagName("#comment")) {
		if(comment.nodeValue().strip().toLower() == wanted)
			matches ~= comment;
	}

	return matches;
}

/// An option type that propagates null.
/// See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	auto opDispatch(string method, T...)(T args) {
		// the result type of the forwarded call decides what "null" looks like
		alias type = typeof(__traits(getMember, element, method)(args));
		static if(is(type : Element)) {
			if(element is null)
				return MaybeNullElement!type(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(type == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if(is(type == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			// only Element, string and void results are supported
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// Wraps a single element.
	this(Element e) {
		elements = [e];
	}

	/// Collects everything under `e` matched by `selector` (via `querySelectorAll`).
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Wraps an existing array of elements.
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `selector` against each member and gathers all results into a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection gathered;
		foreach(member; elements)
			gathered.elements ~= member.getElementsBySelector(selector);
		return gathered;
	}

	/// Returns the member at index `i`.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1..$];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		The separator is appended after every member, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string joined;
		foreach(e; elements) {
			joined ~= mixin("e." ~ method);
			joined ~= separator;
		}
		return joined;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
	+/
	ElementCollection wrapIn(Element what) {
		// each member gets its own shallow clone of `what` as a wrapper
		foreach(member; elements)
			member.wrapIn(what.cloneNode(false));

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		auto combined = this.elements ~ rhs.elements;
		return ElementCollection(combined);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through `get`; `obj.name = v` with a non-null `v` writes through `set`.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		return (v is null) ? get(name) : set(name, v);
	}

	/// `obj["key"]` forwards to `get`.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj["field"] = value` forwards to `set`.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Stores `value` in the attribute `data-` followed by the un-camel-cased `name`.
	string set(string name, string value) {
		const attributeName = "data-" ~ unCamelCase(name);
		_element.setAttribute(attributeName, value);
		return value;
	}

	/// Reads the attribute `data-` followed by the un-camel-cased `name`.
	string get(string name) const {
		const attributeName = "data-" ~ unCamelCase(name);
		return _element.getAttribute(attributeName);
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Generally, you shouldn't create this yourself, since you can use [Element.attrs] instead.
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/++
		Sets a `value` for attribute with `name`. If the attribute doesn't exist, this will create it, even if `value` is `null`.
	+/
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/++
		Provides support for testing presence of an attribute with the `in` operator.
4629 4630 History: 4631 Added December 16, 2020 (dub v10.10) 4632 +/ 4633 auto opBinaryRight(string op : "in")(string name) const 4634 { 4635 return name in _element.attributes; 4636 } 4637 /// 4638 unittest 4639 { 4640 auto doc = new XmlDocument(`<test attr="test"/>`); 4641 assert("attr" in doc.root.attrs); 4642 assert("test" !in doc.root.attrs); 4643 } 4644 4645 /++ 4646 Returns the value of attribute `name`, or `null` if doesn't exist 4647 +/ 4648 string get(string name) const { 4649 return _element.getAttribute(name); 4650 } 4651 4652 /// 4653 mixin JavascriptStyleDispatch!(); 4654 } 4655 4656 private struct InternalAttribute { 4657 // variable length structure 4658 private InternalAttribute* next; 4659 private uint totalLength; 4660 private ushort keyLength; 4661 private char[0] chars; 4662 4663 // this really should be immutable tbh 4664 inout(char)[] key() inout return { 4665 return chars.ptr[0 .. keyLength]; 4666 } 4667 4668 inout(char)[] value() inout return { 4669 return chars.ptr[keyLength .. totalLength]; 4670 } 4671 4672 static InternalAttribute* make(in char[] key, in char[] value) { 4673 // old code was 4674 //auto data = new ubyte[](InternalAttribute.sizeof + key.length + value.length); 4675 //GC.addRange(data.ptr, data.length); // MUST add the range to scan it! 
4676 4677 import core.memory; 4678 // but this code is a bit better, notice we did NOT set the NO_SCAN attribute because of the presence of the next pointer 4679 // (this can sometimes be a pessimization over the separate strings but meh, most of these attributes are supposed to be small) 4680 auto obj = cast(InternalAttribute*) GC.calloc(InternalAttribute.sizeof + key.length + value.length); 4681 4682 // assert(key.length > 0); 4683 4684 obj.totalLength = cast(uint) (key.length + value.length); 4685 obj.keyLength = cast(ushort) key.length; 4686 if(key.length != obj.keyLength) 4687 throw new Exception("attribute key overflow"); 4688 if(key.length + value.length != obj.totalLength) 4689 throw new Exception("attribute length overflow"); 4690 4691 obj.key[] = key[]; 4692 obj.value[] = value[]; 4693 4694 return obj; 4695 } 4696 4697 // FIXME: disable default ctor and op new 4698 } 4699 4700 import core.exception; 4701 4702 struct AttributesHolder { 4703 private @system InternalAttribute* attributes; 4704 4705 /+ 4706 invariant() { 4707 const(InternalAttribute)* wtf = attributes; 4708 while(wtf) { 4709 assert(wtf != cast(void*) 1); 4710 assert(wtf.keyLength != 0); 4711 import std.stdio; writeln(wtf.key, "=", wtf.value); 4712 wtf = wtf.next; 4713 } 4714 } 4715 +/ 4716 4717 /+ 4718 It is legal to do foo["key", "default"] to call it with no error... 
+/
	string opIndex(scope const char[] key) const {
		auto found = find(key);
		if(found is null)
			throw new RangeError(key.idup); // FIXME
		return cast(string) found.value;
	}

	// Like opIndex, but returns `returnedIfKeyNotFound` instead of throwing when the key is absent.
	string get(scope const char[] key, string returnedIfKeyNotFound = null) const {
		auto attr = this.find(key);
		return (attr is null) ? returnedIfKeyNotFound : cast(string) attr.value;
	}

	// All keys, in list order.
	private string[] keys() const {
		string[] names;
		foreach(k, v; this)
			names ~= k;
		return names;
	}

	/+
		If this were to return a string* it'd be tricky cuz someone could try to rebind it, which is impossible.

		This is a breaking change. You can get a similar result though with [get].
	+/
	bool opBinaryRight(string op : "in")(scope const char[] key) const {
		return find(key) !is null;
	}

	// Linear search for the node holding `key`; null when there is none.
	private inout(InternalAttribute)* find(scope const char[] key) inout @trusted {
		for(inout(InternalAttribute)* current = attributes; current; current = current.next) {
			// assert(current > cast(void*) 1);
			if(current.key == key)
				return current;
		}
		return null;
	}

	// Unlinks the node holding `key`, if any.
	void remove(scope const char[] key) @trusted {
		InternalAttribute* previous;
		auto current = attributes;

		// advance until `current` is the matching node or the list ends
		while(current && current.key != key) {
			previous = current;
			current = current.next;
		}

		if(current is null)
			return;

		if(previous is null)
			attributes = current.next;
		else
			previous.next = current.next;
		// assert(previous.next != cast(void*) 1);
		// assert(attributes != cast(void*) 1);
	}

	// Sets `key` to `value`. An existing key keeps its position in the list; a new key goes at the end.
	// A node is never edited in place: a changed value gets a freshly made node linked in its spot.
	void opIndexAssign(scope const char[] value, scope const char[] key) @trusted {
		// empty list: the new attribute becomes the head
		if(attributes is null) {
			attributes = InternalAttribute.make(key, value);
			return;
		}
		auto current = attributes;

		// the head holds the key
		if(current.key == key) {
			if(current.value == value)
				return; // same value, nothing to do

			auto newHead = InternalAttribute.make(key, value);
			attributes = newHead;
			newHead.next = current.next;
			// assert(replacement.next != cast(void*) 1);
			// assert(attributes != cast(void*) 1);
			return;
		}

		// stop with `current.next` being either the node that holds the key or null (end of list)
		while(current.next) {
			if(current.next.key == key) {
				if(current.next.value == value)
					return; // replacing immutable value with self, no change
				break;
			}
			current = current.next;
		}
		assert(current !is null);

		auto replacement = InternalAttribute.make(key, value);
		if(current.next !is null)
			replacement.next = current.next.next;
		current.next = replacement;
		// assert(current.next != cast(void*) 1);
		// assert(replacement.next != cast(void*) 1);
	}

	// Enables `foreach(key, value; holder)` in list order; a non-zero result from `dg` stops the walk.
	int opApply(int delegate(string key, string value) dg) const @trusted {
		for(const(InternalAttribute)* current = attributes; current !is null; current = current.next) {
			if(auto res = dg(cast(string) current.key, cast(string) current.value))
				return res;
		}
		return 0;
	}

	// Renders `key="value"` pairs separated by single spaces. Values are written as stored, without entity encoding.
	string toString() {
		string ret;
		foreach(k, v; this) {
			if(ret.length)
				ret ~= " ";
			ret ~= k;
			ret ~= `="`;
			ret ~= v;
			ret ~= `"`;
		}
		return ret;
	}
}

unittest {
	AttributesHolder holder;

	// checks that iteration yields the expected pairs, in order, and nothing beyond them
	void verifyOrder(string[][] expected) {
		int count;
		foreach(k, v; holder) {
			assert(count < expected.length);
			assert(k == expected[count][0] && v == expected[count][1]);
			count++;
		}
	}

	holder["one"] = "1";
	holder["two"] = "2";
	holder["three"] = "3";

	assert("one" in holder);
	assert("two" in holder);
	assert("three" in holder);
	assert("four" !in holder);
	verifyOrder([["one", "1"], ["two", "2"], ["three", "3"]]);

	// replacing a value keeps the key's position
	holder["two"] = "dos";

	assert("one" in holder);
	assert("two" in holder);
	assert("three" in holder);
	assert("four" !in holder);
	verifyOrder([["one", "1"], ["two", "dos"], ["three", "3"]]);

	// a new key is appended at the end
	holder["four"] = "4";

	assert("one" in holder);
	assert("two" in holder);
	assert("three" in holder);
	assert("four" in holder);
	verifyOrder([["one", "1"], ["two", "dos"], ["three", "3"], ["four", "4"]]);
}

/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
		_attribute = _element.getAttribute("style");
		originalAttribute = _attribute;
	}

	~this() {
		// write the string back only if it was rebound to a different one since construction
		if(_attribute !is originalAttribute)
			_element.setAttribute("style", _attribute);
	}

	Element _element;
	string _attribute;
	string originalAttribute;

	/+
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}
	+/

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.
string set(string name, string value) {
		if(name.length == 0)
			return value;
		// "cssFloat" is the script-side spelling of the css "float" property
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		r[name] = value;

		// rebuild the whole attribute text from the rule table
		_attribute = "";
		foreach(k, v; r) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(_attribute.length)
				_attribute ~= " ";
			_attribute ~= k ~ ": " ~ v ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	/// Returns the value of the css property `name` (camelCase or "cssFloat" accepted), or null if it is not set.
	string get(string name) const {
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		if(name in r)
			return r[name];
		return null;
	}

	/// Splits the style text on `;` into a property => value table.
	/// A piece without a `:` is stored under its whole text with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(rule;  _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				ret[rule] = "";
			else {
				auto name = rule[0 .. idx].strip();
				auto value = rule[idx + 1 .. $].strip();

				ret[name] = value;
			}
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
///
/// Only ASCII `A`-`Z` are rewritten (to a dash plus the lower case letter); all other bytes are copied unchanged.
string unCamelCase(string a) {
	string ret;
	foreach(c; a)
		if((c >= 'A' && c <= 'Z'))
			ret ~= "-" ~ toLower("" ~ c)[0];
		else
			ret ~= c;
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
///
/// Every dash is dropped and the character that follows it is upper cased.
/// The input is walked by code point, so a non-ASCII character right after a
/// dash is upper cased as a whole character instead of byte by byte.
string camelCase(string a) {
	import std.conv : to;

	string ret;
	bool justSawDash = false;
	foreach(dchar c; a) {
		if(c == '-') {
			justSawDash = true;
		} else if(justSawDash) {
			justSawDash = false;
			// upper case through the string overload so multi-character mappings behave as before
			ret ~= toUpper(to!string(c));
		} else {
			ret ~= c;
		}
	}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.


/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




///.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
///
/// Throws [ElementNotFoundException] when `e` is null or is not a `T`; otherwise returns `e` cast to `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	auto casted = cast(T) e;
	if(casted !is null)
		return casted;

	throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
}


///.
/// Group: core_functionality
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	///.
5136 override string writeToAppender(Appender!string where = appender!string()) const { 5137 return this.innerHTML(where); 5138 } 5139 5140 override string toPrettyStringImpl(bool insertComments, int indentationLevel, string indentWith) const { 5141 string s; 5142 foreach(child; children) 5143 s ~= child.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 5144 return s; 5145 } 5146 5147 /// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes 5148 /* 5149 override inout(Element) parentNode() inout { 5150 return children.length ? children[0].parentNode : null; 5151 } 5152 */ 5153 /+ 5154 override Element parentNode(Element p) { 5155 this.parentNode = p; 5156 foreach(child; children) 5157 child.parentNode = p; 5158 return p; 5159 } 5160 +/ 5161 } 5162 5163 /// Given text, encode all html entities on it - &, <, >, and ". This function also 5164 /// encodes all 8 bit characters as entities, thus ensuring the resultant text will work 5165 /// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false 5166 /// 5167 /// The output parameter can be given to append to an existing buffer. You don't have to 5168 /// pass one; regardless, the return value will be usable for you, with just the data encoded. 5169 /// Group: core_functionality 5170 string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) { 5171 // if there's no entities, we can save a lot of time by not bothering with the 5172 // decoding loop. This check cuts the net toString time by better than half in my test. 5173 // let me know if it made your tests worse though, since if you use an entity in just about 5174 // every location, the check will add time... but I suspect the average experience is like mine 5175 // since the check gives up as soon as it can anyway. 
5176 5177 bool shortcut = true; 5178 foreach(char c; data) { 5179 // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it. 5180 if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) { 5181 shortcut = false; // there's actual work to be done 5182 break; 5183 } 5184 } 5185 5186 if(shortcut) { 5187 output.put(data); 5188 return data; 5189 } 5190 5191 auto start = output.data.length; 5192 5193 output.reserve(data.length + 64); // grab some extra space for the encoded entities 5194 5195 foreach(dchar d; data) { 5196 if(d == '&') 5197 output.put("&"); 5198 else if (d == '<') 5199 output.put("<"); 5200 else if (d == '>') 5201 output.put(">"); 5202 else if (d == '\"') 5203 output.put("""); 5204 // else if (d == '\'') 5205 // output.put("'"); // if you are in an attribute, it might be important to encode for the same reason as double quotes 5206 // FIXME: should I encode apostrophes too? as '... I could also do space but if your html is so bad that it doesn't 5207 // quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh 5208 // idk about apostrophes though. Might be worth it, might not. 5209 else if (!encodeNonAscii || (d < 128 && d > 0)) 5210 output.put(d); 5211 else 5212 output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";"); 5213 } 5214 5215 //assert(output !is null); // this fails on empty attributes..... 5216 return output.data[start .. $]; 5217 5218 // data = data.replace("\u00a0", " "); 5219 } 5220 5221 /// An alias for htmlEntitiesEncode; it works for xml too 5222 /// Group: core_functionality 5223 string xmlEntitiesEncode(string data) { 5224 return htmlEntitiesEncode(data); 5225 } 5226 5227 /// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters. 
/// The argument is the whole entity including the leading `&` and one trailing delimiter
/// character (normally `;`), e.g. `&amp;` or `&#x41;`. Unknown names yield `'\ufffd'`.
/// Group: core_functionality
dchar parseEntity(in dchar[] entity) {
	// we need at least the '&', one character of name, and the trailing delimiter;
	// anything shorter cannot name an entity (and would be sliced out of range below)
	if(entity.length < 3)
		return '\ufffd';

	char[128] buffer;
	int bpos;
	bool nameFits = true;
	foreach(char c; entity[1 .. $-1]) {
		if(bpos == buffer.length) {
			// far longer than any known entity name; skip the table lookup instead of overrunning the buffer
			nameFits = false;
			break;
		}
		buffer[bpos++] = c;
	}
	char[] entityAsString = buffer[0 .. bpos];

	if(nameFits) {
		// plain binary search over the sorted name table; the half-open range [lo, hi)
		// makes sure every index, including the very first one, can be probed
		size_t lo = 0;
		size_t hi = availableEntities.length;
		while(lo < hi) {
			auto spot = lo + (hi - lo) / 2;
			if(availableEntities[spot] == entityAsString)
				return availableEntitiesValues[spot];
			else if(entityAsString < availableEntities[spot])
				hi = spot;
			else
				lo = spot + 1;
		}
	}

	switch(entity[1..$-1]) {
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		case "amp":
			return '&';
		// the next are html rather than xml

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] >   '9'))
						decimal = decimal[1 .. $];

					while(decimal.length && (decimal[$-1] < '0' || decimal[$-1] >   '9'))
						decimal = decimal[0 .. $ - 1];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

unittest {
	// not in the binary search
	assert(parseEntity("&quot;"d) == '"');

	// numeric value
	assert(parseEntity("&#x0534;") == '\u0534');

	// not found at all
	assert(parseEntity("&asdasdasd;"d) == '\ufffd');

	// random values in the bin search
	assert(parseEntity("&Tab;"d) == '\t');
	assert(parseEntity("&raquo;"d) == '\&raquo;');

	// near the middle and edges of the bin search
	assert(parseEntity("&ascr;"d) == '\U0001d4b6');
	assert(parseEntity("&ast;"d) == '\u002a');
	assert(parseEntity("&AElig;"d) == '\u00c6');
	assert(parseEntity("&zwnj;"d) == '\u200c');
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer; // scratch space for utf-8 encoding a single code point

	// state of the entity currently being collected, if any
	bool tryingEntity = false;
	bool tryingNumericEntity = false;
	bool tryingHexEntity = false;
	dchar[16] entityBeingTried; // includes the leading '&'
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0; // characters seen since the '&'; used to give up on overlong candidates

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			if(entityBeingTriedLength == 2 && ch == '#') {
				tryingNumericEntity = true;
				continue;
			} else if(tryingNumericEntity && entityBeingTriedLength == 3 && ch == 'x') {
				tryingHexEntity = true;
				continue;
			}

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") {
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				} else {
					auto ch2 = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
					if(ch2 == '\ufffd') { // either someone put this in intentionally (lol) or we failed to get it
						// but either way, just abort and keep the plain text
						foreach(char c; entityBeingTried[0 .. entityBeingTriedLength - 1]) // cut off the & we're on now
							a ~= c;
					} else {
						a ~= buffer[0.. std.utf.encode(buffer, ch2)];
					}
				}

				// tryingEntity is still true
				goto new_entity;
			} else
			if(ch == ';') {
				// properly terminated: decode whatever was collected
				tryingEntity = false;
				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					// give up and emit the collected text unchanged, followed by the space
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]);
					a ~= buffer[0 .. std.utf.encode(buffer, ch)];
				}
			} else {
				if(tryingNumericEntity) {
					if(ch < '0' || ch > '9') {
						if(tryingHexEntity) {
							// hex references may also contain letters; anything else ends the reference
							if(ch < 'A')
								goto trouble;
							if(ch > 'Z' && ch < 'a')
								goto trouble;
							if(ch > 'z')
								goto trouble;
						} else {
							trouble:
							if(strict)
								throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
							// loose mode: decode the digits seen so far, then keep the offending character as text
							tryingEntity = false;
							a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
							a ~= ch;
							continue;
						}
					}
				}


				if(entityAttemptIndex >= 9) {
					done:
					// too long to be an entity; stop trying
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				new_entity:
				tryingEntity = true;
				tryingNumericEntity = false;
				tryingHexEntity = false;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

unittest {
	// error recovery
	assert(htmlEntitiesDecode("&lt;&foo") == "<&foo"); // unterminated turned back to thing
	assert(htmlEntitiesDecode("&lt&foo") == "<&foo"); // semi-terminated... parse and carry on (is this really sane?)
	assert(htmlEntitiesDecode("loc&#61en_us&tracknum&#61;111") == "loc=en_us&tracknum=111"); // a bit of both, seen in a real life email
	assert(htmlEntitiesDecode("&amp test") == "&amp test"); // unterminated, just abort

	// in strict mode all of these should fail
	try { assert(htmlEntitiesDecode("&lt;&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&lt&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("loc&#61en_us&tracknum&#61;111", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&amp test", true) == "& test"); assert(0); } catch(Exception e) { }

	// correct cases that should pass the same in strict or loose mode
	foreach(strict; [false, true]) {
		assert(htmlEntitiesDecode("&amp;hello&raquo; win", strict) == "&hello\&raquo; win");
	}
}

/// Base class for nodes that are not regular elements; they cannot have children appended
/// and report a node type of 100.
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes do not accept children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Returns 100 for every special node.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node (tag name `#raw`) whose source string is written to the output exactly as given, with no encoding.
/// Group: implementations
class RawSource : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	///.
	override string nodeValue() const {
		return this.toString();
	}

	/// Appends the raw source unchanged.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return source;
	}


	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	/// The text emitted verbatim.
	string source;
}

/// Common base for embedded server-side code nodes; serializes as `<` ~ source ~ `>` and
/// uses the tag name `#` ~ type.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	/// Writes the source wrapped in angle brackets and returns just the part written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return "<" ~ source ~ ">";
	}

	/// Everything between the outer angle brackets.
	string source;
}

/// Server-side code node with the tag name `#php`.
/// Group: implementations
class PhpCode : ServerSideCode {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		source = s;
	}

	override PhpCode cloneNode(bool deep) {
		return new PhpCode(parentDocument, source);
	}
}

/// Server-side code node with the tag name `#asp`.
/// Group: implementations
class AspCode : ServerSideCode {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		source = s;
	}

	override AspCode cloneNode(bool deep) {
		return new AspCode(parentDocument, source);
	}
}

/// A `<!...>` construct (tag name `#bpi`); serializes as `<!` ~ source ~ `>`.
/// Group: implementations
class BangInstruction : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#bpi";
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	override BangInstruction cloneNode(bool deep) {
		return new BangInstruction(parentDocument, source);
	}

	/// Writes `<!`, the source, then `>`, and returns just the part written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<!";
		s ~= source;
		s ~= ">";
		return s;
	}

	/// Everything between `<!` and `>`.
	string source;
}

/// A node with the tag name `#qpi`; serializes as `<` ~ source ~ `>`.
/// Group: implementations
class QuestionInstruction : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#qpi";
	}

	override QuestionInstruction cloneNode(bool deep) {
		return new QuestionInstruction(parentDocument, source);
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	/// Writes `<`, the source, then `>`, and returns just the part written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<";
		s ~= source;
		s ~= ">";
		return s;
	}


	/// Everything between the outer angle brackets.
	string source;
}

/// A comment node (tag name `#comment`); serializes as `<!--` ~ source ~ `-->`.
/// Group: implementations
class HtmlComment : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	override HtmlComment cloneNode(bool deep) {
		return new HtmlComment(parentDocument, source);
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	/// Writes `<!--`, the source, then `-->`, and returns just the part written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	/// The comment text, without the `<!--` and `-->` markers.
	string source;
}




/// A run of text (tag name `#text`). The contents are stored decoded and are entity-encoded when written out.
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a text node holding `e` (already decoded text) in the given document.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// Creates a text node that is not attached to any document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from html source text, decoding its entities. Decoding is strict
	/// only when a document is given and that document is not in loose mode.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same document and contents; `deep` is not used.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	/// The decoded text.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	/// Returns [NodeType.Text].
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Writes the entity-encoded contents; the result is never null, even for empty text.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	override string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// continuation lines use the caller's indentation string, same as the first line
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes do not accept children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

/++
	Represents a HTML link.
This provides some convenience methods for manipulating query strings, but otherwise is the same Element interface.

	Please note this object may not be used for all `<a>` tags.
+/
/// Group: implementations
class Link : Element {

	/++
		Constructs `<a href="that href">that text</a>`.
	+/
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}

	/// ditto
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}

	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns null if the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if(name in vars)
			return vars[name];
		return null;
	}

	// Parses the query string of the href attribute into decoded name => value pairs.
	// Returns null when there is no href at all.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// the fragment is not part of the query; drop it first so that a '?'
		// appearing inside the fragment is not mistaken for the query marker
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0..fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1)
			str = href[ques+1..$];

		string[] variables = str.split("&");

		string[string] hash;

		foreach(var; variables) {
			auto index = var.indexOf("=");
			if(index == -1)
				// a bare name with no value; decode it like any other name so that
				// updateQueryString does not encode it a second time
				hash[decodeUriComponent(var)] = "";
			else {
				hash[decodeUriComponent(var[0..index])] = decodeUriComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	/// Replaces all the stuff after a ? in the link at once with the given assoc array values.
	/// Any `#fragment` on the existing href is kept at the end of the new href.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// In a url the fragment comes after the query, so it has to be split off
		// before the query is cut away, or it would be discarded along with it.
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0..question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeUriComponent(name);
			if(value.length)
				query ~= "=" ~ encodeUriComponent(value);
		}

		// only add the ? if there is actually something after it
		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	override void setValue(string name, string[] variable) {
		assert(0, "not implemented FIXME");
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

/++
	Represents a HTML form. This slightly specializes Element to add a few more convenience methods for adding and extracting form data.

	Please note this object may not be used for all `<form>` tags.
+/
/// Group: implementations
class Form : Element {

	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	/// Overrides of the base class implementations that more conform to *my* conventions when writing form html.
	/// If the form contains an element matching `fieldset div`, the field is added there instead of directly to the form.
	override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	/// ditto
	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
		auto type = "text";
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	/// ditto
	override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, options, fieldOptions);
		else
			return t.addField(label, name, options, fieldOptions);
	}

	/// Sets the value of the named field, adding a new hidden input if the field does not exist
	/// (this is the three argument overload called with makeNew = true).
	override void setValue(string field, string value) {
		setValue(field, value, true);
	}

	override void setValue(string name, string[] variable) {
		assert(0, "not implemented FIXME");
	}

	// FIXME: doesn't handle arrays; multiple fields can have the same name

	/// Sets the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.
	///
	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
	void setValue(string field, string value, bool makeNew) {
		auto eles = getField(field);
		if(eles.length == 0) {
			if(makeNew) {
				addInput(field, value);
				return;
			} else
				throw new Exception("form field does not exist");
		}

		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				default: assert(0);
				case "textarea":
					e.innerText = value;
				break;
				case "input":
					string type = e.getAttribute("type");
					if(type is null) {
						e.value = value;
						return;
					}
					switch(type) {
						case "checkbox":
						case "radio":
							// any non-empty value other than the literal "false" checks the box
							if(value.length && value != "false")
								e.setAttribute("checked", "checked");
							else
								e.removeAttribute("checked");
						break;
						default:
							e.value = value;
							return;
					}
				break;
				case "select":
					bool found = false;
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						// an option without a value attribute submits its text
						string val = child.getAttribute("value");
						if(val is null)
							val = child.innerText;
						if(val == value) {
							child.setAttribute("selected", "selected");
							found = true;
						} else
							child.removeAttribute("selected");
					}

					if(!found) {
						// no existing option matched, so add one and select it
						e.addChild("option", value)
						.setAttribute("selected", "selected");
					}
				break;
			}
		} else {
			// assume radio boxes
			foreach(e; eles) {
				string val = e.getAttribute("value");
				//if(val is null)
				//	throw new Exception("don't know what to do with radio boxes with null value");
				if(val == value)
					e.setAttribute("checked", "checked");
				else
					e.removeAttribute("checked");
			}
		}
	}

	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
	/// it makes no attempt to find and modify existing elements in the form to the new values.
	void addValueArray(string key, string[] arrayOfValues) {
		foreach(arr; arrayOfValues)
			addChild("input", key, arr);
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
	string getValue(string field) {
		auto eles = getField(field);
		if(eles.length == 0)
			return "";
		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				default: assert(0);
				case "input":
					if(e.type == "checkbox") {
						if(e.checked)
							return e.value.length ? e.value : "checked";
						return "";
					} else
						return e.value;
				case "textarea":
					return e.innerText;
				case "select":
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						if(child.selected)
							return child.value;
					}
				break;
			}
		} else {
			// assuming radio
			foreach(e; eles) {
				if(e.checked)
					return e.value;
			}
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	/++
		Returns the form's contents in application/x-www-form-urlencoded format.

		Bugs:
			Doesn't handle repeated elements of the same name nor files.
	+/
	string getPostableData() {
		bool[string] namesDone;

		string ret;
		bool outputted = false;

		foreach(e; getElementsBySelector("[name]")) {
			// each name is emitted once; getValue resolves groups such as radio buttons
			if(e.name in namesDone)
				continue;

			if(outputted)
				ret ~= "&";
			else
				outputted = true;

			ret ~= encodeUriComponent(e.name) ~ "=" ~ encodeUriComponent(getValue(e.name));

			namesDone[e.name] = true;
		}

		return ret;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] ret;
		foreach(e; tree) {
			if(e.name == name)
				ret ~= e;
		}
		return ret;
	}

	/// Grabs the <label> with the given for tag, if there is one.
	Element getLabel(string forId) {
		foreach(e; tree)
			if(e.tagName == "label" && e.getAttribute("for") == forId)
				return e;
		return null;
	}

	/// Adds a new INPUT field to the end of the form with the given attributes.
	Element addInput(string name, string value, string type = "hidden") {
		auto e = new Element(parentDocument, "input", null, true);
		e.name = name;
		e.value = value;
		e.type = type;

		appendChild(e);

		return e;
	}

	/// Removes the given field from the form. It finds the element and knocks it right out.
	void removeField(string name) {
		foreach(e; getField(name))
			e.parentNode.removeChild(e);
	}

	/+
	/// Returns all form members.
	@property Element[] elements() {

	}

	///.
	string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
		 && name != "style"  && name != "name" && name != "id" && name != "class")
	{

	}
	+/
/+
	void submit() {
		// take its elements and submit them through http
	}
+/
}

import std.conv;

/++
	Represents a HTML table. Has some convenience methods for working with tabular data.
+/
/// Group: implementations
class Table : Element {

	/// You can make this yourself but you'd generally get one of these object out of a html parse or [Element.make] call.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/++
		Creates an element with the given type and content. The argument can be an Element, Html, or other data which is converted to text with `to!string`

		The element is $(I not) appended to the table.
+/
	Element th(T)(T t) {
		Element e;
		// prefer the owning document's factory when there is one
		if(parentDocument !is null)
			e = parentDocument.createElement("th");
		else
			e = Element.make("th");
		static if(is(T == Html))
			e.innerHTML = t;
		else static if(is(T : Element))
			e.appendChild(t);
		else
			e.innerText = to!string(t);
		return e;
	}

	/// ditto
	Element td(T)(T t) {
		Element e;
		// prefer the owning document's factory when there is one
		if(parentDocument !is null)
			e = parentDocument.createElement("td");
		else
			e = Element.make("td");
		static if(is(T == Html))
			e.innerHTML = t;
		else static if(is(T : Element))
			e.appendChild(t);
		else
			e.innerText = to!string(t);
		return e;
	}

	/++
		Passes each argument to the [th] method for `appendHeaderRow` or [td] method for the others, appends them all to the `<tbody>` element for `appendRow`, `<thead>` element for `appendHeaderRow`, or a `<tfoot>` element for `appendFooterRow`, and ensures it is appended it to the table.
	+/
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// ditto
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// ditto
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	/++
		Takes each argument as a class name and calls [Element.addClass] for each element in the column associated with that index.

		Empty class names are skipped, as are columns a row does not have and empty (null) grid positions.

		Please note this does not use the html `<col>` element.
	+/
	void addColumnClasses(string[] classes...) {
		auto grid = getGrid();
		foreach(row; grid)
			foreach(i, cl; classes) {
				if(cl.length == 0 || i >= row.length)
					continue;
				// the grid may contain null cells, so only touch positions that hold one
				if(row[i] !is null)
					row[i].addClass(cl);
			}
	}

	// Builds a <tr> from the arguments and appends it to the first direct child whose tag is
	// findType, creating that child if the table does not have one yet. Arguments that are not
	// already td/th elements are wrapped in a new innerType cell; a string[] argument expands
	// to one cell per string. Returns the new row.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach(e; t) {
			static if(is(typeof(e) : Element)) {
				if(e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if(is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if(is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach(ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else static if(is(typeof(e) == string[])) {
				foreach(ele; e) {
					Element a = Element.make(innerType);
					a.innerText = to!string(ele);
					row.appendChild(a);
				}
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach(e; children) {
			if(e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the `<caption>` element of the table, creating one if it isn't there.
	Element captionElement() {
		Element cap;
		foreach(c; children) {
			if(c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if(cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// Returns or sets the text inside the `<caption>` element, creating that element if it isn't there.
6385 @property string caption() { 6386 return captionElement().innerText; 6387 } 6388 6389 /// ditto 6390 @property void caption(string text) { 6391 captionElement().innerText = text; 6392 } 6393 6394 /// Gets the logical layout of the table as a rectangular grid of 6395 /// cells. It considers rowspan and colspan. A cell with a large 6396 /// span is represented in the grid by being referenced several times. 6397 /// The tablePortition parameter can get just a <thead>, <tbody>, or 6398 /// <tfoot> portion if you pass one. 6399 /// 6400 /// Note: the rectangular grid might include null cells. 6401 /// 6402 /// This is kinda expensive so you should call once when you want the grid, 6403 /// then do lookups on the returned array. 6404 TableCell[][] getGrid(Element tablePortition = null) 6405 in { 6406 if(tablePortition is null) 6407 assert(tablePortition is null); 6408 else { 6409 assert(tablePortition !is null); 6410 assert(tablePortition.parentNode is this); 6411 assert( 6412 tablePortition.tagName == "tbody" 6413 || 6414 tablePortition.tagName == "tfoot" 6415 || 6416 tablePortition.tagName == "thead" 6417 ); 6418 } 6419 } 6420 do { 6421 if(tablePortition is null) 6422 tablePortition = this; 6423 6424 TableCell[][] ret; 6425 6426 // FIXME: will also return rows of sub tables! 6427 auto rows = tablePortition.getElementsByTagName("tr"); 6428 ret.length = rows.length; 6429 6430 int maxLength = 0; 6431 6432 int insertCell(int row, int position, TableCell cell) { 6433 if(row >= ret.length) 6434 return position; // not supposed to happen - a rowspan is prolly too big. 6435 6436 if(position == -1) { 6437 position++; 6438 foreach(item; ret[row]) { 6439 if(item is null) 6440 break; 6441 position++; 6442 } 6443 } 6444 6445 if(position < ret[row].length) 6446 ret[row][position] = cell; 6447 else 6448 foreach(i; ret[row].length .. 
position + 1) { 6449 if(i == position) 6450 ret[row] ~= cell; 6451 else 6452 ret[row] ~= null; 6453 } 6454 return position; 6455 } 6456 6457 foreach(i, rowElement; rows) { 6458 auto row = cast(TableRow) rowElement; 6459 assert(row !is null); 6460 assert(i < ret.length); 6461 6462 int position = 0; 6463 foreach(cellElement; rowElement.childNodes) { 6464 auto cell = cast(TableCell) cellElement; 6465 if(cell is null) 6466 continue; 6467 6468 // FIXME: colspan == 0 or rowspan == 0 6469 // is supposed to mean fill in the rest of 6470 // the table, not skip it 6471 foreach(int j; 0 .. cell.colspan) { 6472 foreach(int k; 0 .. cell.rowspan) 6473 // if the first row, always append. 6474 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 6475 position++; 6476 } 6477 } 6478 6479 if(ret[i].length > maxLength) 6480 maxLength = cast(int) ret[i].length; 6481 } 6482 6483 // want to ensure it's rectangular 6484 foreach(ref r; ret) { 6485 foreach(i; r.length .. maxLength) 6486 r ~= null; 6487 } 6488 6489 return ret; 6490 } 6491 } 6492 6493 /// Represents a table row element - a <tr> 6494 /// Group: implementations 6495 class TableRow : Element { 6496 ///. 6497 this(Document _parentDocument) { 6498 super(_parentDocument); 6499 tagName = "tr"; 6500 } 6501 6502 // FIXME: the standard says there should be a lot more in here, 6503 // but meh, I never use it and it's a pain to implement. 6504 } 6505 6506 /// Represents anything that can be a table cell - <td> or <th> html. 6507 /// Group: implementations 6508 class TableCell : Element { 6509 ///. 
6510 this(Document _parentDocument, string _tagName) { 6511 super(_parentDocument, _tagName); 6512 } 6513 6514 /// Gets and sets the row/colspan attributes as integers 6515 @property int rowspan() const { 6516 int ret = 1; 6517 auto it = getAttribute("rowspan"); 6518 if(it.length) 6519 ret = to!int(it); 6520 return ret; 6521 } 6522 6523 /// ditto 6524 @property int colspan() const { 6525 int ret = 1; 6526 auto it = getAttribute("colspan"); 6527 if(it.length) 6528 ret = to!int(it); 6529 return ret; 6530 } 6531 6532 /// ditto 6533 @property int rowspan(int i) { 6534 setAttribute("rowspan", to!string(i)); 6535 return i; 6536 } 6537 6538 /// ditto 6539 @property int colspan(int i) { 6540 setAttribute("colspan", to!string(i)); 6541 return i; 6542 } 6543 6544 } 6545 6546 6547 /// This is thrown on parse errors. 6548 /// Group: implementations 6549 class MarkupException : Exception { 6550 6551 ///. 6552 this(string message, string file = __FILE__, size_t line = __LINE__) { 6553 super(message, file, line); 6554 } 6555 } 6556 6557 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6558 /// Group: implementations 6559 class ElementNotFoundException : Exception { 6560 6561 /// type == kind of element you were looking for and search == a selector describing the search. 6562 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6563 this.searchContext = searchContext; 6564 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6565 } 6566 6567 Element searchContext; 6568 } 6569 6570 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6571 /// 6572 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6573 /// Group: core_functionality 6574 struct Html { 6575 /// This string holds the actual html. Use it to retrieve the contents. 
6576 string source; 6577 } 6578 6579 // for the observers 6580 enum DomMutationOperations { 6581 setAttribute, 6582 removeAttribute, 6583 appendChild, // tagname, attributes[], innerHTML 6584 insertBefore, 6585 truncateChildren, 6586 removeChild, 6587 appendHtml, 6588 replaceHtml, 6589 appendText, 6590 replaceText, 6591 replaceTextOnly 6592 } 6593 6594 // and for observers too 6595 struct DomMutationEvent { 6596 DomMutationOperations operation; 6597 Element target; 6598 Element related; // what this means differs with the operation 6599 Element related2; 6600 string relatedString; 6601 string relatedString2; 6602 } 6603 6604 6605 private immutable static string[] htmlSelfClosedElements = [ 6606 // html 4 6607 "area","base","br","col","hr","img","input","link","meta","param", 6608 6609 // html 5 6610 "embed","source","track","wbr" 6611 ]; 6612 6613 private immutable static string[] htmlRawSourceElements = [ 6614 "script", "style" 6615 ]; 6616 6617 private immutable static string[] htmlInlineElements = [ 6618 "span", "strong", "em", "b", "i", "a" 6619 ]; 6620 6621 6622 static import std.conv; 6623 6624 /// helper function for decoding html entities 6625 int intFromHex(string hex) { 6626 int place = 1; 6627 int value = 0; 6628 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6629 int v; 6630 char q = hex[a]; 6631 if( q >= '0' && q <= '9') 6632 v = q - '0'; 6633 else if (q >= 'a' && q <= 'f') 6634 v = q - 'a' + 10; 6635 else if (q >= 'A' && q <= 'F') 6636 v = q - 'A' + 10; 6637 else throw new Exception("Illegal hex character: " ~ q); 6638 6639 value += v * place; 6640 6641 place *= 16; 6642 } 6643 6644 return value; 6645 } 6646 6647 6648 // CSS selector handling 6649 6650 // EXTENSIONS 6651 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6652 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6653 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are 
// direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first



// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// Every punctuation token the selector lexer recognizes. A token's index in
/// this array is the id that [idToken] reports for it.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=",
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Identifies the selector token that starts at `str[position]`.
///
/// Returns: the index into [selectorTokens] of the first entry that matches
/// there, or -1 when none does. `position` must be a valid index into `str`.
sizediff_t idToken(string str, sizediff_t position) {
	immutable char first = str[position];
	foreach(index, candidate; selectorTokens) {
		if(candidate[0] != first)
			continue;

		// a two-character token also needs its second character to line up
		if(candidate.length > 1 && (position + 1 >= str.length || str[position + 1] != candidate[1]))
			continue;

		return index;
	}
	return -1;
}

/// Parts of the CSS selector implementation
// look, ma, no phobos!
// new lexer by ketmar
//
// Splits a selector string into tokens: the punctuation listed in
// selectorTokens, the contents of quoted strings (quotes removed), and
// identifiers with backslash escapes resolved. Control characters and
// /* comments */ are dropped; a plain space is kept as a token because it is
// the descendant combinator.
//
// Throws: Exception if a quoted string is never closed.
string[] lexSelector (string selstr) {

	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			auto end = selstr[0];
			size_t pos = 1;
			bool escaping;
			// Advance to the closing quote. A character that follows a
			// backslash never ends the string, so while `escaping` is set the
			// loop has to keep going even if that character is the quote.
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			// the closing quote may be the very last character; the slice is then empty
			selstr = selstr[pos + 1 .. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me  /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
	// quoted strings: escaped quotes stay inside, a trailing quote is fine
	assert(lexSelector(`[a="b\"c"]`) == ["[", "a", "=", `b"c`, "]"]);
	assert(lexSelector(`'x'`) == ["x"]);
}

/// ditto
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True when nothing but [separation] differs from a default-initialized part.
	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	/// Rebuilds selector text for this part, combinator first.
	string toString() {
		string ret;
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= " > "; break;
			case 2: ret ~= " + "; break;
			case 3: ret ~= " ~ "; break;
			case 4: ret ~= " < "; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]";

		foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")";
		foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")";

		foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")";
		foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")";

		foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")";
		foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")";
		foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(firstOfType) ret ~= ":first-of-type";
		if(lastOfType) ret ~= ":last-of-type";
		if(emptyElement) ret ~= ":empty";
		if(whitespaceOnly) ret ~= ":whitespace-only";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/// Returns true if the given element matches this part
	///
	/// Only this part's own filters are tested; [separation] is the caller's
	/// business. `scopeElementNow` is the element `:scope` refers to. A null
	/// `e`, or one whose nodeType is not 1, never matches.
	bool matchElement(Element e, Element scopeElementNow = null) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					if(c !is e)
						return false;
					// passing this filter must not short-circuit the ones below
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					if(c !is e)
						return false;
					// as above: keep evaluating the remaining filters
					break;
				}
			}
		}
		if(scopeElement) {
			if(e !is scopeElementNow)
				return false;
		}
		if(emptyElement) {
			// NOTE(review): this rejects the element when isEmpty() is true, which
			// reads inverted for `:empty`. isEmpty is defined outside this section,
			// so its exact meaning should be confirmed before touching this.
			if(e.isEmpty())
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// i is the zero-based index among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

// The parsed argument of an :nth-*() pseudo-class: `multiplier`*n + `adder`,
// optionally followed by "of <selector>" (kept unparsed in `of`).
struct ParsedNth {
	int multiplier;
	int adder;

	string of;

	// Accepts "odd", "even", a bare integer, or the An+B form (including a
	// lone sign in front of n, as in "-n+3"), each optionally followed by
	// "of <selector>".
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even is 2n, not 2n+1
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length >= 2 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// a lone sign in front of n is a coefficient of -1 or +1
				n = text[0] == '-' ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	// Returns true if `e` sits at a 1-based position idx within `elements`
	// (counting only those matching `of`, when given) such that
	// multiplier*n + adder == idx for some integer n >= 0.
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;

		// parse the `of` filter once instead of once per element
		Selector ofSelector;
		if(of.length)
			ofSelector = Selector(of);

		foreach(ele; elements) {
			if(of.length && !ofSelector.matchesElement(ele))
				continue;
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		immutable diff = idx - adder;
		if(multiplier == 0)
			return diff == 0;
		// the quotient is n; a negative n is not a solution (2n+5 must not match child 1)
		return diff % multiplier == 0 && diff / multiplier >= 0;
	}

	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Consumes an optionally signed run of decimal digits from the front of
	// `text`. Returns 0 when no digit is there; a sign that was present has
	// been consumed all the same.
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
/// ditto
//
// Walks from `start`, consuming one part per recursion level according to
// that part's combinator (`separation`), and collects the elements reached
// once `parts` is exhausted. The result may contain duplicates.
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts, Element scopeElementNow = null) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 2: // next-sibling
			auto e = start.nextSibling("*");
			if(part.matchElement(e, scopeElementNow))
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e, scopeElementNow))
						ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e, scopeElementNow)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
		break; // NOT IMPLEMENTED: contributes no results
	}

	return ret;
}

/++
	Represents a parsed CSS selector.
You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	/// One entry per comma-separated alternative of the selector.
	SelectorComponent[] components;
	/// The text this selector was parsed from.
	string original;

	/++
		Parses the selector string and constructs the usable structure.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Returns true if the given element matches this selector,
		considered relative to an arbitrary element. The element matches
		as soon as any one of the comma-separated alternatives does.

		You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this
		with [std.algorithm.iteration.filter]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = element.tree.filter!(e => sel.matchesElement(e));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		foreach(alternative; components) {
			if(alternative.matchElement(e, relativeTo))
				return true;
		}

		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]

		Collects what every alternative finds when started from `start`,
		then drops the duplicates.
	+/
	Element[] getMatchingElements(Element start, Element relativeTo = null) {
		Element[] found;
		foreach(alternative; components) {
			found ~= getElementsBySelectorParts(start, alternative.parts, relativeTo);
		}
		return removeDuplicates(found);
	}

	/++
		Like [getMatchingElements], but returns a lazy range. Be careful
		about mutating the dom as you iterate through this.
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		import std.algorithm.iteration;
		return start.tree.filter!(a => this.matchesElement(a, relativeTo));
	}


	/// Returns the string this was built from
	string toString() {
		return original;
	}

	/++
		Returns a string from the parsed result


		(may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing)
	+/
	string parsedToString() {
		string text;

		foreach(index, alternative; components) {
			if(index > 0)
				text ~= ", ";
			text ~= alternative.toString();
		}

		return text;
	}
}

/// A single comma-free selector: a chain of parts joined by combinators.
struct SelectorComponent {
	/// The chain, in source order (leftmost part first).
	SelectorPart[] parts;

	/// Concatenates the text of every part.
	string toString() {
		string text;
		foreach(piece; parts) {
			text ~= piece.toString();
		}
		return text;
	}

	// USEFUL
	/// Finds everything this chain selects when started from `start`, without duplicates.
	Element[] getElements(Element start, Element relativeTo = null) {
		auto found = getElementsBySelectorParts(start, parts, relativeTo);
		return removeDuplicates(found);
	}

	// USEFUL
	/// Tests whether `e` is selected by this chain. `relativeTo` is handed to
	/// every part as its `:scope` element, and the ancestor / earlier-sibling
	/// searches give up once they have tried it.
	///
	/// Throws: Exception("invalid selector") when the chain opens with a
	/// combinator other than the descendant one.
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null)
			return false;

		if(parts.length && parts[0].separation > 0) {
			// An earlier idea, left disabled upstream: inject a leading "*"
			// descendant part as a root so that something like
			// document.querySelector("> body") would imply html, bailing out
			// early when e has no non-text children.
			throw new Exception("invalid selector");
		}

		// The chain is checked right to left: the last part has to match e
		// itself, and each earlier part has to match whatever the combinator
		// of the part after it leads to.
		Element where = e;
		int lastSeparation = -1;

		foreach_reverse(part; parts) {
			switch(lastSeparation) {
				case -1: // rightmost part: tested against e itself
					if(!part.matchElement(where, relativeTo))
						return false;
				break;

				case 0: // descendant combinator: any ancestor will do
					for(where = where.parentNode; where !is null; where = where.parentNode) {
						if(part.matchElement(where, relativeTo))
							break;

						if(where is relativeTo)
							return false;
					}

					if(where is null)
						return false;
				break;

				case 1: // the > operator: the direct parent
					where = where.parentNode;

					if(!part.matchElement(where, relativeTo))
						return false;
				break;

				case 2: // the + operator: the element right before
					where = where.previousSibling("*");

					if(!part.matchElement(where, relativeTo))
						return false;
				break;

				case 3: // the ~ operator: any earlier sibling
					for(where = where.previousSibling("*"); where !is null; where = where.previousSibling("*")) {
						if(part.matchElement(where, relativeTo))
							break;

						if(where is relativeTo)
							return false;
					}

					if(where is null)
						return false;
				break;

				case 4: // my bad idea extension < operator, don't use this anymore
					// FIXME: nothing is checked here
				break;

				default:
					// any other value is passed over without a check
				break;
			}

			lastSeparation = part.separation;

			// Upstream note: a final "if(where is relativeTo) return false;"
			// used to sit here. It was disabled because a :scope is now
			// injected iff there is a selector at top level, and that
			// injection already acts as the terminating condition.
			// tbh this prolly needs like a trillion more tests.
		}

		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	///.
	static SelectorComponent fromString(string selector) {
		auto tokens = lexSelector(selector);
		return parseSelector(tokens);
	}
}

/// Lexes `selector` and parses each top-level comma-separated piece into a
/// [SelectorComponent]. A comma inside parentheses does not split. Empty
/// pieces are skipped.
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] result;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0; // deliberately carried across pieces
	while(tokens.length > 0) {
		// find the comma that ends this piece, ignoring the ones nested in parentheses
		size_t end = 0;
		for(; end < tokens.length; ++end) {
			immutable tok = tokens[end];
			if(parensCount <= 0 && tok == ",")
				break;
			if(tok == "(")
				parensCount++;
			else if(tok == ")")
				parensCount--;
		}

		if(end > 0)
			result ~= parseSelector(tokens[0 .. end], caseSensitiveTags);

		// done once nothing is left after the separating comma
		if(end + 1 >= tokens.length)
			break;
		tokens = tokens[end + 1 .. $];
	}
	return result;
}

///.
/++
	Parses one comma-free token list into a [SelectorComponent].

	Params:
		tokens = selector tokens, e.g. the output of `lexSelector`. Must not contain a top-level comma; use [parseSelectorString] for that.
		caseSensitiveTags = when false, tag name tokens are lower-cased before being stored.

	Returns:
		The component whose `parts` hold one [SelectorPart] per compound selector. Each part's `separation` records the combinator in front of it: 0 descendant, 1 `>`, 2 `+`, 3 `~`, 4 `<`.

	Throws:
		`Exception("parse error")` if a functional pseudo-class such as `:not` or `:nth-child` is not followed by `(` (including when it is the last token), and an `ArsdException` for an operator token that is not understood.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// Returns the concatenated tokens between the parentheses that follow
		// the current token. The tokens themselves are not consumed here; the
		// SkippingFunctionalSelector state walks past them afterwards.
		string readFunctionalSelector() {
			string text;
			// bounds check first: a selector that ends right after the pseudo-class
			// name must be a parse error, not an out-of-range access
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				text ~= t;
			}

			return text;
		}

		// index of the token in the operator table, or -1 for a plain identifier
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							import arsd.core;
							throw ArsdException!"CSS Selector Problem"(token, tokens, cast(int) state);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "lang":
						// the argument is skipped without being recorded
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-child":
						// FIXME
						//current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// requires a placeholder attribute named "nothing" to be present
						current.attributesPresent ~= "nothing";
						// FIXME
						/+
						// extensions not implemented
						//case "text": // takes the text in the element and wraps it in an element, returning it
						+/
						goto case;
					case "before", "after":
						// likewise requires a placeholder attribute named "FIXME"
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are silently ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				// back at depth zero: the functional argument list is finished
				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						// NOTE(review): this appends to attributeValue, which is overwritten
						// once a comparison is seen, so such tokens look discarded; appending
						// to attributeName may have been intended - confirm against the lexer.
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	// flush whatever part was still being built
	commit();

	return s;
}

///.
Element[] removeDuplicates(Element[] input) {
	// Keeps the first occurrence of each element and preserves input order.
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling

/++
	This delegate is called if you call [Element.computedStyle] to attach an object to the element
	that holds stylesheet information. You can rebind it to something else to return a subclass
	if you want to hold more per-element extension data than the normal computed style object holds
	(e.g. layout info as well).

	The default is `return new CssStyle(null, element.style);`

	History:
		Added September 13, 2024 (dub v11.6)
+/
CssStyle function(Element e) computedStyleFactory = &defaultComputedStyleFactory;

/// ditto
CssStyle defaultComputedStyleFactory(Element e) {
	return new CssStyle(null, e.style); // gives at least something to work with
}


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Builds a style from a rule (selector text; empty or null means an
		inline style) and the declaration text between the braces.

		Declarations are split on `;` and then on the first `:`. Pieces
		without a colon are skipped. `!important` markers are currently
		dropped. If `content` is empty after stripping, nothing is stored
		(not even the rule).
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;

		}

		// foreach iterates over the slice as it was here, so the longhands
		// appended by the expansion are not themselves re-expanded
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/++
		Rough specificity estimate for a rule string. An empty rule is
		treated as an inline style (`important = 2`). Otherwise `.`, `#`
		and space characters are counted up to the first comma.
	+/
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
			s.important = 2;
		} else {
			// SO. WRONG.
			foreach(ch; rule) {
				if(ch == '.')
					s.classes++;
				if(ch == '#')
					s.ids++;
				if(ch == ' ')
					s.tags++;
				if(ch == ',')
					break;
			}
			// FIXME
		}

		return s;
	}

	string originatingRule; /// The stripped rule text given to the constructor (unset when the content was empty).
	Specificity originatingSpecificity; /// Specificity computed from [originatingRule].

	/// Specificity packed into one integer so that two of them can be compared via `score`.
	union Specificity {
		uint score; /// All four counters viewed as one number.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	/// One css declaration.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	/// All declarations, explicit ones and the longhands inferred from shorthands.
	Property[] properties;

	/++
		Property access by camelCase name: `style.marginLeft` reads,
		`style.marginLeft = "1px"` (or `style.marginLeft("1px")`) writes
		with inline specificity.
	+/
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);
		else
			return setValue(name, value, Specificity(0x02000000) /* inline specificity */);
	}

	/// takes dash style name; returns null if the property is not present
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property by its dash style name.

		An existing property is only overwritten when `newSpecificity` is
		at least as high as the stored one. A new property is appended
		otherwise. Shorthands are expanded into their longhands, which are
		stored with `givenExplicitly = false`.

		Params:
			name = dash style property name
			value = new value; an `!important` marker is stripped and raises `newSpecificity.important` to 1
			newSpecificity = specificity of the declaration being applied
			explicit = false when the value was inferred from a shorthand

		Returns: the value, without any `!important` marker.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(i; 0 .. properties.length) {
			if(properties[i].name != name)
				continue;

			if(newSpecificity.score < properties[i].specificity.score)
				return value; // do nothing - the specificity is too low

			// Store everything BEFORE expanding: expandShortForm may append to
			// `properties` and reallocate it, and it must see the new value,
			// not the one being replaced. It receives a copy of the element.
			properties[i].givenExplicitly = explicit;
			properties[i].specificity = newSpecificity;
			properties[i].value = value;
			expandShortForm(properties[i], newSpecificity);
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must not be reported as explicit
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity; // remember the specificity that actually set it

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1-4 value box shorthand (margin, padding) into its
	// -top/-right/-bottom/-left longhands; other value counts are ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on runs of whitespace so repeated spaces do not produce empty parts
		auto parts = value.split();
		switch(parts.length) {
			case 1: // all four sides
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2: // vertical, horizontal
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3: // top, horizontal, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false); // left mirrors right

			break;
			case 4: // top, right, bottom, left
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// assert(0, value);
		}
	}

	/// Stores the longhand properties implied by `p` if it is a known shorthand; does nothing otherwise.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				// the whole value is copied to each side
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties, wrapped in `rule { ... }` when there is an originating rule.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred ones

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` as a css `url("...")` token. The url is inserted as-is, without any escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// The parsed rules, in source order.
	CssStyle[] rules;

	/++
		Parses css source text into [rules]. Comments are skipped, and so
		are @ rules (up to the first `;`, or to the first `}` after a `{`).
	+/
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever buffer (selector text or declaration text) is being filled
		string* currentThing = &currentRule;
		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was a plain slash, not a comment opener
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4: // inside an @ rule prelude
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside the braces of an @ rule
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Pushes `t`, growing the storage (doubling below 4096 slots, then by 4096) when it is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when nothing is on the stack.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently positioned on.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the first unvisited child if there is one, otherwise back up through the saved parents.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			// no more children here: resume in the parent, or finish
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			// descend into the next child, remembering where we were
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	/// True once the whole tree has been walked.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element together with the index of the child most recently entered (-1 = none yet)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the index of the first occurrence of `needle` in `haystack`, or -1 when the search comes back empty.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;

	// find returns the tail of haystack that starts at the match (empty if none)
	const remaining = std.algorithm.find(haystack, needle).length;
	return remaining == 0 ? -1 : cast(sizediff_t) (haystack.length - remaining);
}

/// Returns a new array holding `arr` with `what` spliced in right after index `position`.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);

	const splitAt = position + 1;
	const tailStart = splitAt + what.length;

	T[] combined;
	combined.length = arr.length + what.length;

	combined[0 .. splitAt] = arr[0 .. splitAt];
	combined[splitAt .. tailStart] = what[];
	combined[tailStart .. $] = arr[splitAt .. $];

	return combined;
}

/// Linear search: true if some element of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	for(size_t idx = 0; idx < arr.length; idx++) {
		if(item == arr[idx])
			return true;
	}
	return false;
}

/// Copies every key/value pair of the associative array into a fresh one.
private string[string] aadup(in string[string] arr) {
	string[string] copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}

/// Copies every key/value pair of the attributes holder into a fresh one.
private AttributesHolder aadup(const AttributesHolder arr) {
	AttributesHolder copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}















// These MUST be sorted.
See generatedomcases.d for a program to generate it if you need to add more than a few (otherwise maybe you can work it in yourself but yikes) 8402 8403 immutable string[] availableEntities = 8404 ["AElig", "AElig", "AMP", "AMP", "Aacute", "Aacute", "Abreve", "Abreve", "Acirc", "Acirc", "Acy", "Acy", "Afr", "Afr", "Agrave", "Agrave", "Alpha", "Alpha", "Amacr", "Amacr", "And", "And", "Aogon", "Aogon", "Aopf", "Aopf", "ApplyFunction", "ApplyFunction", "Aring", "Aring", "Ascr", "Ascr", "Assign", "Assign", "Atilde", 8405 "Atilde", "Auml", "Auml", "Backslash", "Backslash", "Barv", "Barv", "Barwed", "Barwed", "Bcy", "Bcy", "Because", "Because", "Bernoullis", "Bernoullis", "Beta", "Beta", "Bfr", "Bfr", "Bopf", "Bopf", "Breve", "Breve", "Bscr", "Bscr", "Bumpeq", "Bumpeq", "CHcy", "CHcy", "COPY", "COPY", "Cacute", "Cacute", "Cap", "Cap", "CapitalDifferentialD", 8406 "CapitalDifferentialD", "Cayleys", "Cayleys", "Ccaron", "Ccaron", "Ccedil", "Ccedil", "Ccirc", "Ccirc", "Cconint", "Cconint", "Cdot", "Cdot", "Cedilla", "Cedilla", "CenterDot", "CenterDot", "Cfr", "Cfr", "Chi", "Chi", "CircleDot", "CircleDot", "CircleMinus", "CircleMinus", "CirclePlus", "CirclePlus", "CircleTimes", "CircleTimes", 8407 "ClockwiseContourIntegral", "ClockwiseContourIntegral", "CloseCurlyDoubleQuote", "CloseCurlyDoubleQuote", "CloseCurlyQuote", "CloseCurlyQuote", "Colon", "Colon", "Colone", "Colone", "Congruent", "Congruent", "Conint", "Conint", "ContourIntegral", "ContourIntegral", "Copf", "Copf", "Coproduct", "Coproduct", "CounterClockwiseContourIntegral", 8408 "CounterClockwiseContourIntegral", "Cross", "Cross", "Cscr", "Cscr", "Cup", "Cup", "CupCap", "CupCap", "DD", "DD", "DDotrahd", "DDotrahd", "DJcy", "DJcy", "DScy", "DScy", "DZcy", "DZcy", "Dagger", "Dagger", "Darr", "Darr", "Dashv", "Dashv", "Dcaron", "Dcaron", "Dcy", "Dcy", "Del", "Del", "Delta", "Delta", "Dfr", "Dfr", 8409 "DiacriticalAcute", "DiacriticalAcute", "DiacriticalDot", "DiacriticalDot", "DiacriticalDoubleAcute", 
"DiacriticalDoubleAcute", "DiacriticalGrave", "DiacriticalGrave", "DiacriticalTilde", "DiacriticalTilde", "Diamond", "Diamond", "DifferentialD", "DifferentialD", "Dopf", "Dopf", "Dot", "Dot", "DotDot", "DotDot", "DotEqual", 8410 "DotEqual", "DoubleContourIntegral", "DoubleContourIntegral", "DoubleDot", "DoubleDot", "DoubleDownArrow", "DoubleDownArrow", "DoubleLeftArrow", "DoubleLeftArrow", "DoubleLeftRightArrow", "DoubleLeftRightArrow", "DoubleLeftTee", "DoubleLeftTee", "DoubleLongLeftArrow", "DoubleLongLeftArrow", "DoubleLongLeftRightArrow", 8411 "DoubleLongLeftRightArrow", "DoubleLongRightArrow", "DoubleLongRightArrow", "DoubleRightArrow", "DoubleRightArrow", "DoubleRightTee", "DoubleRightTee", "DoubleUpArrow", "DoubleUpArrow", "DoubleUpDownArrow", "DoubleUpDownArrow", "DoubleVerticalBar", "DoubleVerticalBar", "DownArrow", "DownArrow", "DownArrowBar", "DownArrowBar", 8412 "DownArrowUpArrow", "DownArrowUpArrow", "DownBreve", "DownBreve", "DownLeftRightVector", "DownLeftRightVector", "DownLeftTeeVector", "DownLeftTeeVector", "DownLeftVector", "DownLeftVector", "DownLeftVectorBar", "DownLeftVectorBar", "DownRightTeeVector", "DownRightTeeVector", "DownRightVector", "DownRightVector", "DownRightVectorBar", 8413 "DownRightVectorBar", "DownTee", "DownTee", "DownTeeArrow", "DownTeeArrow", "Downarrow", "Downarrow", "Dscr", "Dscr", "Dstrok", "Dstrok", "ENG", "ENG", "ETH", "ETH", "Eacute", "Eacute", "Ecaron", "Ecaron", "Ecirc", "Ecirc", "Ecy", "Ecy", "Edot", "Edot", "Efr", "Efr", "Egrave", "Egrave", "Element", "Element", "Emacr", "Emacr", 8414 "EmptySmallSquare", "EmptySmallSquare", "EmptyVerySmallSquare", "EmptyVerySmallSquare", "Eogon", "Eogon", "Eopf", "Eopf", "Epsilon", "Epsilon", "Equal", "Equal", "EqualTilde", "EqualTilde", "Equilibrium", "Equilibrium", "Escr", "Escr", "Esim", "Esim", "Eta", "Eta", "Euml", "Euml", "Exists", "Exists", "ExponentialE", "ExponentialE", 8415 "Fcy", "Fcy", "Ffr", "Ffr", "FilledSmallSquare", "FilledSmallSquare", "FilledVerySmallSquare", 
"FilledVerySmallSquare", "Fopf", "Fopf", "ForAll", "ForAll", "Fouriertrf", "Fouriertrf", "Fscr", "Fscr", "GJcy", "GJcy", "GT", "GT", "Gamma", "Gamma", "Gammad", "Gammad", "Gbreve", "Gbreve", "Gcedil", "Gcedil", "Gcirc", "Gcirc", 8416 "Gcy", "Gcy", "Gdot", "Gdot", "Gfr", "Gfr", "Gg", "Gg", "Gopf", "Gopf", "GreaterEqual", "GreaterEqual", "GreaterEqualLess", "GreaterEqualLess", "GreaterFullEqual", "GreaterFullEqual", "GreaterGreater", "GreaterGreater", "GreaterLess", "GreaterLess", "GreaterSlantEqual", "GreaterSlantEqual", "GreaterTilde", "GreaterTilde", 8417 "Gscr", "Gscr", "Gt", "Gt", "HARDcy", "HARDcy", "Hacek", "Hacek", "Hat", "Hat", "Hcirc", "Hcirc", "Hfr", "Hfr", "HilbertSpace", "HilbertSpace", "Hopf", "Hopf", "HorizontalLine", "HorizontalLine", "Hscr", "Hscr", "Hstrok", "Hstrok", "HumpDownHump", "HumpDownHump", "HumpEqual", "HumpEqual", "IEcy", "IEcy", "IJlig", "IJlig", 8418 "IOcy", "IOcy", "Iacute", "Iacute", "Icirc", "Icirc", "Icy", "Icy", "Idot", "Idot", "Ifr", "Ifr", "Igrave", "Igrave", "Im", "Im", "Imacr", "Imacr", "ImaginaryI", "ImaginaryI", "Implies", "Implies", "Int", "Int", "Integral", "Integral", "Intersection", "Intersection", "InvisibleComma", "InvisibleComma", "InvisibleTimes", 8419 "InvisibleTimes", "Iogon", "Iogon", "Iopf", "Iopf", "Iota", "Iota", "Iscr", "Iscr", "Itilde", "Itilde", "Iukcy", "Iukcy", "Iuml", "Iuml", "Jcirc", "Jcirc", "Jcy", "Jcy", "Jfr", "Jfr", "Jopf", "Jopf", "Jscr", "Jscr", "Jsercy", "Jsercy", "Jukcy", "Jukcy", "KHcy", "KHcy", "KJcy", "KJcy", "Kappa", "Kappa", "Kcedil", "Kcedil", 8420 "Kcy", "Kcy", "Kfr", "Kfr", "Kopf", "Kopf", "Kscr", "Kscr", "LJcy", "LJcy", "LT", "LT", "Lacute", "Lacute", "Lambda", "Lambda", "Lang", "Lang", "Laplacetrf", "Laplacetrf", "Larr", "Larr", "Lcaron", "Lcaron", "Lcedil", "Lcedil", "Lcy", "Lcy", "LeftAngleBracket", "LeftAngleBracket", "LeftArrow", "LeftArrow", "LeftArrowBar", 8421 "LeftArrowBar", "LeftArrowRightArrow", "LeftArrowRightArrow", "LeftCeiling", "LeftCeiling", "LeftDoubleBracket", 
"LeftDoubleBracket", "LeftDownTeeVector", "LeftDownTeeVector", "LeftDownVector", "LeftDownVector", "LeftDownVectorBar", "LeftDownVectorBar", "LeftFloor", "LeftFloor", "LeftRightArrow", "LeftRightArrow", "LeftRightVector", 8422 "LeftRightVector", "LeftTee", "LeftTee", "LeftTeeArrow", "LeftTeeArrow", "LeftTeeVector", "LeftTeeVector", "LeftTriangle", "LeftTriangle", "LeftTriangleBar", "LeftTriangleBar", "LeftTriangleEqual", "LeftTriangleEqual", "LeftUpDownVector", "LeftUpDownVector", "LeftUpTeeVector", "LeftUpTeeVector", "LeftUpVector", "LeftUpVector", 8423 "LeftUpVectorBar", "LeftUpVectorBar", "LeftVector", "LeftVector", "LeftVectorBar", "LeftVectorBar", "Leftarrow", "Leftarrow", "Leftrightarrow", "Leftrightarrow", "LessEqualGreater", "LessEqualGreater", "LessFullEqual", "LessFullEqual", "LessGreater", "LessGreater", "LessLess", "LessLess", "LessSlantEqual", "LessSlantEqual", 8424 "LessTilde", "LessTilde", "Lfr", "Lfr", "Ll", "Ll", "Lleftarrow", "Lleftarrow", "Lmidot", "Lmidot", "LongLeftArrow", "LongLeftArrow", "LongLeftRightArrow", "LongLeftRightArrow", "LongRightArrow", "LongRightArrow", "Longleftarrow", "Longleftarrow", "Longleftrightarrow", "Longleftrightarrow", "Longrightarrow", "Longrightarrow", 8425 "Lopf", "Lopf", "LowerLeftArrow", "LowerLeftArrow", "LowerRightArrow", "LowerRightArrow", "Lscr", "Lscr", "Lsh", "Lsh", "Lstrok", "Lstrok", "Lt", "Lt", "Map", "Map", "Mcy", "Mcy", "MediumSpace", "MediumSpace", "Mellintrf", "Mellintrf", "Mfr", "Mfr", "MinusPlus", "MinusPlus", "Mopf", "Mopf", "Mscr", "Mscr", "Mu", "Mu", 8426 "NJcy", "NJcy", "Nacute", "Nacute", "Ncaron", "Ncaron", "Ncedil", "Ncedil", "Ncy", "Ncy", "NegativeMediumSpace", "NegativeMediumSpace", "NegativeThickSpace", "NegativeThickSpace", "NegativeThinSpace", "NegativeThinSpace", "NegativeVeryThinSpace", "NegativeVeryThinSpace", "NestedGreaterGreater", "NestedGreaterGreater", 8427 "NestedLessLess", "NestedLessLess", "NewLine", "NewLine", "Nfr", "Nfr", "NoBreak", "NoBreak", "NonBreakingSpace", 
"NonBreakingSpace", "Nopf", "Nopf", "Not", "Not", "NotCongruent", "NotCongruent", "NotCupCap", "NotCupCap", "NotDoubleVerticalBar", "NotDoubleVerticalBar", "NotElement", "NotElement", "NotEqual", "NotEqual", 8428 "NotExists", "NotExists", "NotGreater", "NotGreater", "NotGreaterEqual", "NotGreaterEqual", "NotGreaterLess", "NotGreaterLess", "NotGreaterTilde", "NotGreaterTilde", "NotLeftTriangle", "NotLeftTriangle", "NotLeftTriangleEqual", "NotLeftTriangleEqual", "NotLess", "NotLess", "NotLessEqual", "NotLessEqual", "NotLessGreater", 8429 "NotLessGreater", "NotLessTilde", "NotLessTilde", "NotPrecedes", "NotPrecedes", "NotPrecedesSlantEqual", "NotPrecedesSlantEqual", "NotReverseElement", "NotReverseElement", "NotRightTriangle", "NotRightTriangle", "NotRightTriangleEqual", "NotRightTriangleEqual", "NotSquareSubsetEqual", "NotSquareSubsetEqual", "NotSquareSupersetEqual", 8430 "NotSquareSupersetEqual", "NotSubsetEqual", "NotSubsetEqual", "NotSucceeds", "NotSucceeds", "NotSucceedsSlantEqual", "NotSucceedsSlantEqual", "NotSupersetEqual", "NotSupersetEqual", "NotTilde", "NotTilde", "NotTildeEqual", "NotTildeEqual", "NotTildeFullEqual", "NotTildeFullEqual", "NotTildeTilde", "NotTildeTilde", "NotVerticalBar", 8431 "NotVerticalBar", "Nscr", "Nscr", "Ntilde", "Ntilde", "Nu", "Nu", "OElig", "OElig", "Oacute", "Oacute", "Ocirc", "Ocirc", "Ocy", "Ocy", "Odblac", "Odblac", "Ofr", "Ofr", "Ograve", "Ograve", "Omacr", "Omacr", "Omega", "Omega", "Omicron", "Omicron", "Oopf", "Oopf", "OpenCurlyDoubleQuote", "OpenCurlyDoubleQuote", "OpenCurlyQuote", 8432 "OpenCurlyQuote", "Or", "Or", "Oscr", "Oscr", "Oslash", "Oslash", "Otilde", "Otilde", "Otimes", "Otimes", "Ouml", "Ouml", "OverBar", "OverBar", "OverBrace", "OverBrace", "OverBracket", "OverBracket", "OverParenthesis", "OverParenthesis", "PartialD", "PartialD", "Pcy", "Pcy", "Pfr", "Pfr", "Phi", "Phi", "Pi", "Pi", "PlusMinus", 8433 "PlusMinus", "Poincareplane", "Poincareplane", "Popf", "Popf", "Pr", "Pr", "Precedes", "Precedes", 
"PrecedesEqual", "PrecedesEqual", "PrecedesSlantEqual", "PrecedesSlantEqual", "PrecedesTilde", "PrecedesTilde", "Prime", "Prime", "Product", "Product", "Proportion", "Proportion", "Proportional", "Proportional", "Pscr", "Pscr", 8434 "Psi", "Psi", "QUOT", "QUOT", "Qfr", "Qfr", "Qopf", "Qopf", "Qscr", "Qscr", "RBarr", "RBarr", "REG", "REG", "Racute", "Racute", "Rang", "Rang", "Rarr", "Rarr", "Rarrtl", "Rarrtl", "Rcaron", "Rcaron", "Rcedil", "Rcedil", "Rcy", "Rcy", "Re", "Re", "ReverseElement", "ReverseElement", "ReverseEquilibrium", "ReverseEquilibrium", 8435 "ReverseUpEquilibrium", "ReverseUpEquilibrium", "Rfr", "Rfr", "Rho", "Rho", "RightAngleBracket", "RightAngleBracket", "RightArrow", "RightArrow", "RightArrowBar", "RightArrowBar", "RightArrowLeftArrow", "RightArrowLeftArrow", "RightCeiling", "RightCeiling", "RightDoubleBracket", "RightDoubleBracket", "RightDownTeeVector", 8436 "RightDownTeeVector", "RightDownVector", "RightDownVector", "RightDownVectorBar", "RightDownVectorBar", "RightFloor", "RightFloor", "RightTee", "RightTee", "RightTeeArrow", "RightTeeArrow", "RightTeeVector", "RightTeeVector", "RightTriangle", "RightTriangle", "RightTriangleBar", "RightTriangleBar", "RightTriangleEqual", 8437 "RightTriangleEqual", "RightUpDownVector", "RightUpDownVector", "RightUpTeeVector", "RightUpTeeVector", "RightUpVector", "RightUpVector", "RightUpVectorBar", "RightUpVectorBar", "RightVector", "RightVector", "RightVectorBar", "RightVectorBar", "Rightarrow", "Rightarrow", "Ropf", "Ropf", "RoundImplies", "RoundImplies", 8438 "Rrightarrow", "Rrightarrow", "Rscr", "Rscr", "Rsh", "Rsh", "RuleDelayed", "RuleDelayed", "SHCHcy", "SHCHcy", "SHcy", "SHcy", "SOFTcy", "SOFTcy", "Sacute", "Sacute", "Sc", "Sc", "Scaron", "Scaron", "Scedil", "Scedil", "Scirc", "Scirc", "Scy", "Scy", "Sfr", "Sfr", "ShortDownArrow", "ShortDownArrow", "ShortLeftArrow", "ShortLeftArrow", 8439 "ShortRightArrow", "ShortRightArrow", "ShortUpArrow", "ShortUpArrow", "Sigma", "Sigma", "SmallCircle", 
"SmallCircle", "Sopf", "Sopf", "Sqrt", "Sqrt", "Square", "Square", "SquareIntersection", "SquareIntersection", "SquareSubset", "SquareSubset", "SquareSubsetEqual", "SquareSubsetEqual", "SquareSuperset", "SquareSuperset", 8440 "SquareSupersetEqual", "SquareSupersetEqual", "SquareUnion", "SquareUnion", "Sscr", "Sscr", "Star", "Star", "Sub", "Sub", "Subset", "Subset", "SubsetEqual", "SubsetEqual", "Succeeds", "Succeeds", "SucceedsEqual", "SucceedsEqual", "SucceedsSlantEqual", "SucceedsSlantEqual", "SucceedsTilde", "SucceedsTilde", "SuchThat", 8441 "SuchThat", "Sum", "Sum", "Sup", "Sup", "Superset", "Superset", "SupersetEqual", "SupersetEqual", "Supset", "Supset", "THORN", "THORN", "TRADE", "TRADE", "TSHcy", "TSHcy", "TScy", "TScy", "Tab", "Tab", "Tau", "Tau", "Tcaron", "Tcaron", "Tcedil", "Tcedil", "Tcy", "Tcy", "Tfr", "Tfr", "Therefore", "Therefore", "Theta", "Theta", 8442 "ThinSpace", "ThinSpace", "Tilde", "Tilde", "TildeEqual", "TildeEqual", "TildeFullEqual", "TildeFullEqual", "TildeTilde", "TildeTilde", "Topf", "Topf", "TripleDot", "TripleDot", "Tscr", "Tscr", "Tstrok", "Tstrok", "Uacute", "Uacute", "Uarr", "Uarr", "Uarrocir", "Uarrocir", "Ubrcy", "Ubrcy", "Ubreve", "Ubreve", "Ucirc", 8443 "Ucirc", "Ucy", "Ucy", "Udblac", "Udblac", "Ufr", "Ufr", "Ugrave", "Ugrave", "Umacr", "Umacr", "UnderBar", "UnderBar", "UnderBrace", "UnderBrace", "UnderBracket", "UnderBracket", "UnderParenthesis", "UnderParenthesis", "Union", "Union", "UnionPlus", "UnionPlus", "Uogon", "Uogon", "Uopf", "Uopf", "UpArrow", "UpArrow", "UpArrowBar", 8444 "UpArrowBar", "UpArrowDownArrow", "UpArrowDownArrow", "UpDownArrow", "UpDownArrow", "UpEquilibrium", "UpEquilibrium", "UpTee", "UpTee", "UpTeeArrow", "UpTeeArrow", "Uparrow", "Uparrow", "Updownarrow", "Updownarrow", "UpperLeftArrow", "UpperLeftArrow", "UpperRightArrow", "UpperRightArrow", "Upsi", "Upsi", "Upsilon", "Upsilon", 8445 "Uring", "Uring", "Uscr", "Uscr", "Utilde", "Utilde", "Uuml", "Uuml", "VDash", "VDash", "Vbar", "Vbar", "Vcy", "Vcy", 
"Vdash", "Vdash", "Vdashl", "Vdashl", "Vee", "Vee", "Verbar", "Verbar", "Vert", "Vert", "VerticalBar", "VerticalBar", "VerticalLine", "VerticalLine", "VerticalSeparator", "VerticalSeparator", "VerticalTilde", 8446 "VerticalTilde", "VeryThinSpace", "VeryThinSpace", "Vfr", "Vfr", "Vopf", "Vopf", "Vscr", "Vscr", "Vvdash", "Vvdash", "Wcirc", "Wcirc", "Wedge", "Wedge", "Wfr", "Wfr", "Wopf", "Wopf", "Wscr", "Wscr", "Xfr", "Xfr", "Xi", "Xi", "Xopf", "Xopf", "Xscr", "Xscr", "YAcy", "YAcy", "YIcy", "YIcy", "YUcy", "YUcy", "Yacute", "Yacute", 8447 "Ycirc", "Ycirc", "Ycy", "Ycy", "Yfr", "Yfr", "Yopf", "Yopf", "Yscr", "Yscr", "Yuml", "Yuml", "ZHcy", "ZHcy", "Zacute", "Zacute", "Zcaron", "Zcaron", "Zcy", "Zcy", "Zdot", "Zdot", "ZeroWidthSpace", "ZeroWidthSpace", "Zeta", "Zeta", "Zfr", "Zfr", "Zopf", "Zopf", "Zscr", "Zscr", "aacute", "aacute", "abreve", "abreve", "ac", 8448 "ac", "acd", "acd", "acirc", "acirc", "acute", "acute", "acy", "acy", "aelig", "aelig", "af", "af", "afr", "afr", "agrave", "agrave", "alefsym", "alefsym", "aleph", "aleph", "alpha", "alpha", "amacr", "amacr", "amalg", "amalg", "and", "and", "andand", "andand", "andd", "andd", "andslope", "andslope", "andv", "andv", "ang", 8449 "ang", "ange", "ange", "angle", "angle", "angmsd", "angmsd", "angmsdaa", "angmsdaa", "angmsdab", "angmsdab", "angmsdac", "angmsdac", "angmsdad", "angmsdad", "angmsdae", "angmsdae", "angmsdaf", "angmsdaf", "angmsdag", "angmsdag", "angmsdah", "angmsdah", "angrt", "angrt", "angrtvb", "angrtvb", "angrtvbd", "angrtvbd", "angsph", 8450 "angsph", "angst", "angst", "angzarr", "angzarr", "aogon", "aogon", "aopf", "aopf", "ap", "ap", "apE", "apE", "apacir", "apacir", "ape", "ape", "apid", "apid", "approx", "approx", "approxeq", "approxeq", "aring", "aring", "ascr", "ascr", "ast", "ast", "asymp", "asymp", "asympeq", "asympeq", "atilde", "atilde", "auml", 8451 "auml", "awconint", "awconint", "awint", "awint", "bNot", "bNot", "backcong", "backcong", "backepsilon", "backepsilon", "backprime", 
"backprime", "backsim", "backsim", "backsimeq", "backsimeq", "barvee", "barvee", "barwed", "barwed", "barwedge", "barwedge", "bbrk", "bbrk", "bbrktbrk", "bbrktbrk", "bcong", "bcong", "bcy", 8452 "bcy", "bdquo", "bdquo", "becaus", "becaus", "because", "because", "bemptyv", "bemptyv", "bepsi", "bepsi", "bernou", "bernou", "beta", "beta", "beth", "beth", "between", "between", "bfr", "bfr", "bigcap", "bigcap", "bigcirc", "bigcirc", "bigcup", "bigcup", "bigodot", "bigodot", "bigoplus", "bigoplus", "bigotimes", "bigotimes", 8453 "bigsqcup", "bigsqcup", "bigstar", "bigstar", "bigtriangledown", "bigtriangledown", "bigtriangleup", "bigtriangleup", "biguplus", "biguplus", "bigvee", "bigvee", "bigwedge", "bigwedge", "bkarow", "bkarow", "blacklozenge", "blacklozenge", "blacksquare", "blacksquare", "blacktriangle", "blacktriangle", "blacktriangledown", 8454 "blacktriangledown", "blacktriangleleft", "blacktriangleleft", "blacktriangleright", "blacktriangleright", "blank", "blank", "blk12", "blk12", "blk14", "blk14", "blk34", "blk34", "block", "block", "bnot", "bnot", "bopf", "bopf", "bot", "bot", "bottom", "bottom", "bowtie", "bowtie", "boxDL", "boxDL", "boxDR", "boxDR", "boxDl", 8455 "boxDl", "boxDr", "boxDr", "boxH", "boxH", "boxHD", "boxHD", "boxHU", "boxHU", "boxHd", "boxHd", "boxHu", "boxHu", "boxUL", "boxUL", "boxUR", "boxUR", "boxUl", "boxUl", "boxUr", "boxUr", "boxV", "boxV", "boxVH", "boxVH", "boxVL", "boxVL", "boxVR", "boxVR", "boxVh", "boxVh", "boxVl", "boxVl", "boxVr", "boxVr", "boxbox", 8456 "boxbox", "boxdL", "boxdL", "boxdR", "boxdR", "boxdl", "boxdl", "boxdr", "boxdr", "boxh", "boxh", "boxhD", "boxhD", "boxhU", "boxhU", "boxhd", "boxhd", "boxhu", "boxhu", "boxminus", "boxminus", "boxplus", "boxplus", "boxtimes", "boxtimes", "boxuL", "boxuL", "boxuR", "boxuR", "boxul", "boxul", "boxur", "boxur", "boxv", 8457 "boxv", "boxvH", "boxvH", "boxvL", "boxvL", "boxvR", "boxvR", "boxvh", "boxvh", "boxvl", "boxvl", "boxvr", "boxvr", "bprime", "bprime", "breve", "breve", 
"brvbar", "brvbar", "bscr", "bscr", "bsemi", "bsemi", "bsim", "bsim", "bsime", "bsime", "bsol", "bsol", "bsolb", "bsolb", "bsolhsub", "bsolhsub", "bull", "bull", "bullet", 8458 "bullet", "bump", "bump", "bumpE", "bumpE", "bumpe", "bumpe", "bumpeq", "bumpeq", "cacute", "cacute", "cap", "cap", "capand", "capand", "capbrcup", "capbrcup", "capcap", "capcap", "capcup", "capcup", "capdot", "capdot", "caret", "caret", "caron", "caron", "ccaps", "ccaps", "ccaron", "ccaron", "ccedil", "ccedil", "ccirc", 8459 "ccirc", "ccups", "ccups", "ccupssm", "ccupssm", "cdot", "cdot", "cedil", "cedil", "cemptyv", "cemptyv", "cent", "cent", "centerdot", "centerdot", "cfr", "cfr", "chcy", "chcy", "check", "check", "checkmark", "checkmark", "chi", "chi", "cir", "cir", "cirE", "cirE", "circ", "circ", "circeq", "circeq", "circlearrowleft", 8460 "circlearrowleft", "circlearrowright", "circlearrowright", "circledR", "circledR", "circledS", "circledS", "circledast", "circledast", "circledcirc", "circledcirc", "circleddash", "circleddash", "cire", "cire", "cirfnint", "cirfnint", "cirmid", "cirmid", "cirscir", "cirscir", "clubs", "clubs", "clubsuit", "clubsuit", "colon", 8461 "colon", "colone", "colone", "coloneq", "coloneq", "comma", "comma", "commat", "commat", "comp", "comp", "compfn", "compfn", "complement", "complement", "complexes", "complexes", "cong", "cong", "congdot", "congdot", "conint", "conint", "copf", "copf", "coprod", "coprod", "copy", "copy", "copysr", "copysr", "crarr", "crarr", 8462 "cross", "cross", "cscr", "cscr", "csub", "csub", "csube", "csube", "csup", "csup", "csupe", "csupe", "ctdot", "ctdot", "cudarrl", "cudarrl", "cudarrr", "cudarrr", "cuepr", "cuepr", "cuesc", "cuesc", "cularr", "cularr", "cularrp", "cularrp", "cup", "cup", "cupbrcap", "cupbrcap", "cupcap", "cupcap", "cupcup", "cupcup", 8463 "cupdot", "cupdot", "cupor", "cupor", "curarr", "curarr", "curarrm", "curarrm", "curlyeqprec", "curlyeqprec", "curlyeqsucc", "curlyeqsucc", "curlyvee", "curlyvee", "curlywedge", 
"curlywedge", "curren", "curren", "curvearrowleft", "curvearrowleft", "curvearrowright", "curvearrowright", "cuvee", "cuvee", "cuwed", "cuwed", 8464 "cwconint", "cwconint", "cwint", "cwint", "cylcty", "cylcty", "dArr", "dArr", "dHar", "dHar", "dagger", "dagger", "daleth", "daleth", "darr", "darr", "dash", "dash", "dashv", "dashv", "dbkarow", "dbkarow", "dblac", "dblac", "dcaron", "dcaron", "dcy", "dcy", "dd", "dd", "ddagger", "ddagger", "ddarr", "ddarr", "ddotseq", 8465 "ddotseq", "deg", "deg", "delta", "delta", "demptyv", "demptyv", "dfisht", "dfisht", "dfr", "dfr", "dharl", "dharl", "dharr", "dharr", "diam", "diam", "diamond", "diamond", "diamondsuit", "diamondsuit", "diams", "diams", "die", "die", "digamma", "digamma", "disin", "disin", "div", "div", "divide", "divide", "divideontimes", 8466 "divideontimes", "divonx", "divonx", "djcy", "djcy", "dlcorn", "dlcorn", "dlcrop", "dlcrop", "dollar", "dollar", "dopf", "dopf", "dot", "dot", "doteq", "doteq", "doteqdot", "doteqdot", "dotminus", "dotminus", "dotplus", "dotplus", "dotsquare", "dotsquare", "doublebarwedge", "doublebarwedge", "downarrow", "downarrow", "downdownarrows", 8467 "downdownarrows", "downharpoonleft", "downharpoonleft", "downharpoonright", "downharpoonright", "drbkarow", "drbkarow", "drcorn", "drcorn", "drcrop", "drcrop", "dscr", "dscr", "dscy", "dscy", "dsol", "dsol", "dstrok", "dstrok", "dtdot", "dtdot", "dtri", "dtri", "dtrif", "dtrif", "duarr", "duarr", "duhar", "duhar", "dwangle", 8468 "dwangle", "dzcy", "dzcy", "dzigrarr", "dzigrarr", "eDDot", "eDDot", "eDot", "eDot", "eacute", "eacute", "easter", "easter", "ecaron", "ecaron", "ecir", "ecir", "ecirc", "ecirc", "ecolon", "ecolon", "ecy", "ecy", "edot", "edot", "ee", "ee", "efDot", "efDot", "efr", "efr", "eg", "eg", "egrave", "egrave", "egs", "egs", "egsdot", 8469 "egsdot", "el", "el", "elinters", "elinters", "ell", "ell", "els", "els", "elsdot", "elsdot", "emacr", "emacr", "empty", "empty", "emptyset", "emptyset", "emptyv", "emptyv", "emsp", 
"emsp", "emsp13", "emsp13", "emsp14", "emsp14", "eng", "eng", "ensp", "ensp", "eogon", "eogon", "eopf", "eopf", "epar", "epar", "eparsl", 8470 "eparsl", "eplus", "eplus", "epsi", "epsi", "epsilon", "epsilon", "epsiv", "epsiv", "eqcirc", "eqcirc", "eqcolon", "eqcolon", "eqsim", "eqsim", "eqslantgtr", "eqslantgtr", "eqslantless", "eqslantless", "equals", "equals", "equest", "equest", "equiv", "equiv", "equivDD", "equivDD", "eqvparsl", "eqvparsl", "erDot", "erDot", 8471 "erarr", "erarr", "escr", "escr", "esdot", "esdot", "esim", "esim", "eta", "eta", "eth", "eth", "euml", "euml", "euro", "euro", "excl", "excl", "exist", "exist", "expectation", "expectation", "exponentiale", "exponentiale", "fallingdotseq", "fallingdotseq", "fcy", "fcy", "female", "female", "ffilig", "ffilig", "fflig", 8472 "fflig", "ffllig", "ffllig", "ffr", "ffr", "filig", "filig", "flat", "flat", "fllig", "fllig", "fltns", "fltns", "fnof", "fnof", "fopf", "fopf", "forall", "forall", "fork", "fork", "forkv", "forkv", "fpartint", "fpartint", "frac12", "frac12", "frac13", "frac13", "frac14", "frac14", "frac15", "frac15", "frac16", "frac16", 8473 "frac18", "frac18", "frac23", "frac23", "frac25", "frac25", "frac34", "frac34", "frac35", "frac35", "frac38", "frac38", "frac45", "frac45", "frac56", "frac56", "frac58", "frac58", "frac78", "frac78", "frasl", "frasl", "frown", "frown", "fscr", "fscr", "gE", "gE", "gEl", "gEl", "gacute", "gacute", "gamma", "gamma", "gammad", 8474 "gammad", "gap", "gap", "gbreve", "gbreve", "gcirc", "gcirc", "gcy", "gcy", "gdot", "gdot", "ge", "ge", "gel", "gel", "geq", "geq", "geqq", "geqq", "geqslant", "geqslant", "ges", "ges", "gescc", "gescc", "gesdot", "gesdot", "gesdoto", "gesdoto", "gesdotol", "gesdotol", "gesles", "gesles", "gfr", "gfr", "gg", "gg", "ggg", 8475 "ggg", "gimel", "gimel", "gjcy", "gjcy", "gl", "gl", "glE", "glE", "gla", "gla", "glj", "glj", "gnE", "gnE", "gnap", "gnap", "gnapprox", "gnapprox", "gne", "gne", "gneq", "gneq", "gneqq", "gneqq", "gnsim", "gnsim", 
"gopf", "gopf", "grave", "grave", "gscr", "gscr", "gsim", "gsim", "gsime", "gsime", "gsiml", "gsiml", "gtcc", 8476 "gtcc", "gtcir", "gtcir", "gtdot", "gtdot", "gtlPar", "gtlPar", "gtquest", "gtquest", "gtrapprox", "gtrapprox", "gtrarr", "gtrarr", "gtrdot", "gtrdot", "gtreqless", "gtreqless", "gtreqqless", "gtreqqless", "gtrless", "gtrless", "gtrsim", "gtrsim", "hArr", "hArr", "hairsp", "hairsp", "half", "half", "hamilt", "hamilt", 8477 "hardcy", "hardcy", "harr", "harr", "harrcir", "harrcir", "harrw", "harrw", "hbar", "hbar", "hcirc", "hcirc", "hearts", "hearts", "heartsuit", "heartsuit", "hellip", "hellip", "hercon", "hercon", "hfr", "hfr", "hksearow", "hksearow", "hkswarow", "hkswarow", "hoarr", "hoarr", "homtht", "homtht", "hookleftarrow", "hookleftarrow", 8478 "hookrightarrow", "hookrightarrow", "hopf", "hopf", "horbar", "horbar", "hscr", "hscr", "hslash", "hslash", "hstrok", "hstrok", "hybull", "hybull", "hyphen", "hyphen", "iacute", "iacute", "ic", "ic", "icirc", "icirc", "icy", "icy", "iecy", "iecy", "iexcl", "iexcl", "iff", "iff", "ifr", "ifr", "igrave", "igrave", "ii", 8479 "ii", "iiiint", "iiiint", "iiint", "iiint", "iinfin", "iinfin", "iiota", "iiota", "ijlig", "ijlig", "imacr", "imacr", "image", "image", "imagline", "imagline", "imagpart", "imagpart", "imath", "imath", "imof", "imof", "imped", "imped", "in", "in", "incare", "incare", "infin", "infin", "infintie", "infintie", "inodot", 8480 "inodot", "int", "int", "intcal", "intcal", "integers", "integers", "intercal", "intercal", "intlarhk", "intlarhk", "intprod", "intprod", "iocy", "iocy", "iogon", "iogon", "iopf", "iopf", "iota", "iota", "iprod", "iprod", "iquest", "iquest", "iscr", "iscr", "isin", "isin", "isinE", "isinE", "isindot", "isindot", "isins", 8481 "isins", "isinsv", "isinsv", "isinv", "isinv", "it", "it", "itilde", "itilde", "iukcy", "iukcy", "iuml", "iuml", "jcirc", "jcirc", "jcy", "jcy", "jfr", "jfr", "jmath", "jmath", "jopf", "jopf", "jscr", "jscr", "jsercy", "jsercy", "jukcy", "jukcy", 
"kappa", "kappa", "kappav", "kappav", "kcedil", "kcedil", "kcy", "kcy", "kfr", 8482 "kfr", "kgreen", "kgreen", "khcy", "khcy", "kjcy", "kjcy", "kopf", "kopf", "kscr", "kscr", "lAarr", "lAarr", "lArr", "lArr", "lAtail", "lAtail", "lBarr", "lBarr", "lE", "lE", "lEg", "lEg", "lHar", "lHar", "lacute", "lacute", "laemptyv", "laemptyv", "lagran", "lagran", "lambda", "lambda", "lang", "lang", "langd", "langd", 8483 "langle", "langle", "lap", "lap", "laquo", "laquo", "larr", "larr", "larrb", "larrb", "larrbfs", "larrbfs", "larrfs", "larrfs", "larrhk", "larrhk", "larrlp", "larrlp", "larrpl", "larrpl", "larrsim", "larrsim", "larrtl", "larrtl", "lat", "lat", "latail", "latail", "late", "late", "lbarr", "lbarr", "lbbrk", "lbbrk", "lbrace", 8484 "lbrace", "lbrack", "lbrack", "lbrke", "lbrke", "lbrksld", "lbrksld", "lbrkslu", "lbrkslu", "lcaron", "lcaron", "lcedil", "lcedil", "lceil", "lceil", "lcub", "lcub", "lcy", "lcy", "ldca", "ldca", "ldquo", "ldquo", "ldquor", "ldquor", "ldrdhar", "ldrdhar", "ldrushar", "ldrushar", "ldsh", "ldsh", "le", "le", "leftarrow", 8485 "leftarrow", "leftarrowtail", "leftarrowtail", "leftharpoondown", "leftharpoondown", "leftharpoonup", "leftharpoonup", "leftleftarrows", "leftleftarrows", "leftrightarrow", "leftrightarrow", "leftrightarrows", "leftrightarrows", "leftrightharpoons", "leftrightharpoons", "leftrightsquigarrow", "leftrightsquigarrow", "leftthreetimes", 8486 "leftthreetimes", "leg", "leg", "leq", "leq", "leqq", "leqq", "leqslant", "leqslant", "les", "les", "lescc", "lescc", "lesdot", "lesdot", "lesdoto", "lesdoto", "lesdotor", "lesdotor", "lesges", "lesges", "lessapprox", "lessapprox", "lessdot", "lessdot", "lesseqgtr", "lesseqgtr", "lesseqqgtr", "lesseqqgtr", "lessgtr", "lessgtr", 8487 "lesssim", "lesssim", "lfisht", "lfisht", "lfloor", "lfloor", "lfr", "lfr", "lg", "lg", "lgE", "lgE", "lhard", "lhard", "lharu", "lharu", "lharul", "lharul", "lhblk", "lhblk", "ljcy", "ljcy", "ll", "ll", "llarr", "llarr", "llcorner", "llcorner", "llhard", 
"llhard", "lltri", "lltri", "lmidot", "lmidot", "lmoust", "lmoust", 8488 "lmoustache", "lmoustache", "lnE", "lnE", "lnap", "lnap", "lnapprox", "lnapprox", "lne", "lne", "lneq", "lneq", "lneqq", "lneqq", "lnsim", "lnsim", "loang", "loang", "loarr", "loarr", "lobrk", "lobrk", "longleftarrow", "longleftarrow", "longleftrightarrow", "longleftrightarrow", "longmapsto", "longmapsto", "longrightarrow", 8489 "longrightarrow", "looparrowleft", "looparrowleft", "looparrowright", "looparrowright", "lopar", "lopar", "lopf", "lopf", "loplus", "loplus", "lotimes", "lotimes", "lowast", "lowast", "lowbar", "lowbar", "loz", "loz", "lozenge", "lozenge", "lozf", "lozf", "lpar", "lpar", "lparlt", "lparlt", "lrarr", "lrarr", "lrcorner", 8490 "lrcorner", "lrhar", "lrhar", "lrhard", "lrhard", "lrm", "lrm", "lrtri", "lrtri", "lsaquo", "lsaquo", "lscr", "lscr", "lsh", "lsh", "lsim", "lsim", "lsime", "lsime", "lsimg", "lsimg", "lsqb", "lsqb", "lsquo", "lsquo", "lsquor", "lsquor", "lstrok", "lstrok", "ltcc", "ltcc", "ltcir", "ltcir", "ltdot", "ltdot", "lthree", 8491 "lthree", "ltimes", "ltimes", "ltlarr", "ltlarr", "ltquest", "ltquest", "ltrPar", "ltrPar", "ltri", "ltri", "ltrie", "ltrie", "ltrif", "ltrif", "lurdshar", "lurdshar", "luruhar", "luruhar", "mDDot", "mDDot", "macr", "macr", "male", "male", "malt", "malt", "maltese", "maltese", "map", "map", "mapsto", "mapsto", "mapstodown", 8492 "mapstodown", "mapstoleft", "mapstoleft", "mapstoup", "mapstoup", "marker", "marker", "mcomma", "mcomma", "mcy", "mcy", "mdash", "mdash", "measuredangle", "measuredangle", "mfr", "mfr", "mho", "mho", "micro", "micro", "mid", "mid", "midast", "midast", "midcir", "midcir", "middot", "middot", "minus", "minus", "minusb", 8493 "minusb", "minusd", "minusd", "minusdu", "minusdu", "mlcp", "mlcp", "mldr", "mldr", "mnplus", "mnplus", "models", "models", "mopf", "mopf", "mp", "mp", "mscr", "mscr", "mstpos", "mstpos", "mu", "mu", "multimap", "multimap", "mumap", "mumap", "nLeftarrow", "nLeftarrow", 
"nLeftrightarrow", "nLeftrightarrow", "nRightarrow", 8494 "nRightarrow", "nVDash", "nVDash", "nVdash", "nVdash", "nabla", "nabla", "nacute", "nacute", "nap", "nap", "napos", "napos", "napprox", "napprox", "natur", "natur", "natural", "natural", "naturals", "naturals", "nbsp", "nbsp", "ncap", "ncap", "ncaron", "ncaron", "ncedil", "ncedil", "ncong", "ncong", "ncup", "ncup", "ncy", 8495 "ncy", "ndash", "ndash", "ne", "ne", "neArr", "neArr", "nearhk", "nearhk", "nearr", "nearr", "nearrow", "nearrow", "nequiv", "nequiv", "nesear", "nesear", "nexist", "nexist", "nexists", "nexists", "nfr", "nfr", "nge", "nge", "ngeq", "ngeq", "ngsim", "ngsim", "ngt", "ngt", "ngtr", "ngtr", "nhArr", "nhArr", "nharr", "nharr", 8496 "nhpar", "nhpar", "ni", "ni", "nis", "nis", "nisd", "nisd", "niv", "niv", "njcy", "njcy", "nlArr", "nlArr", "nlarr", "nlarr", "nldr", "nldr", "nle", "nle", "nleftarrow", "nleftarrow", "nleftrightarrow", "nleftrightarrow", "nleq", "nleq", "nless", "nless", "nlsim", "nlsim", "nlt", "nlt", "nltri", "nltri", "nltrie", "nltrie", 8497 "nmid", "nmid", "nopf", "nopf", "not", "not", "notin", "notin", "notinva", "notinva", "notinvb", "notinvb", "notinvc", "notinvc", "notni", "notni", "notniva", "notniva", "notnivb", "notnivb", "notnivc", "notnivc", "npar", "npar", "nparallel", "nparallel", "npolint", "npolint", "npr", "npr", "nprcue", "nprcue", "nprec", 8498 "nprec", "nrArr", "nrArr", "nrarr", "nrarr", "nrightarrow", "nrightarrow", "nrtri", "nrtri", "nrtrie", "nrtrie", "nsc", "nsc", "nsccue", "nsccue", "nscr", "nscr", "nshortmid", "nshortmid", "nshortparallel", "nshortparallel", "nsim", "nsim", "nsime", "nsime", "nsimeq", "nsimeq", "nsmid", "nsmid", "nspar", "nspar", "nsqsube", 8499 "nsqsube", "nsqsupe", "nsqsupe", "nsub", "nsub", "nsube", "nsube", "nsubseteq", "nsubseteq", "nsucc", "nsucc", "nsup", "nsup", "nsupe", "nsupe", "nsupseteq", "nsupseteq", "ntgl", "ntgl", "ntilde", "ntilde", "ntlg", "ntlg", "ntriangleleft", "ntriangleleft", "ntrianglelefteq", "ntrianglelefteq", 
"ntriangleright", "ntriangleright", 8500 "ntrianglerighteq", "ntrianglerighteq", "nu", "nu", "num", "num", "numero", "numero", "numsp", "numsp", "nvDash", "nvDash", "nvHarr", "nvHarr", "nvdash", "nvdash", "nvinfin", "nvinfin", "nvlArr", "nvlArr", "nvrArr", "nvrArr", "nwArr", "nwArr", "nwarhk", "nwarhk", "nwarr", "nwarr", "nwarrow", "nwarrow", "nwnear", "nwnear", 8501 "oS", "oS", "oacute", "oacute", "oast", "oast", "ocir", "ocir", "ocirc", "ocirc", "ocy", "ocy", "odash", "odash", "odblac", "odblac", "odiv", "odiv", "odot", "odot", "odsold", "odsold", "oelig", "oelig", "ofcir", "ofcir", "ofr", "ofr", "ogon", "ogon", "ograve", "ograve", "ogt", "ogt", "ohbar", "ohbar", "ohm", "ohm", "oint", 8502 "oint", "olarr", "olarr", "olcir", "olcir", "olcross", "olcross", "oline", "oline", "olt", "olt", "omacr", "omacr", "omega", "omega", "omicron", "omicron", "omid", "omid", "ominus", "ominus", "oopf", "oopf", "opar", "opar", "operp", "operp", "oplus", "oplus", "or", "or", "orarr", "orarr", "ord", "ord", "order", "order", 8503 "orderof", "orderof", "ordf", "ordf", "ordm", "ordm", "origof", "origof", "oror", "oror", "orslope", "orslope", "orv", "orv", "oscr", "oscr", "oslash", "oslash", "osol", "osol", "otilde", "otilde", "otimes", "otimes", "otimesas", "otimesas", "ouml", "ouml", "ovbar", "ovbar", "par", "par", "para", "para", "parallel", "parallel", 8504 "parsim", "parsim", "parsl", "parsl", "part", "part", "pcy", "pcy", "percnt", "percnt", "period", "period", "permil", "permil", "perp", "perp", "pertenk", "pertenk", "pfr", "pfr", "phi", "phi", "phiv", "phiv", "phmmat", "phmmat", "phone", "phone", "pi", "pi", "pitchfork", "pitchfork", "piv", "piv", "planck", "planck", 8505 "planckh", "planckh", "plankv", "plankv", "plus", "plus", "plusacir", "plusacir", "plusb", "plusb", "pluscir", "pluscir", "plusdo", "plusdo", "plusdu", "plusdu", "pluse", "pluse", "plusmn", "plusmn", "plussim", "plussim", "plustwo", "plustwo", "pm", "pm", "pointint", "pointint", "popf", "popf", "pound", 
"pound", "pr", 8506 "pr", "prE", "prE", "prap", "prap", "prcue", "prcue", "pre", "pre", "prec", "prec", "precapprox", "precapprox", "preccurlyeq", "preccurlyeq", "preceq", "preceq", "precnapprox", "precnapprox", "precneqq", "precneqq", "precnsim", "precnsim", "precsim", "precsim", "prime", "prime", "primes", "primes", "prnE", "prnE", "prnap", 8507 "prnap", "prnsim", "prnsim", "prod", "prod", "profalar", "profalar", "profline", "profline", "profsurf", "profsurf", "prop", "prop", "propto", "propto", "prsim", "prsim", "prurel", "prurel", "pscr", "pscr", "psi", "psi", "puncsp", "puncsp", "qfr", "qfr", "qint", "qint", "qopf", "qopf", "qprime", "qprime", "qscr", "qscr", 8508 "quaternions", "quaternions", "quatint", "quatint", "quest", "quest", "questeq", "questeq", "rAarr", "rAarr", "rArr", "rArr", "rAtail", "rAtail", "rBarr", "rBarr", "rHar", "rHar", "racute", "racute", "radic", "radic", "raemptyv", "raemptyv", "rang", "rang", "rangd", "rangd", "range", "range", "rangle", "rangle", "raquo", 8509 "raquo", "rarr", "rarr", "rarrap", "rarrap", "rarrb", "rarrb", "rarrbfs", "rarrbfs", "rarrc", "rarrc", "rarrfs", "rarrfs", "rarrhk", "rarrhk", "rarrlp", "rarrlp", "rarrpl", "rarrpl", "rarrsim", "rarrsim", "rarrtl", "rarrtl", "rarrw", "rarrw", "ratail", "ratail", "ratio", "ratio", "rationals", "rationals", "rbarr", "rbarr", 8510 "rbbrk", "rbbrk", "rbrace", "rbrace", "rbrack", "rbrack", "rbrke", "rbrke", "rbrksld", "rbrksld", "rbrkslu", "rbrkslu", "rcaron", "rcaron", "rcedil", "rcedil", "rceil", "rceil", "rcub", "rcub", "rcy", "rcy", "rdca", "rdca", "rdldhar", "rdldhar", "rdquo", "rdquo", "rdquor", "rdquor", "rdsh", "rdsh", "real", "real", "realine", 8511 "realine", "realpart", "realpart", "reals", "reals", "rect", "rect", "reg", "reg", "rfisht", "rfisht", "rfloor", "rfloor", "rfr", "rfr", "rhard", "rhard", "rharu", "rharu", "rharul", "rharul", "rho", "rho", "rhov", "rhov", "rightarrow", "rightarrow", "rightarrowtail", "rightarrowtail", "rightharpoondown", "rightharpoondown", 8512 
"rightharpoonup", "rightharpoonup", "rightleftarrows", "rightleftarrows", "rightleftharpoons", "rightleftharpoons", "rightrightarrows", "rightrightarrows", "rightsquigarrow", "rightsquigarrow", "rightthreetimes", "rightthreetimes", "ring", "ring", "risingdotseq", "risingdotseq", "rlarr", "rlarr", "rlhar", "rlhar", "rlm", 8513 "rlm", "rmoust", "rmoust", "rmoustache", "rmoustache", "rnmid", "rnmid", "roang", "roang", "roarr", "roarr", "robrk", "robrk", "ropar", "ropar", "ropf", "ropf", "roplus", "roplus", "rotimes", "rotimes", "rpar", "rpar", "rpargt", "rpargt", "rppolint", "rppolint", "rrarr", "rrarr", "rsaquo", "rsaquo", "rscr", "rscr", "rsh", 8514 "rsh", "rsqb", "rsqb", "rsquo", "rsquo", "rsquor", "rsquor", "rthree", "rthree", "rtimes", "rtimes", "rtri", "rtri", "rtrie", "rtrie", "rtrif", "rtrif", "rtriltri", "rtriltri", "ruluhar", "ruluhar", "rx", "rx", "sacute", "sacute", "sbquo", "sbquo", "sc", "sc", "scE", "scE", "scap", "scap", "scaron", "scaron", "sccue", 8515 "sccue", "sce", "sce", "scedil", "scedil", "scirc", "scirc", "scnE", "scnE", "scnap", "scnap", "scnsim", "scnsim", "scpolint", "scpolint", "scsim", "scsim", "scy", "scy", "sdot", "sdot", "sdotb", "sdotb", "sdote", "sdote", "seArr", "seArr", "searhk", "searhk", "searr", "searr", "searrow", "searrow", "sect", "sect", "semi", 8516 "semi", "seswar", "seswar", "setminus", "setminus", "setmn", "setmn", "sext", "sext", "sfr", "sfr", "sfrown", "sfrown", "sharp", "sharp", "shchcy", "shchcy", "shcy", "shcy", "shortmid", "shortmid", "shortparallel", "shortparallel", "shy", "shy", "sigma", "sigma", "sigmaf", "sigmaf", "sigmav", "sigmav", "sim", "sim", "simdot", 8517 "simdot", "sime", "sime", "simeq", "simeq", "simg", "simg", "simgE", "simgE", "siml", "siml", "simlE", "simlE", "simne", "simne", "simplus", "simplus", "simrarr", "simrarr", "slarr", "slarr", "smallsetminus", "smallsetminus", "smashp", "smashp", "smeparsl", "smeparsl", "smid", "smid", "smile", "smile", "smt", "smt", "smte", 8518 "smte", "softcy", 
"softcy", "sol", "sol", "solb", "solb", "solbar", "solbar", "sopf", "sopf", "spades", "spades", "spadesuit", "spadesuit", "spar", "spar", "sqcap", "sqcap", "sqcup", "sqcup", "sqsub", "sqsub", "sqsube", "sqsube", "sqsubset", "sqsubset", "sqsubseteq", "sqsubseteq", "sqsup", "sqsup", "sqsupe", "sqsupe", 8519 "sqsupset", "sqsupset", "sqsupseteq", "sqsupseteq", "squ", "squ", "square", "square", "squarf", "squarf", "squf", "squf", "srarr", "srarr", "sscr", "sscr", "ssetmn", "ssetmn", "ssmile", "ssmile", "sstarf", "sstarf", "star", "star", "starf", "starf", "straightepsilon", "straightepsilon", "straightphi", "straightphi", "strns", 8520 "strns", "sub", "sub", "subE", "subE", "subdot", "subdot", "sube", "sube", "subedot", "subedot", "submult", "submult", "subnE", "subnE", "subne", "subne", "subplus", "subplus", "subrarr", "subrarr", "subset", "subset", "subseteq", "subseteq", "subseteqq", "subseteqq", "subsetneq", "subsetneq", "subsetneqq", "subsetneqq", 8521 "subsim", "subsim", "subsub", "subsub", "subsup", "subsup", "succ", "succ", "succapprox", "succapprox", "succcurlyeq", "succcurlyeq", "succeq", "succeq", "succnapprox", "succnapprox", "succneqq", "succneqq", "succnsim", "succnsim", "succsim", "succsim", "sum", "sum", "sung", "sung", "sup", "sup", "sup1", "sup1", "sup2", 8522 "sup2", "sup3", "sup3", "supE", "supE", "supdot", "supdot", "supdsub", "supdsub", "supe", "supe", "supedot", "supedot", "suphsol", "suphsol", "suphsub", "suphsub", "suplarr", "suplarr", "supmult", "supmult", "supnE", "supnE", "supne", "supne", "supplus", "supplus", "supset", "supset", "supseteq", "supseteq", "supseteqq", 8523 "supseteqq", "supsetneq", "supsetneq", "supsetneqq", "supsetneqq", "supsim", "supsim", "supsub", "supsub", "supsup", "supsup", "swArr", "swArr", "swarhk", "swarhk", "swarr", "swarr", "swarrow", "swarrow", "swnwar", "swnwar", "szlig", "szlig", "target", "target", "tau", "tau", "tbrk", "tbrk", "tcaron", "tcaron", "tcedil", 8524 "tcedil", "tcy", "tcy", "tdot", "tdot", "telrec", 
"telrec", "tfr", "tfr", "there4", "there4", "therefore", "therefore", "theta", "theta", "thetasym", "thetasym", "thetav", "thetav", "thickapprox", "thickapprox", "thicksim", "thicksim", "thinsp", "thinsp", "thkap", "thkap", "thksim", "thksim", "thorn", "thorn", "tilde", 8525 "tilde", "times", "times", "timesb", "timesb", "timesbar", "timesbar", "timesd", "timesd", "tint", "tint", "toea", "toea", "top", "top", "topbot", "topbot", "topcir", "topcir", "topf", "topf", "topfork", "topfork", "tosa", "tosa", "tprime", "tprime", "trade", "trade", "triangle", "triangle", "triangledown", "triangledown", 8526 "triangleleft", "triangleleft", "trianglelefteq", "trianglelefteq", "triangleq", "triangleq", "triangleright", "triangleright", "trianglerighteq", "trianglerighteq", "tridot", "tridot", "trie", "trie", "triminus", "triminus", "triplus", "triplus", "trisb", "trisb", "tritime", "tritime", "trpezium", "trpezium", "tscr", 8527 "tscr", "tscy", "tscy", "tshcy", "tshcy", "tstrok", "tstrok", "twixt", "twixt", "twoheadleftarrow", "twoheadleftarrow", "twoheadrightarrow", "twoheadrightarrow", "uArr", "uArr", "uHar", "uHar", "uacute", "uacute", "uarr", "uarr", "ubrcy", "ubrcy", "ubreve", "ubreve", "ucirc", "ucirc", "ucy", "ucy", "udarr", "udarr", "udblac", 8528 "udblac", "udhar", "udhar", "ufisht", "ufisht", "ufr", "ufr", "ugrave", "ugrave", "uharl", "uharl", "uharr", "uharr", "uhblk", "uhblk", "ulcorn", "ulcorn", "ulcorner", "ulcorner", "ulcrop", "ulcrop", "ultri", "ultri", "umacr", "umacr", "uml", "uml", "uogon", "uogon", "uopf", "uopf", "uparrow", "uparrow", "updownarrow", 8529 "updownarrow", "upharpoonleft", "upharpoonleft", "upharpoonright", "upharpoonright", "uplus", "uplus", "upsi", "upsi", "upsih", "upsih", "upsilon", "upsilon", "upuparrows", "upuparrows", "urcorn", "urcorn", "urcorner", "urcorner", "urcrop", "urcrop", "uring", "uring", "urtri", "urtri", "uscr", "uscr", "utdot", "utdot", 8530 "utilde", "utilde", "utri", "utri", "utrif", "utrif", "uuarr", "uuarr", "uuml", 
"uuml", "uwangle", "uwangle", "vArr", "vArr", "vBar", "vBar", "vBarv", "vBarv", "vDash", "vDash", "vangrt", "vangrt", "varepsilon", "varepsilon", "varkappa", "varkappa", "varnothing", "varnothing", "varphi", "varphi", "varpi", "varpi", "varpropto", 8531 "varpropto", "varr", "varr", "varrho", "varrho", "varsigma", "varsigma", "vartheta", "vartheta", "vartriangleleft", "vartriangleleft", "vartriangleright", "vartriangleright", "vcy", "vcy", "vdash", "vdash", "vee", "vee", "veebar", "veebar", "veeeq", "veeeq", "vellip", "vellip", "verbar", "verbar", "vert", "vert", "vfr", 8532 "vfr", "vltri", "vltri", "vopf", "vopf", "vprop", "vprop", "vrtri", "vrtri", "vscr", "vscr", "vzigzag", "vzigzag", "wcirc", "wcirc", "wedbar", "wedbar", "wedge", "wedge", "wedgeq", "wedgeq", "weierp", "weierp", "wfr", "wfr", "wopf", "wopf", "wp", "wp", "wr", "wr", "wreath", "wreath", "wscr", "wscr", "xcap", "xcap", "xcirc", 8533 "xcirc", "xcup", "xcup", "xdtri", "xdtri", "xfr", "xfr", "xhArr", "xhArr", "xharr", "xharr", "xi", "xi", "xlArr", "xlArr", "xlarr", "xlarr", "xmap", "xmap", "xnis", "xnis", "xodot", "xodot", "xopf", "xopf", "xoplus", "xoplus", "xotime", "xotime", "xrArr", "xrArr", "xrarr", "xrarr", "xscr", "xscr", "xsqcup", "xsqcup", "xuplus", 8534 "xuplus", "xutri", "xutri", "xvee", "xvee", "xwedge", "xwedge", "yacute", "yacute", "yacy", "yacy", "ycirc", "ycirc", "ycy", "ycy", "yen", "yen", "yfr", "yfr", "yicy", "yicy", "yopf", "yopf", "yscr", "yscr", "yucy", "yucy", "yuml", "yuml", "zacute", "zacute", "zcaron", "zcaron", "zcy", "zcy", "zdot", "zdot", "zeetrf", 8535 "zeetrf", "zeta", "zeta", "zfr", "zfr", "zhcy", "zhcy", "zigrarr", "zigrarr", "zopf", "zopf", "zscr", "zscr", "zwj", "zwj", "zwnj", "zwnj", ]; 8536 8537 immutable dchar[] availableEntitiesValues = 8538 ['\u00c6', '\u00c6', '\u0026', '\u0026', '\u00c1', '\u00c1', '\u0102', '\u0102', '\u00c2', '\u00c2', '\u0410', '\u0410', '\U0001d504', '\U0001d504', '\u00c0', '\u00c0', '\u0391', '\u0391', '\u0100', '\u0100', '\u2a53', 
'\u2a53', '\u0104', '\u0104', '\U0001d538', '\U0001d538', '\u2061', '\u2061', '\u00c5', '\u00c5', '\U0001d49c', '\U0001d49c', '\u2254', '\u2254', '\u00c3', 8539 '\u00c3', '\u00c4', '\u00c4', '\u2216', '\u2216', '\u2ae7', '\u2ae7', '\u2306', '\u2306', '\u0411', '\u0411', '\u2235', '\u2235', '\u212c', '\u212c', '\u0392', '\u0392', '\U0001d505', '\U0001d505', '\U0001d539', '\U0001d539', '\u02d8', '\u02d8', '\u212c', '\u212c', '\u224e', '\u224e', '\u0427', '\u0427', '\u00a9', '\u00a9', '\u0106', '\u0106', '\u22d2', '\u22d2', '\u2145', 8540 '\u2145', '\u212d', '\u212d', '\u010c', '\u010c', '\u00c7', '\u00c7', '\u0108', '\u0108', '\u2230', '\u2230', '\u010a', '\u010a', '\u00b8', '\u00b8', '\u00b7', '\u00b7', '\u212d', '\u212d', '\u03a7', '\u03a7', '\u2299', '\u2299', '\u2296', '\u2296', '\u2295', '\u2295', '\u2297', '\u2297', 8541 '\u2232', '\u2232', '\u201d', '\u201d', '\u2019', '\u2019', '\u2237', '\u2237', '\u2a74', '\u2a74', '\u2261', '\u2261', '\u222f', '\u222f', '\u222e', '\u222e', '\u2102', '\u2102', '\u2210', '\u2210', '\u2233', 8542 '\u2233', '\u2a2f', '\u2a2f', '\U0001d49e', '\U0001d49e', '\u22d3', '\u22d3', '\u224d', '\u224d', '\u2145', '\u2145', '\u2911', '\u2911', '\u0402', '\u0402', '\u0405', '\u0405', '\u040f', '\u040f', '\u2021', '\u2021', '\u21a1', '\u21a1', '\u2ae4', '\u2ae4', '\u010e', '\u010e', '\u0414', '\u0414', '\u2207', '\u2207', '\u0394', '\u0394', '\U0001d507', '\U0001d507', 8543 '\u00b4', '\u00b4', '\u02d9', '\u02d9', '\u02dd', '\u02dd', '\u0060', '\u0060', '\u02dc', '\u02dc', '\u22c4', '\u22c4', '\u2146', '\u2146', '\U0001d53b', '\U0001d53b', '\u00a8', '\u00a8', '\u20dc', '\u20dc', '\u2250', 8544 '\u2250', '\u222f', '\u222f', '\u00a8', '\u00a8', '\u21d3', '\u21d3', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u2ae4', '\u2ae4', '\u27f8', '\u27f8', '\u27fa', 8545 '\u27fa', '\u27f9', '\u27f9', '\u21d2', '\u21d2', '\u22a8', '\u22a8', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2225', '\u2225', '\u2193', '\u2193', '\u2913', '\u2913', 8546 '\u21f5', 
'\u21f5', '\u0311', '\u0311', '\u2950', '\u2950', '\u295e', '\u295e', '\u21bd', '\u21bd', '\u2956', '\u2956', '\u295f', '\u295f', '\u21c1', '\u21c1', '\u2957', 8547 '\u2957', '\u22a4', '\u22a4', '\u21a7', '\u21a7', '\u21d3', '\u21d3', '\U0001d49f', '\U0001d49f', '\u0110', '\u0110', '\u014a', '\u014a', '\u00d0', '\u00d0', '\u00c9', '\u00c9', '\u011a', '\u011a', '\u00ca', '\u00ca', '\u042d', '\u042d', '\u0116', '\u0116', '\U0001d508', '\U0001d508', '\u00c8', '\u00c8', '\u2208', '\u2208', '\u0112', '\u0112', 8548 '\u25fb', '\u25fb', '\u25ab', '\u25ab', '\u0118', '\u0118', '\U0001d53c', '\U0001d53c', '\u0395', '\u0395', '\u2a75', '\u2a75', '\u2242', '\u2242', '\u21cc', '\u21cc', '\u2130', '\u2130', '\u2a73', '\u2a73', '\u0397', '\u0397', '\u00cb', '\u00cb', '\u2203', '\u2203', '\u2147', '\u2147', 8549 '\u0424', '\u0424', '\U0001d509', '\U0001d509', '\u25fc', '\u25fc', '\u25aa', '\u25aa', '\U0001d53d', '\U0001d53d', '\u2200', '\u2200', '\u2131', '\u2131', '\u2131', '\u2131', '\u0403', '\u0403', '\u003e', '\u003e', '\u0393', '\u0393', '\u03dc', '\u03dc', '\u011e', '\u011e', '\u0122', '\u0122', '\u011c', '\u011c', 8550 '\u0413', '\u0413', '\u0120', '\u0120', '\U0001d50a', '\U0001d50a', '\u22d9', '\u22d9', '\U0001d53e', '\U0001d53e', '\u2265', '\u2265', '\u22db', '\u22db', '\u2267', '\u2267', '\u2aa2', '\u2aa2', '\u2277', '\u2277', '\u2a7e', '\u2a7e', '\u2273', '\u2273', 8551 '\U0001d4a2', '\U0001d4a2', '\u226b', '\u226b', '\u042a', '\u042a', '\u02c7', '\u02c7', '\u005e', '\u005e', '\u0124', '\u0124', '\u210c', '\u210c', '\u210b', '\u210b', '\u210d', '\u210d', '\u2500', '\u2500', '\u210b', '\u210b', '\u0126', '\u0126', '\u224e', '\u224e', '\u224f', '\u224f', '\u0415', '\u0415', '\u0132', '\u0132', 8552 '\u0401', '\u0401', '\u00cd', '\u00cd', '\u00ce', '\u00ce', '\u0418', '\u0418', '\u0130', '\u0130', '\u2111', '\u2111', '\u00cc', '\u00cc', '\u2111', '\u2111', '\u012a', '\u012a', '\u2148', '\u2148', '\u21d2', '\u21d2', '\u222c', '\u222c', '\u222b', '\u222b', '\u22c2', 
'\u22c2', '\u2063', '\u2063', '\u2062', 8553 '\u2062', '\u012e', '\u012e', '\U0001d540', '\U0001d540', '\u0399', '\u0399', '\u2110', '\u2110', '\u0128', '\u0128', '\u0406', '\u0406', '\u00cf', '\u00cf', '\u0134', '\u0134', '\u0419', '\u0419', '\U0001d50d', '\U0001d50d', '\U0001d541', '\U0001d541', '\U0001d4a5', '\U0001d4a5', '\u0408', '\u0408', '\u0404', '\u0404', '\u0425', '\u0425', '\u040c', '\u040c', '\u039a', '\u039a', '\u0136', '\u0136', 8554 '\u041a', '\u041a', '\U0001d50e', '\U0001d50e', '\U0001d542', '\U0001d542', '\U0001d4a6', '\U0001d4a6', '\u0409', '\u0409', '\u003c', '\u003c', '\u0139', '\u0139', '\u039b', '\u039b', '\u27ea', '\u27ea', '\u2112', '\u2112', '\u219e', '\u219e', '\u013d', '\u013d', '\u013b', '\u013b', '\u041b', '\u041b', '\u27e8', '\u27e8', '\u2190', '\u2190', '\u21e4', 8555 '\u21e4', '\u21c6', '\u21c6', '\u2308', '\u2308', '\u27e6', '\u27e6', '\u2961', '\u2961', '\u21c3', '\u21c3', '\u2959', '\u2959', '\u230a', '\u230a', '\u2194', '\u2194', '\u294e', 8556 '\u294e', '\u22a3', '\u22a3', '\u21a4', '\u21a4', '\u295a', '\u295a', '\u22b2', '\u22b2', '\u29cf', '\u29cf', '\u22b4', '\u22b4', '\u2951', '\u2951', '\u2960', '\u2960', '\u21bf', '\u21bf', 8557 '\u2958', '\u2958', '\u21bc', '\u21bc', '\u2952', '\u2952', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u22da', '\u22da', '\u2266', '\u2266', '\u2276', '\u2276', '\u2aa1', '\u2aa1', '\u2a7d', '\u2a7d', 8558 '\u2272', '\u2272', '\U0001d50f', '\U0001d50f', '\u22d8', '\u22d8', '\u21da', '\u21da', '\u013f', '\u013f', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27f6', '\u27f6', '\u27f8', '\u27f8', '\u27fa', '\u27fa', '\u27f9', '\u27f9', 8559 '\U0001d543', '\U0001d543', '\u2199', '\u2199', '\u2198', '\u2198', '\u2112', '\u2112', '\u21b0', '\u21b0', '\u0141', '\u0141', '\u226a', '\u226a', '\u2905', '\u2905', '\u041c', '\u041c', '\u205f', '\u205f', '\u2133', '\u2133', '\U0001d510', '\U0001d510', '\u2213', '\u2213', '\U0001d544', '\U0001d544', '\u2133', '\u2133', '\u039c', '\u039c', 8560 '\u040a', '\u040a', 
'\u0143', '\u0143', '\u0147', '\u0147', '\u0145', '\u0145', '\u041d', '\u041d', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u226b', '\u226b', 8561 '\u226a', '\u226a', '\u000a', '\u000a', '\U0001d511', '\U0001d511', '\u2060', '\u2060', '\u00a0', '\u00a0', '\u2115', '\u2115', '\u2aec', '\u2aec', '\u2262', '\u2262', '\u226d', '\u226d', '\u2226', '\u2226', '\u2209', '\u2209', '\u2260', '\u2260', 8562 '\u2204', '\u2204', '\u226f', '\u226f', '\u2271', '\u2271', '\u2279', '\u2279', '\u2275', '\u2275', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u226e', '\u226e', '\u2270', '\u2270', '\u2278', 8563 '\u2278', '\u2274', '\u2274', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u220c', '\u220c', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u22e2', '\u22e2', '\u22e3', 8564 '\u22e3', '\u2288', '\u2288', '\u2281', '\u2281', '\u22e1', '\u22e1', '\u2289', '\u2289', '\u2241', '\u2241', '\u2244', '\u2244', '\u2247', '\u2247', '\u2249', '\u2249', '\u2224', 8565 '\u2224', '\U0001d4a9', '\U0001d4a9', '\u00d1', '\u00d1', '\u039d', '\u039d', '\u0152', '\u0152', '\u00d3', '\u00d3', '\u00d4', '\u00d4', '\u041e', '\u041e', '\u0150', '\u0150', '\U0001d512', '\U0001d512', '\u00d2', '\u00d2', '\u014c', '\u014c', '\u03a9', '\u03a9', '\u039f', '\u039f', '\U0001d546', '\U0001d546', '\u201c', '\u201c', '\u2018', 8566 '\u2018', '\u2a54', '\u2a54', '\U0001d4aa', '\U0001d4aa', '\u00d8', '\u00d8', '\u00d5', '\u00d5', '\u2a37', '\u2a37', '\u00d6', '\u00d6', '\u203e', '\u203e', '\u23de', '\u23de', '\u23b4', '\u23b4', '\u23dc', '\u23dc', '\u2202', '\u2202', '\u041f', '\u041f', '\U0001d513', '\U0001d513', '\u03a6', '\u03a6', '\u03a0', '\u03a0', '\u00b1', 8567 '\u00b1', '\u210c', '\u210c', '\u2119', '\u2119', '\u2abb', '\u2abb', '\u227a', '\u227a', '\u2aaf', '\u2aaf', '\u227c', '\u227c', '\u227e', '\u227e', '\u2033', '\u2033', '\u220f', '\u220f', '\u2237', '\u2237', '\u221d', '\u221d', '\U0001d4ab', '\U0001d4ab', 8568 '\u03a8', '\u03a8', '\u0022', '\u0022', '\U0001d514', 
'\U0001d514', '\u211a', '\u211a', '\U0001d4ac', '\U0001d4ac', '\u2910', '\u2910', '\u00ae', '\u00ae', '\u0154', '\u0154', '\u27eb', '\u27eb', '\u21a0', '\u21a0', '\u2916', '\u2916', '\u0158', '\u0158', '\u0156', '\u0156', '\u0420', '\u0420', '\u211c', '\u211c', '\u220b', '\u220b', '\u21cb', '\u21cb', 8569 '\u296f', '\u296f', '\u211c', '\u211c', '\u03a1', '\u03a1', '\u27e9', '\u27e9', '\u2192', '\u2192', '\u21e5', '\u21e5', '\u21c4', '\u21c4', '\u2309', '\u2309', '\u27e7', '\u27e7', '\u295d', 8570 '\u295d', '\u21c2', '\u21c2', '\u2955', '\u2955', '\u230b', '\u230b', '\u22a2', '\u22a2', '\u21a6', '\u21a6', '\u295b', '\u295b', '\u22b3', '\u22b3', '\u29d0', '\u29d0', '\u22b5', 8571 '\u22b5', '\u294f', '\u294f', '\u295c', '\u295c', '\u21be', '\u21be', '\u2954', '\u2954', '\u21c0', '\u21c0', '\u2953', '\u2953', '\u21d2', '\u21d2', '\u211d', '\u211d', '\u2970', '\u2970', 8572 '\u21db', '\u21db', '\u211b', '\u211b', '\u21b1', '\u21b1', '\u29f4', '\u29f4', '\u0429', '\u0429', '\u0428', '\u0428', '\u042c', '\u042c', '\u015a', '\u015a', '\u2abc', '\u2abc', '\u0160', '\u0160', '\u015e', '\u015e', '\u015c', '\u015c', '\u0421', '\u0421', '\U0001d516', '\U0001d516', '\u2193', '\u2193', '\u2190', '\u2190', 8573 '\u2192', '\u2192', '\u2191', '\u2191', '\u03a3', '\u03a3', '\u2218', '\u2218', '\U0001d54a', '\U0001d54a', '\u221a', '\u221a', '\u25a1', '\u25a1', '\u2293', '\u2293', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', 8574 '\u2292', '\u2292', '\u2294', '\u2294', '\U0001d4ae', '\U0001d4ae', '\u22c6', '\u22c6', '\u22d0', '\u22d0', '\u22d0', '\u22d0', '\u2286', '\u2286', '\u227b', '\u227b', '\u2ab0', '\u2ab0', '\u227d', '\u227d', '\u227f', '\u227f', '\u220b', 8575 '\u220b', '\u2211', '\u2211', '\u22d1', '\u22d1', '\u2283', '\u2283', '\u2287', '\u2287', '\u22d1', '\u22d1', '\u00de', '\u00de', '\u2122', '\u2122', '\u040b', '\u040b', '\u0426', '\u0426', '\u0009', '\u0009', '\u03a4', '\u03a4', '\u0164', '\u0164', '\u0162', '\u0162', '\u0422', '\u0422', '\U0001d517', 
'\U0001d517', '\u2234', '\u2234', '\u0398', '\u0398', 8576 '\u2009', '\u2009', '\u223c', '\u223c', '\u2243', '\u2243', '\u2245', '\u2245', '\u2248', '\u2248', '\U0001d54b', '\U0001d54b', '\u20db', '\u20db', '\U0001d4af', '\U0001d4af', '\u0166', '\u0166', '\u00da', '\u00da', '\u219f', '\u219f', '\u2949', '\u2949', '\u040e', '\u040e', '\u016c', '\u016c', '\u00db', 8577 '\u00db', '\u0423', '\u0423', '\u0170', '\u0170', '\U0001d518', '\U0001d518', '\u00d9', '\u00d9', '\u016a', '\u016a', '\u005f', '\u005f', '\u23df', '\u23df', '\u23b5', '\u23b5', '\u23dd', '\u23dd', '\u22c3', '\u22c3', '\u228e', '\u228e', '\u0172', '\u0172', '\U0001d54c', '\U0001d54c', '\u2191', '\u2191', '\u2912', 8578 '\u2912', '\u21c5', '\u21c5', '\u2195', '\u2195', '\u296e', '\u296e', '\u22a5', '\u22a5', '\u21a5', '\u21a5', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2196', '\u2196', '\u2197', '\u2197', '\u03d2', '\u03d2', '\u03a5', '\u03a5', 8579 '\u016e', '\u016e', '\U0001d4b0', '\U0001d4b0', '\u0168', '\u0168', '\u00dc', '\u00dc', '\u22ab', '\u22ab', '\u2aeb', '\u2aeb', '\u0412', '\u0412', '\u22a9', '\u22a9', '\u2ae6', '\u2ae6', '\u22c1', '\u22c1', '\u2016', '\u2016', '\u2016', '\u2016', '\u2223', '\u2223', '\u007c', '\u007c', '\u2758', '\u2758', '\u2240', 8580 '\u2240', '\u200a', '\u200a', '\U0001d519', '\U0001d519', '\U0001d54d', '\U0001d54d', '\U0001d4b1', '\U0001d4b1', '\u22aa', '\u22aa', '\u0174', '\u0174', '\u22c0', '\u22c0', '\U0001d51a', '\U0001d51a', '\U0001d54e', '\U0001d54e', '\U0001d4b2', '\U0001d4b2', '\U0001d51b', '\U0001d51b', '\u039e', '\u039e', '\U0001d54f', '\U0001d54f', '\U0001d4b3', '\U0001d4b3', '\u042f', '\u042f', '\u0407', '\u0407', '\u042e', '\u042e', '\u00dd', '\u00dd', 8581 '\u0176', '\u0176', '\u042b', '\u042b', '\U0001d51c', '\U0001d51c', '\U0001d550', '\U0001d550', '\U0001d4b4', '\U0001d4b4', '\u0178', '\u0178', '\u0416', '\u0416', '\u0179', '\u0179', '\u017d', '\u017d', '\u0417', '\u0417', '\u017b', '\u017b', '\u200b', '\u200b', '\u0396', '\u0396', '\u2128', '\u2128', 
'\u2124', '\u2124', '\U0001d4b5', '\U0001d4b5', '\u00e1', '\u00e1', '\u0103', '\u0103', '\u223e', 8582 '\u223e', '\u223f', '\u223f', '\u00e2', '\u00e2', '\u00b4', '\u00b4', '\u0430', '\u0430', '\u00e6', '\u00e6', '\u2061', '\u2061', '\U0001d51e', '\U0001d51e', '\u00e0', '\u00e0', '\u2135', '\u2135', '\u2135', '\u2135', '\u03b1', '\u03b1', '\u0101', '\u0101', '\u2a3f', '\u2a3f', '\u2227', '\u2227', '\u2a55', '\u2a55', '\u2a5c', '\u2a5c', '\u2a58', '\u2a58', '\u2a5a', '\u2a5a', '\u2220', 8583 '\u2220', '\u29a4', '\u29a4', '\u2220', '\u2220', '\u2221', '\u2221', '\u29a8', '\u29a8', '\u29a9', '\u29a9', '\u29aa', '\u29aa', '\u29ab', '\u29ab', '\u29ac', '\u29ac', '\u29ad', '\u29ad', '\u29ae', '\u29ae', '\u29af', '\u29af', '\u221f', '\u221f', '\u22be', '\u22be', '\u299d', '\u299d', '\u2222', 8584 '\u2222', '\u00c5', '\u00c5', '\u237c', '\u237c', '\u0105', '\u0105', '\U0001d552', '\U0001d552', '\u2248', '\u2248', '\u2a70', '\u2a70', '\u2a6f', '\u2a6f', '\u224a', '\u224a', '\u224b', '\u224b', '\u2248', '\u2248', '\u224a', '\u224a', '\u00e5', '\u00e5', '\U0001d4b6', '\U0001d4b6', '\u002a', '\u002a', '\u2248', '\u2248', '\u224d', '\u224d', '\u00e3', '\u00e3', '\u00e4', 8585 '\u00e4', '\u2233', '\u2233', '\u2a11', '\u2a11', '\u2aed', '\u2aed', '\u224c', '\u224c', '\u03f6', '\u03f6', '\u2035', '\u2035', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u22bd', '\u22bd', '\u2305', '\u2305', '\u2305', '\u2305', '\u23b5', '\u23b5', '\u23b6', '\u23b6', '\u224c', '\u224c', '\u0431', 8586 '\u0431', '\u201e', '\u201e', '\u2235', '\u2235', '\u2235', '\u2235', '\u29b0', '\u29b0', '\u03f6', '\u03f6', '\u212c', '\u212c', '\u03b2', '\u03b2', '\u2136', '\u2136', '\u226c', '\u226c', '\U0001d51f', '\U0001d51f', '\u22c2', '\u22c2', '\u25ef', '\u25ef', '\u22c3', '\u22c3', '\u2a00', '\u2a00', '\u2a01', '\u2a01', '\u2a02', '\u2a02', 8587 '\u2a06', '\u2a06', '\u2605', '\u2605', '\u25bd', '\u25bd', '\u25b3', '\u25b3', '\u2a04', '\u2a04', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u290d', '\u290d', '\u29eb', 
'\u29eb', '\u25aa', '\u25aa', '\u25b4', '\u25b4', '\u25be', 8588 '\u25be', '\u25c2', '\u25c2', '\u25b8', '\u25b8', '\u2423', '\u2423', '\u2592', '\u2592', '\u2591', '\u2591', '\u2593', '\u2593', '\u2588', '\u2588', '\u2310', '\u2310', '\U0001d553', '\U0001d553', '\u22a5', '\u22a5', '\u22a5', '\u22a5', '\u22c8', '\u22c8', '\u2557', '\u2557', '\u2554', '\u2554', '\u2556', 8589 '\u2556', '\u2553', '\u2553', '\u2550', '\u2550', '\u2566', '\u2566', '\u2569', '\u2569', '\u2564', '\u2564', '\u2567', '\u2567', '\u255d', '\u255d', '\u255a', '\u255a', '\u255c', '\u255c', '\u2559', '\u2559', '\u2551', '\u2551', '\u256c', '\u256c', '\u2563', '\u2563', '\u2560', '\u2560', '\u256b', '\u256b', '\u2562', '\u2562', '\u255f', '\u255f', '\u29c9', 8590 '\u29c9', '\u2555', '\u2555', '\u2552', '\u2552', '\u2510', '\u2510', '\u250c', '\u250c', '\u2500', '\u2500', '\u2565', '\u2565', '\u2568', '\u2568', '\u252c', '\u252c', '\u2534', '\u2534', '\u229f', '\u229f', '\u229e', '\u229e', '\u22a0', '\u22a0', '\u255b', '\u255b', '\u2558', '\u2558', '\u2518', '\u2518', '\u2514', '\u2514', '\u2502', 8591 '\u2502', '\u256a', '\u256a', '\u2561', '\u2561', '\u255e', '\u255e', '\u253c', '\u253c', '\u2524', '\u2524', '\u251c', '\u251c', '\u2035', '\u2035', '\u02d8', '\u02d8', '\u00a6', '\u00a6', '\U0001d4b7', '\U0001d4b7', '\u204f', '\u204f', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u005c', '\u005c', '\u29c5', '\u29c5', '\u27c8', '\u27c8', '\u2022', '\u2022', '\u2022', 8592 '\u2022', '\u224e', '\u224e', '\u2aae', '\u2aae', '\u224f', '\u224f', '\u224f', '\u224f', '\u0107', '\u0107', '\u2229', '\u2229', '\u2a44', '\u2a44', '\u2a49', '\u2a49', '\u2a4b', '\u2a4b', '\u2a47', '\u2a47', '\u2a40', '\u2a40', '\u2041', '\u2041', '\u02c7', '\u02c7', '\u2a4d', '\u2a4d', '\u010d', '\u010d', '\u00e7', '\u00e7', '\u0109', 8593 '\u0109', '\u2a4c', '\u2a4c', '\u2a50', '\u2a50', '\u010b', '\u010b', '\u00b8', '\u00b8', '\u29b2', '\u29b2', '\u00a2', '\u00a2', '\u00b7', '\u00b7', '\U0001d520', '\U0001d520', '\u0447', 
'\u0447', '\u2713', '\u2713', '\u2713', '\u2713', '\u03c7', '\u03c7', '\u25cb', '\u25cb', '\u29c3', '\u29c3', '\u02c6', '\u02c6', '\u2257', '\u2257', '\u21ba', 8594 '\u21ba', '\u21bb', '\u21bb', '\u00ae', '\u00ae', '\u24c8', '\u24c8', '\u229b', '\u229b', '\u229a', '\u229a', '\u229d', '\u229d', '\u2257', '\u2257', '\u2a10', '\u2a10', '\u2aef', '\u2aef', '\u29c2', '\u29c2', '\u2663', '\u2663', '\u2663', '\u2663', '\u003a', 8595 '\u003a', '\u2254', '\u2254', '\u2254', '\u2254', '\u002c', '\u002c', '\u0040', '\u0040', '\u2201', '\u2201', '\u2218', '\u2218', '\u2201', '\u2201', '\u2102', '\u2102', '\u2245', '\u2245', '\u2a6d', '\u2a6d', '\u222e', '\u222e', '\U0001d554', '\U0001d554', '\u2210', '\u2210', '\u00a9', '\u00a9', '\u2117', '\u2117', '\u21b5', '\u21b5', 8596 '\u2717', '\u2717', '\U0001d4b8', '\U0001d4b8', '\u2acf', '\u2acf', '\u2ad1', '\u2ad1', '\u2ad0', '\u2ad0', '\u2ad2', '\u2ad2', '\u22ef', '\u22ef', '\u2938', '\u2938', '\u2935', '\u2935', '\u22de', '\u22de', '\u22df', '\u22df', '\u21b6', '\u21b6', '\u293d', '\u293d', '\u222a', '\u222a', '\u2a48', '\u2a48', '\u2a46', '\u2a46', '\u2a4a', '\u2a4a', 8597 '\u228d', '\u228d', '\u2a45', '\u2a45', '\u21b7', '\u21b7', '\u293c', '\u293c', '\u22de', '\u22de', '\u22df', '\u22df', '\u22ce', '\u22ce', '\u22cf', '\u22cf', '\u00a4', '\u00a4', '\u21b6', '\u21b6', '\u21b7', '\u21b7', '\u22ce', '\u22ce', '\u22cf', '\u22cf', 8598 '\u2232', '\u2232', '\u2231', '\u2231', '\u232d', '\u232d', '\u21d3', '\u21d3', '\u2965', '\u2965', '\u2020', '\u2020', '\u2138', '\u2138', '\u2193', '\u2193', '\u2010', '\u2010', '\u22a3', '\u22a3', '\u290f', '\u290f', '\u02dd', '\u02dd', '\u010f', '\u010f', '\u0434', '\u0434', '\u2146', '\u2146', '\u2021', '\u2021', '\u21ca', '\u21ca', '\u2a77', 8599 '\u2a77', '\u00b0', '\u00b0', '\u03b4', '\u03b4', '\u29b1', '\u29b1', '\u297f', '\u297f', '\U0001d521', '\U0001d521', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u22c4', '\u22c4', '\u22c4', '\u22c4', '\u2666', '\u2666', '\u2666', '\u2666', '\u00a8', 
'\u00a8', '\u03dd', '\u03dd', '\u22f2', '\u22f2', '\u00f7', '\u00f7', '\u00f7', '\u00f7', '\u22c7', 8600 '\u22c7', '\u22c7', '\u22c7', '\u0452', '\u0452', '\u231e', '\u231e', '\u230d', '\u230d', '\u0024', '\u0024', '\U0001d555', '\U0001d555', '\u02d9', '\u02d9', '\u2250', '\u2250', '\u2251', '\u2251', '\u2238', '\u2238', '\u2214', '\u2214', '\u22a1', '\u22a1', '\u2306', '\u2306', '\u2193', '\u2193', '\u21ca', 8601 '\u21ca', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u2910', '\u2910', '\u231f', '\u231f', '\u230c', '\u230c', '\U0001d4b9', '\U0001d4b9', '\u0455', '\u0455', '\u29f6', '\u29f6', '\u0111', '\u0111', '\u22f1', '\u22f1', '\u25bf', '\u25bf', '\u25be', '\u25be', '\u21f5', '\u21f5', '\u296f', '\u296f', '\u29a6', 8602 '\u29a6', '\u045f', '\u045f', '\u27ff', '\u27ff', '\u2a77', '\u2a77', '\u2251', '\u2251', '\u00e9', '\u00e9', '\u2a6e', '\u2a6e', '\u011b', '\u011b', '\u2256', '\u2256', '\u00ea', '\u00ea', '\u2255', '\u2255', '\u044d', '\u044d', '\u0117', '\u0117', '\u2147', '\u2147', '\u2252', '\u2252', '\U0001d522', '\U0001d522', '\u2a9a', '\u2a9a', '\u00e8', '\u00e8', '\u2a96', '\u2a96', '\u2a98', 8603 '\u2a98', '\u2a99', '\u2a99', '\u23e7', '\u23e7', '\u2113', '\u2113', '\u2a95', '\u2a95', '\u2a97', '\u2a97', '\u0113', '\u0113', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2003', '\u2003', '\u2004', '\u2004', '\u2005', '\u2005', '\u014b', '\u014b', '\u2002', '\u2002', '\u0119', '\u0119', '\U0001d556', '\U0001d556', '\u22d5', '\u22d5', '\u29e3', 8604 '\u29e3', '\u2a71', '\u2a71', '\u03b5', '\u03b5', '\u03b5', '\u03b5', '\u03f5', '\u03f5', '\u2256', '\u2256', '\u2255', '\u2255', '\u2242', '\u2242', '\u2a96', '\u2a96', '\u2a95', '\u2a95', '\u003d', '\u003d', '\u225f', '\u225f', '\u2261', '\u2261', '\u2a78', '\u2a78', '\u29e5', '\u29e5', '\u2253', '\u2253', 8605 '\u2971', '\u2971', '\u212f', '\u212f', '\u2250', '\u2250', '\u2242', '\u2242', '\u03b7', '\u03b7', '\u00f0', '\u00f0', '\u00eb', '\u00eb', '\u20ac', '\u20ac', '\u0021', '\u0021', 
'\u2203', '\u2203', '\u2130', '\u2130', '\u2147', '\u2147', '\u2252', '\u2252', '\u0444', '\u0444', '\u2640', '\u2640', '\ufb03', '\ufb03', '\ufb00', 8606 '\ufb00', '\ufb04', '\ufb04', '\U0001d523', '\U0001d523', '\ufb01', '\ufb01', '\u266d', '\u266d', '\ufb02', '\ufb02', '\u25b1', '\u25b1', '\u0192', '\u0192', '\U0001d557', '\U0001d557', '\u2200', '\u2200', '\u22d4', '\u22d4', '\u2ad9', '\u2ad9', '\u2a0d', '\u2a0d', '\u00bd', '\u00bd', '\u2153', '\u2153', '\u00bc', '\u00bc', '\u2155', '\u2155', '\u2159', '\u2159', 8607 '\u215b', '\u215b', '\u2154', '\u2154', '\u2156', '\u2156', '\u00be', '\u00be', '\u2157', '\u2157', '\u215c', '\u215c', '\u2158', '\u2158', '\u215a', '\u215a', '\u215d', '\u215d', '\u215e', '\u215e', '\u2044', '\u2044', '\u2322', '\u2322', '\U0001d4bb', '\U0001d4bb', '\u2267', '\u2267', '\u2a8c', '\u2a8c', '\u01f5', '\u01f5', '\u03b3', '\u03b3', '\u03dd', 8608 '\u03dd', '\u2a86', '\u2a86', '\u011f', '\u011f', '\u011d', '\u011d', '\u0433', '\u0433', '\u0121', '\u0121', '\u2265', '\u2265', '\u22db', '\u22db', '\u2265', '\u2265', '\u2267', '\u2267', '\u2a7e', '\u2a7e', '\u2a7e', '\u2a7e', '\u2aa9', '\u2aa9', '\u2a80', '\u2a80', '\u2a82', '\u2a82', '\u2a84', '\u2a84', '\u2a94', '\u2a94', '\U0001d524', '\U0001d524', '\u226b', '\u226b', '\u22d9', 8609 '\u22d9', '\u2137', '\u2137', '\u0453', '\u0453', '\u2277', '\u2277', '\u2a92', '\u2a92', '\u2aa5', '\u2aa5', '\u2aa4', '\u2aa4', '\u2269', '\u2269', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a88', '\u2a88', '\u2a88', '\u2a88', '\u2269', '\u2269', '\u22e7', '\u22e7', '\U0001d558', '\U0001d558', '\u0060', '\u0060', '\u210a', '\u210a', '\u2273', '\u2273', '\u2a8e', '\u2a8e', '\u2a90', '\u2a90', '\u2aa7', 8610 '\u2aa7', '\u2a7a', '\u2a7a', '\u22d7', '\u22d7', '\u2995', '\u2995', '\u2a7c', '\u2a7c', '\u2a86', '\u2a86', '\u2978', '\u2978', '\u22d7', '\u22d7', '\u22db', '\u22db', '\u2a8c', '\u2a8c', '\u2277', '\u2277', '\u2273', '\u2273', '\u21d4', '\u21d4', '\u200a', '\u200a', '\u00bd', '\u00bd', '\u210b', 
'\u210b', 8611 '\u044a', '\u044a', '\u2194', '\u2194', '\u2948', '\u2948', '\u21ad', '\u21ad', '\u210f', '\u210f', '\u0125', '\u0125', '\u2665', '\u2665', '\u2665', '\u2665', '\u2026', '\u2026', '\u22b9', '\u22b9', '\U0001d525', '\U0001d525', '\u2925', '\u2925', '\u2926', '\u2926', '\u21ff', '\u21ff', '\u223b', '\u223b', '\u21a9', '\u21a9', 8612 '\u21aa', '\u21aa', '\U0001d559', '\U0001d559', '\u2015', '\u2015', '\U0001d4bd', '\U0001d4bd', '\u210f', '\u210f', '\u0127', '\u0127', '\u2043', '\u2043', '\u2010', '\u2010', '\u00ed', '\u00ed', '\u2063', '\u2063', '\u00ee', '\u00ee', '\u0438', '\u0438', '\u0435', '\u0435', '\u00a1', '\u00a1', '\u21d4', '\u21d4', '\U0001d526', '\U0001d526', '\u00ec', '\u00ec', '\u2148', 8613 '\u2148', '\u2a0c', '\u2a0c', '\u222d', '\u222d', '\u29dc', '\u29dc', '\u2129', '\u2129', '\u0133', '\u0133', '\u012b', '\u012b', '\u2111', '\u2111', '\u2110', '\u2110', '\u2111', '\u2111', '\u0131', '\u0131', '\u22b7', '\u22b7', '\u01b5', '\u01b5', '\u2208', '\u2208', '\u2105', '\u2105', '\u221e', '\u221e', '\u29dd', '\u29dd', '\u0131', 8614 '\u0131', '\u222b', '\u222b', '\u22ba', '\u22ba', '\u2124', '\u2124', '\u22ba', '\u22ba', '\u2a17', '\u2a17', '\u2a3c', '\u2a3c', '\u0451', '\u0451', '\u012f', '\u012f', '\U0001d55a', '\U0001d55a', '\u03b9', '\u03b9', '\u2a3c', '\u2a3c', '\u00bf', '\u00bf', '\U0001d4be', '\U0001d4be', '\u2208', '\u2208', '\u22f9', '\u22f9', '\u22f5', '\u22f5', '\u22f4', 8615 '\u22f4', '\u22f3', '\u22f3', '\u2208', '\u2208', '\u2062', '\u2062', '\u0129', '\u0129', '\u0456', '\u0456', '\u00ef', '\u00ef', '\u0135', '\u0135', '\u0439', '\u0439', '\U0001d527', '\U0001d527', '\u0237', '\u0237', '\U0001d55b', '\U0001d55b', '\U0001d4bf', '\U0001d4bf', '\u0458', '\u0458', '\u0454', '\u0454', '\u03ba', '\u03ba', '\u03f0', '\u03f0', '\u0137', '\u0137', '\u043a', '\u043a', '\U0001d528', 8616 '\U0001d528', '\u0138', '\u0138', '\u0445', '\u0445', '\u045c', '\u045c', '\U0001d55c', '\U0001d55c', '\U0001d4c0', '\U0001d4c0', '\u21da', '\u21da', 
'\u21d0', '\u21d0', '\u291b', '\u291b', '\u290e', '\u290e', '\u2266', '\u2266', '\u2a8b', '\u2a8b', '\u2962', '\u2962', '\u013a', '\u013a', '\u29b4', '\u29b4', '\u2112', '\u2112', '\u03bb', '\u03bb', '\u27e8', '\u27e8', '\u2991', '\u2991', 8617 '\u27e8', '\u27e8', '\u2a85', '\u2a85', '\u00ab', '\u00ab', '\u2190', '\u2190', '\u21e4', '\u21e4', '\u291f', '\u291f', '\u291d', '\u291d', '\u21a9', '\u21a9', '\u21ab', '\u21ab', '\u2939', '\u2939', '\u2973', '\u2973', '\u21a2', '\u21a2', '\u2aab', '\u2aab', '\u2919', '\u2919', '\u2aad', '\u2aad', '\u290c', '\u290c', '\u2772', '\u2772', '\u007b', 8618 '\u007b', '\u005b', '\u005b', '\u298b', '\u298b', '\u298f', '\u298f', '\u298d', '\u298d', '\u013e', '\u013e', '\u013c', '\u013c', '\u2308', '\u2308', '\u007b', '\u007b', '\u043b', '\u043b', '\u2936', '\u2936', '\u201c', '\u201c', '\u201e', '\u201e', '\u2967', '\u2967', '\u294b', '\u294b', '\u21b2', '\u21b2', '\u2264', '\u2264', '\u2190', 8619 '\u2190', '\u21a2', '\u21a2', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u21c7', '\u21c7', '\u2194', '\u2194', '\u21c6', '\u21c6', '\u21cb', '\u21cb', '\u21ad', '\u21ad', '\u22cb', 8620 '\u22cb', '\u22da', '\u22da', '\u2264', '\u2264', '\u2266', '\u2266', '\u2a7d', '\u2a7d', '\u2a7d', '\u2a7d', '\u2aa8', '\u2aa8', '\u2a7f', '\u2a7f', '\u2a81', '\u2a81', '\u2a83', '\u2a83', '\u2a93', '\u2a93', '\u2a85', '\u2a85', '\u22d6', '\u22d6', '\u22da', '\u22da', '\u2a8b', '\u2a8b', '\u2276', '\u2276', 8621 '\u2272', '\u2272', '\u297c', '\u297c', '\u230a', '\u230a', '\U0001d529', '\U0001d529', '\u2276', '\u2276', '\u2a91', '\u2a91', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u296a', '\u296a', '\u2584', '\u2584', '\u0459', '\u0459', '\u226a', '\u226a', '\u21c7', '\u21c7', '\u231e', '\u231e', '\u296b', '\u296b', '\u25fa', '\u25fa', '\u0140', '\u0140', '\u23b0', '\u23b0', 8622 '\u23b0', '\u23b0', '\u2268', '\u2268', '\u2a89', '\u2a89', '\u2a89', '\u2a89', '\u2a87', '\u2a87', '\u2a87', '\u2a87', '\u2268', '\u2268', '\u22e6', '\u22e6', '\u27ec', '\u27ec', 
'\u21fd', '\u21fd', '\u27e6', '\u27e6', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27fc', '\u27fc', '\u27f6', 8623 '\u27f6', '\u21ab', '\u21ab', '\u21ac', '\u21ac', '\u2985', '\u2985', '\U0001d55d', '\U0001d55d', '\u2a2d', '\u2a2d', '\u2a34', '\u2a34', '\u2217', '\u2217', '\u005f', '\u005f', '\u25ca', '\u25ca', '\u25ca', '\u25ca', '\u29eb', '\u29eb', '\u0028', '\u0028', '\u2993', '\u2993', '\u21c6', '\u21c6', '\u231f', 8624 '\u231f', '\u21cb', '\u21cb', '\u296d', '\u296d', '\u200e', '\u200e', '\u22bf', '\u22bf', '\u2039', '\u2039', '\U0001d4c1', '\U0001d4c1', '\u21b0', '\u21b0', '\u2272', '\u2272', '\u2a8d', '\u2a8d', '\u2a8f', '\u2a8f', '\u005b', '\u005b', '\u2018', '\u2018', '\u201a', '\u201a', '\u0142', '\u0142', '\u2aa6', '\u2aa6', '\u2a79', '\u2a79', '\u22d6', '\u22d6', '\u22cb', 8625 '\u22cb', '\u22c9', '\u22c9', '\u2976', '\u2976', '\u2a7b', '\u2a7b', '\u2996', '\u2996', '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u25c2', '\u25c2', '\u294a', '\u294a', '\u2966', '\u2966', '\u223a', '\u223a', '\u00af', '\u00af', '\u2642', '\u2642', '\u2720', '\u2720', '\u2720', '\u2720', '\u21a6', '\u21a6', '\u21a6', '\u21a6', '\u21a7', 8626 '\u21a7', '\u21a4', '\u21a4', '\u21a5', '\u21a5', '\u25ae', '\u25ae', '\u2a29', '\u2a29', '\u043c', '\u043c', '\u2014', '\u2014', '\u2221', '\u2221', '\U0001d52a', '\U0001d52a', '\u2127', '\u2127', '\u00b5', '\u00b5', '\u2223', '\u2223', '\u002a', '\u002a', '\u2af0', '\u2af0', '\u00b7', '\u00b7', '\u2212', '\u2212', '\u229f', 8627 '\u229f', '\u2238', '\u2238', '\u2a2a', '\u2a2a', '\u2adb', '\u2adb', '\u2026', '\u2026', '\u2213', '\u2213', '\u22a7', '\u22a7', '\U0001d55e', '\U0001d55e', '\u2213', '\u2213', '\U0001d4c2', '\U0001d4c2', '\u223e', '\u223e', '\u03bc', '\u03bc', '\u22b8', '\u22b8', '\u22b8', '\u22b8', '\u21cd', '\u21cd', '\u21ce', '\u21ce', '\u21cf', 8628 '\u21cf', '\u22af', '\u22af', '\u22ae', '\u22ae', '\u2207', '\u2207', '\u0144', '\u0144', '\u2249', '\u2249', '\u0149', '\u0149', '\u2249', '\u2249', '\u266e', '\u266e', '\u266e', 
'\u266e', '\u2115', '\u2115', '\u00a0', '\u00a0', '\u2a43', '\u2a43', '\u0148', '\u0148', '\u0146', '\u0146', '\u2247', '\u2247', '\u2a42', '\u2a42', '\u043d', 8629 '\u043d', '\u2013', '\u2013', '\u2260', '\u2260', '\u21d7', '\u21d7', '\u2924', '\u2924', '\u2197', '\u2197', '\u2197', '\u2197', '\u2262', '\u2262', '\u2928', '\u2928', '\u2204', '\u2204', '\u2204', '\u2204', '\U0001d52b', '\U0001d52b', '\u2271', '\u2271', '\u2271', '\u2271', '\u2275', '\u2275', '\u226f', '\u226f', '\u226f', '\u226f', '\u21ce', '\u21ce', '\u21ae', '\u21ae', 8630 '\u2af2', '\u2af2', '\u220b', '\u220b', '\u22fc', '\u22fc', '\u22fa', '\u22fa', '\u220b', '\u220b', '\u045a', '\u045a', '\u21cd', '\u21cd', '\u219a', '\u219a', '\u2025', '\u2025', '\u2270', '\u2270', '\u219a', '\u219a', '\u21ae', '\u21ae', '\u2270', '\u2270', '\u226e', '\u226e', '\u2274', '\u2274', '\u226e', '\u226e', '\u22ea', '\u22ea', '\u22ec', '\u22ec', 8631 '\u2224', '\u2224', '\U0001d55f', '\U0001d55f', '\u00ac', '\u00ac', '\u2209', '\u2209', '\u2209', '\u2209', '\u22f7', '\u22f7', '\u22f6', '\u22f6', '\u220c', '\u220c', '\u220c', '\u220c', '\u22fe', '\u22fe', '\u22fd', '\u22fd', '\u2226', '\u2226', '\u2226', '\u2226', '\u2a14', '\u2a14', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u2280', 8632 '\u2280', '\u21cf', '\u21cf', '\u219b', '\u219b', '\u219b', '\u219b', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u2281', '\u2281', '\u22e1', '\u22e1', '\U0001d4c3', '\U0001d4c3', '\u2224', '\u2224', '\u2226', '\u2226', '\u2241', '\u2241', '\u2244', '\u2244', '\u2244', '\u2244', '\u2224', '\u2224', '\u2226', '\u2226', '\u22e2', 8633 '\u22e2', '\u22e3', '\u22e3', '\u2284', '\u2284', '\u2288', '\u2288', '\u2288', '\u2288', '\u2281', '\u2281', '\u2285', '\u2285', '\u2289', '\u2289', '\u2289', '\u2289', '\u2279', '\u2279', '\u00f1', '\u00f1', '\u2278', '\u2278', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u22eb', '\u22eb', 8634 '\u22ed', '\u22ed', '\u03bd', '\u03bd', '\u0023', '\u0023', '\u2116', '\u2116', '\u2007', '\u2007', '\u22ad', 
'\u22ad', '\u2904', '\u2904', '\u22ac', '\u22ac', '\u29de', '\u29de', '\u2902', '\u2902', '\u2903', '\u2903', '\u21d6', '\u21d6', '\u2923', '\u2923', '\u2196', '\u2196', '\u2196', '\u2196', '\u2927', '\u2927', 8635 '\u24c8', '\u24c8', '\u00f3', '\u00f3', '\u229b', '\u229b', '\u229a', '\u229a', '\u00f4', '\u00f4', '\u043e', '\u043e', '\u229d', '\u229d', '\u0151', '\u0151', '\u2a38', '\u2a38', '\u2299', '\u2299', '\u29bc', '\u29bc', '\u0153', '\u0153', '\u29bf', '\u29bf', '\U0001d52c', '\U0001d52c', '\u02db', '\u02db', '\u00f2', '\u00f2', '\u29c1', '\u29c1', '\u29b5', '\u29b5', '\u03a9', '\u03a9', '\u222e', 8636 '\u222e', '\u21ba', '\u21ba', '\u29be', '\u29be', '\u29bb', '\u29bb', '\u203e', '\u203e', '\u29c0', '\u29c0', '\u014d', '\u014d', '\u03c9', '\u03c9', '\u03bf', '\u03bf', '\u29b6', '\u29b6', '\u2296', '\u2296', '\U0001d560', '\U0001d560', '\u29b7', '\u29b7', '\u29b9', '\u29b9', '\u2295', '\u2295', '\u2228', '\u2228', '\u21bb', '\u21bb', '\u2a5d', '\u2a5d', '\u2134', '\u2134', 8637 '\u2134', '\u2134', '\u00aa', '\u00aa', '\u00ba', '\u00ba', '\u22b6', '\u22b6', '\u2a56', '\u2a56', '\u2a57', '\u2a57', '\u2a5b', '\u2a5b', '\u2134', '\u2134', '\u00f8', '\u00f8', '\u2298', '\u2298', '\u00f5', '\u00f5', '\u2297', '\u2297', '\u2a36', '\u2a36', '\u00f6', '\u00f6', '\u233d', '\u233d', '\u2225', '\u2225', '\u00b6', '\u00b6', '\u2225', '\u2225', 8638 '\u2af3', '\u2af3', '\u2afd', '\u2afd', '\u2202', '\u2202', '\u043f', '\u043f', '\u0025', '\u0025', '\u002e', '\u002e', '\u2030', '\u2030', '\u22a5', '\u22a5', '\u2031', '\u2031', '\U0001d52d', '\U0001d52d', '\u03c6', '\u03c6', '\u03d5', '\u03d5', '\u2133', '\u2133', '\u260e', '\u260e', '\u03c0', '\u03c0', '\u22d4', '\u22d4', '\u03d6', '\u03d6', '\u210f', '\u210f', 8639 '\u210e', '\u210e', '\u210f', '\u210f', '\u002b', '\u002b', '\u2a23', '\u2a23', '\u229e', '\u229e', '\u2a22', '\u2a22', '\u2214', '\u2214', '\u2a25', '\u2a25', '\u2a72', '\u2a72', '\u00b1', '\u00b1', '\u2a26', '\u2a26', '\u2a27', '\u2a27', '\u00b1', '\u00b1', 
'\u2a15', '\u2a15', '\U0001d561', '\U0001d561', '\u00a3', '\u00a3', '\u227a', 8640 '\u227a', '\u2ab3', '\u2ab3', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u227a', '\u227a', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u2ab9', '\u2ab9', '\u2ab5', '\u2ab5', '\u22e8', '\u22e8', '\u227e', '\u227e', '\u2032', '\u2032', '\u2119', '\u2119', '\u2ab5', '\u2ab5', '\u2ab9', 8641 '\u2ab9', '\u22e8', '\u22e8', '\u220f', '\u220f', '\u232e', '\u232e', '\u2312', '\u2312', '\u2313', '\u2313', '\u221d', '\u221d', '\u221d', '\u221d', '\u227e', '\u227e', '\u22b0', '\u22b0', '\U0001d4c5', '\U0001d4c5', '\u03c8', '\u03c8', '\u2008', '\u2008', '\U0001d52e', '\U0001d52e', '\u2a0c', '\u2a0c', '\U0001d562', '\U0001d562', '\u2057', '\u2057', '\U0001d4c6', '\U0001d4c6', 8642 '\u210d', '\u210d', '\u2a16', '\u2a16', '\u003f', '\u003f', '\u225f', '\u225f', '\u21db', '\u21db', '\u21d2', '\u21d2', '\u291c', '\u291c', '\u290f', '\u290f', '\u2964', '\u2964', '\u0155', '\u0155', '\u221a', '\u221a', '\u29b3', '\u29b3', '\u27e9', '\u27e9', '\u2992', '\u2992', '\u29a5', '\u29a5', '\u27e9', '\u27e9', '\u00bb', 8643 '\u00bb', '\u2192', '\u2192', '\u2975', '\u2975', '\u21e5', '\u21e5', '\u2920', '\u2920', '\u2933', '\u2933', '\u291e', '\u291e', '\u21aa', '\u21aa', '\u21ac', '\u21ac', '\u2945', '\u2945', '\u2974', '\u2974', '\u21a3', '\u21a3', '\u219d', '\u219d', '\u291a', '\u291a', '\u2236', '\u2236', '\u211a', '\u211a', '\u290d', '\u290d', 8644 '\u2773', '\u2773', '\u007d', '\u007d', '\u005d', '\u005d', '\u298c', '\u298c', '\u298e', '\u298e', '\u2990', '\u2990', '\u0159', '\u0159', '\u0157', '\u0157', '\u2309', '\u2309', '\u007d', '\u007d', '\u0440', '\u0440', '\u2937', '\u2937', '\u2969', '\u2969', '\u201d', '\u201d', '\u201d', '\u201d', '\u21b3', '\u21b3', '\u211c', '\u211c', '\u211b', 8645 '\u211b', '\u211c', '\u211c', '\u211d', '\u211d', '\u25ad', '\u25ad', '\u00ae', '\u00ae', '\u297d', '\u297d', '\u230b', '\u230b', '\U0001d52f', '\U0001d52f', '\u21c1', '\u21c1', 
'\u21c0', '\u21c0', '\u296c', '\u296c', '\u03c1', '\u03c1', '\u03f1', '\u03f1', '\u2192', '\u2192', '\u21a3', '\u21a3', '\u21c1', '\u21c1', 8646 '\u21c0', '\u21c0', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u21c9', '\u21c9', '\u219d', '\u219d', '\u22cc', '\u22cc', '\u02da', '\u02da', '\u2253', '\u2253', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u200f', 8647 '\u200f', '\u23b1', '\u23b1', '\u23b1', '\u23b1', '\u2aee', '\u2aee', '\u27ed', '\u27ed', '\u21fe', '\u21fe', '\u27e7', '\u27e7', '\u2986', '\u2986', '\U0001d563', '\U0001d563', '\u2a2e', '\u2a2e', '\u2a35', '\u2a35', '\u0029', '\u0029', '\u2994', '\u2994', '\u2a12', '\u2a12', '\u21c9', '\u21c9', '\u203a', '\u203a', '\U0001d4c7', '\U0001d4c7', '\u21b1', 8648 '\u21b1', '\u005d', '\u005d', '\u2019', '\u2019', '\u2019', '\u2019', '\u22cc', '\u22cc', '\u22ca', '\u22ca', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25b8', '\u25b8', '\u29ce', '\u29ce', '\u2968', '\u2968', '\u211e', '\u211e', '\u015b', '\u015b', '\u201a', '\u201a', '\u227b', '\u227b', '\u2ab4', '\u2ab4', '\u2ab8', '\u2ab8', '\u0161', '\u0161', '\u227d', 8649 '\u227d', '\u2ab0', '\u2ab0', '\u015f', '\u015f', '\u015d', '\u015d', '\u2ab6', '\u2ab6', '\u2aba', '\u2aba', '\u22e9', '\u22e9', '\u2a13', '\u2a13', '\u227f', '\u227f', '\u0441', '\u0441', '\u22c5', '\u22c5', '\u22a1', '\u22a1', '\u2a66', '\u2a66', '\u21d8', '\u21d8', '\u2925', '\u2925', '\u2198', '\u2198', '\u2198', '\u2198', '\u00a7', '\u00a7', '\u003b', 8650 '\u003b', '\u2929', '\u2929', '\u2216', '\u2216', '\u2216', '\u2216', '\u2736', '\u2736', '\U0001d530', '\U0001d530', '\u2322', '\u2322', '\u266f', '\u266f', '\u0449', '\u0449', '\u0448', '\u0448', '\u2223', '\u2223', '\u2225', '\u2225', '\u00ad', '\u00ad', '\u03c3', '\u03c3', '\u03c2', '\u03c2', '\u03c2', '\u03c2', '\u223c', '\u223c', '\u2a6a', 8651 '\u2a6a', '\u2243', '\u2243', '\u2243', '\u2243', '\u2a9e', '\u2a9e', '\u2aa0', '\u2aa0', '\u2a9d', '\u2a9d', '\u2a9f', '\u2a9f', '\u2246', '\u2246', '\u2a24', '\u2a24', '\u2972', '\u2972', 
'\u2190', '\u2190', '\u2216', '\u2216', '\u2a33', '\u2a33', '\u29e4', '\u29e4', '\u2223', '\u2223', '\u2323', '\u2323', '\u2aaa', '\u2aaa', '\u2aac', 8652 '\u2aac', '\u044c', '\u044c', '\u002f', '\u002f', '\u29c4', '\u29c4', '\u233f', '\u233f', '\U0001d564', '\U0001d564', '\u2660', '\u2660', '\u2660', '\u2660', '\u2225', '\u2225', '\u2293', '\u2293', '\u2294', '\u2294', '\u228f', '\u228f', '\u2291', '\u2291', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', '\u2292', '\u2292', 8653 '\u2290', '\u2290', '\u2292', '\u2292', '\u25a1', '\u25a1', '\u25a1', '\u25a1', '\u25aa', '\u25aa', '\u25aa', '\u25aa', '\u2192', '\u2192', '\U0001d4c8', '\U0001d4c8', '\u2216', '\u2216', '\u2323', '\u2323', '\u22c6', '\u22c6', '\u2606', '\u2606', '\u2605', '\u2605', '\u03f5', '\u03f5', '\u03d5', '\u03d5', '\u00af', 8654 '\u00af', '\u2282', '\u2282', '\u2ac5', '\u2ac5', '\u2abd', '\u2abd', '\u2286', '\u2286', '\u2ac3', '\u2ac3', '\u2ac1', '\u2ac1', '\u2acb', '\u2acb', '\u228a', '\u228a', '\u2abf', '\u2abf', '\u2979', '\u2979', '\u2282', '\u2282', '\u2286', '\u2286', '\u2ac5', '\u2ac5', '\u228a', '\u228a', '\u2acb', '\u2acb', 8655 '\u2ac7', '\u2ac7', '\u2ad5', '\u2ad5', '\u2ad3', '\u2ad3', '\u227b', '\u227b', '\u2ab8', '\u2ab8', '\u227d', '\u227d', '\u2ab0', '\u2ab0', '\u2aba', '\u2aba', '\u2ab6', '\u2ab6', '\u22e9', '\u22e9', '\u227f', '\u227f', '\u2211', '\u2211', '\u266a', '\u266a', '\u2283', '\u2283', '\u00b9', '\u00b9', '\u00b2', 8656 '\u00b2', '\u00b3', '\u00b3', '\u2ac6', '\u2ac6', '\u2abe', '\u2abe', '\u2ad8', '\u2ad8', '\u2287', '\u2287', '\u2ac4', '\u2ac4', '\u27c9', '\u27c9', '\u2ad7', '\u2ad7', '\u297b', '\u297b', '\u2ac2', '\u2ac2', '\u2acc', '\u2acc', '\u228b', '\u228b', '\u2ac0', '\u2ac0', '\u2283', '\u2283', '\u2287', '\u2287', '\u2ac6', 8657 '\u2ac6', '\u228b', '\u228b', '\u2acc', '\u2acc', '\u2ac8', '\u2ac8', '\u2ad4', '\u2ad4', '\u2ad6', '\u2ad6', '\u21d9', '\u21d9', '\u2926', '\u2926', '\u2199', '\u2199', '\u2199', '\u2199', '\u292a', '\u292a', '\u00df', 
'\u00df', '\u2316', '\u2316', '\u03c4', '\u03c4', '\u23b4', '\u23b4', '\u0165', '\u0165', '\u0163', 8658 '\u0163', '\u0442', '\u0442', '\u20db', '\u20db', '\u2315', '\u2315', '\U0001d531', '\U0001d531', '\u2234', '\u2234', '\u2234', '\u2234', '\u03b8', '\u03b8', '\u03d1', '\u03d1', '\u03d1', '\u03d1', '\u2248', '\u2248', '\u223c', '\u223c', '\u2009', '\u2009', '\u2248', '\u2248', '\u223c', '\u223c', '\u00fe', '\u00fe', '\u02dc', 8659 '\u02dc', '\u00d7', '\u00d7', '\u22a0', '\u22a0', '\u2a31', '\u2a31', '\u2a30', '\u2a30', '\u222d', '\u222d', '\u2928', '\u2928', '\u22a4', '\u22a4', '\u2336', '\u2336', '\u2af1', '\u2af1', '\U0001d565', '\U0001d565', '\u2ada', '\u2ada', '\u2929', '\u2929', '\u2034', '\u2034', '\u2122', '\u2122', '\u25b5', '\u25b5', '\u25bf', '\u25bf', 8660 '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u225c', '\u225c', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25ec', '\u25ec', '\u225c', '\u225c', '\u2a3a', '\u2a3a', '\u2a39', '\u2a39', '\u29cd', '\u29cd', '\u2a3b', '\u2a3b', '\u23e2', '\u23e2', '\U0001d4c9', 8661 '\U0001d4c9', '\u0446', '\u0446', '\u045b', '\u045b', '\u0167', '\u0167', '\u226c', '\u226c', '\u219e', '\u219e', '\u21a0', '\u21a0', '\u21d1', '\u21d1', '\u2963', '\u2963', '\u00fa', '\u00fa', '\u2191', '\u2191', '\u045e', '\u045e', '\u016d', '\u016d', '\u00fb', '\u00fb', '\u0443', '\u0443', '\u21c5', '\u21c5', '\u0171', 8662 '\u0171', '\u296e', '\u296e', '\u297e', '\u297e', '\U0001d532', '\U0001d532', '\u00f9', '\u00f9', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u2580', '\u2580', '\u231c', '\u231c', '\u231c', '\u231c', '\u230f', '\u230f', '\u25f8', '\u25f8', '\u016b', '\u016b', '\u00a8', '\u00a8', '\u0173', '\u0173', '\U0001d566', '\U0001d566', '\u2191', '\u2191', '\u2195', 8663 '\u2195', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u228e', '\u228e', '\u03c5', '\u03c5', '\u03d2', '\u03d2', '\u03c5', '\u03c5', '\u21c8', '\u21c8', '\u231d', '\u231d', '\u231d', '\u231d', '\u230e', '\u230e', '\u016f', '\u016f', '\u25f9', '\u25f9', '\U0001d4ca', 
'\U0001d4ca', '\u22f0', '\u22f0', 8664 '\u0169', '\u0169', '\u25b5', '\u25b5', '\u25b4', '\u25b4', '\u21c8', '\u21c8', '\u00fc', '\u00fc', '\u29a7', '\u29a7', '\u21d5', '\u21d5', '\u2ae8', '\u2ae8', '\u2ae9', '\u2ae9', '\u22a8', '\u22a8', '\u299c', '\u299c', '\u03f5', '\u03f5', '\u03f0', '\u03f0', '\u2205', '\u2205', '\u03d5', '\u03d5', '\u03d6', '\u03d6', '\u221d', 8665 '\u221d', '\u2195', '\u2195', '\u03f1', '\u03f1', '\u03c2', '\u03c2', '\u03d1', '\u03d1', '\u22b2', '\u22b2', '\u22b3', '\u22b3', '\u0432', '\u0432', '\u22a2', '\u22a2', '\u2228', '\u2228', '\u22bb', '\u22bb', '\u225a', '\u225a', '\u22ee', '\u22ee', '\u007c', '\u007c', '\u007c', '\u007c', '\U0001d533', 8666 '\U0001d533', '\u22b2', '\u22b2', '\U0001d567', '\U0001d567', '\u221d', '\u221d', '\u22b3', '\u22b3', '\U0001d4cb', '\U0001d4cb', '\u299a', '\u299a', '\u0175', '\u0175', '\u2a5f', '\u2a5f', '\u2227', '\u2227', '\u2259', '\u2259', '\u2118', '\u2118', '\U0001d534', '\U0001d534', '\U0001d568', '\U0001d568', '\u2118', '\u2118', '\u2240', '\u2240', '\u2240', '\u2240', '\U0001d4cc', '\U0001d4cc', '\u22c2', '\u22c2', '\u25ef', 8667 '\u25ef', '\u22c3', '\u22c3', '\u25bd', '\u25bd', '\U0001d535', '\U0001d535', '\u27fa', '\u27fa', '\u27f7', '\u27f7', '\u03be', '\u03be', '\u27f8', '\u27f8', '\u27f5', '\u27f5', '\u27fc', '\u27fc', '\u22fb', '\u22fb', '\u2a00', '\u2a00', '\U0001d569', '\U0001d569', '\u2a01', '\u2a01', '\u2a02', '\u2a02', '\u27f9', '\u27f9', '\u27f6', '\u27f6', '\U0001d4cd', '\U0001d4cd', '\u2a06', '\u2a06', '\u2a04', 8668 '\u2a04', '\u25b3', '\u25b3', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u00fd', '\u00fd', '\u044f', '\u044f', '\u0177', '\u0177', '\u044b', '\u044b', '\u00a5', '\u00a5', '\U0001d536', '\U0001d536', '\u0457', '\u0457', '\U0001d56a', '\U0001d56a', '\U0001d4ce', '\U0001d4ce', '\u044e', '\u044e', '\u00ff', '\u00ff', '\u017a', '\u017a', '\u017e', '\u017e', '\u0437', '\u0437', '\u017c', '\u017c', '\u2128', 8669 '\u2128', '\u03b6', '\u03b6', '\U0001d537', '\U0001d537', '\u0436', 
'\u0436', '\u21dd', '\u21dd', '\U0001d56b', '\U0001d56b', '\U0001d4cf', '\U0001d4cf', '\u200d', '\u200d', '\u200c', '\u200c', ];



// dom event support, if you want to use it

/// used for DOM events
version(dom_with_events)
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
version(dom_with_events)
class Event {
	/// Creates an event named `eventName` whose `srcElement` (aliased as `target`) is `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;
	bool propagationStopped;
	string eventName;

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	/// `dispatch` sets this to false for the capture pass and true for the bubble pass; `send` leaves it untouched.
	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		// only the target's own bubbling handlers run; propagationStopped is not consulted here
		if(eventName in e.bubblingEventHandlers)
		foreach(handler; e.bubblingEventHandlers[eventName])
			handler(e, this);

		if(!defaultPrevented)
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the event using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the target, the last entry is its topmost ancestor
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			auto l = curr;
			chain ~= l;
			curr = curr.parentNode;

		}

		isBubbling = false;

		// capture pass: walk from the topmost ancestor down to the target
		foreach(e; chain.retro()) {
			if(eventName in e.capturingEventHandlers)
			foreach(handler; e.capturingEventHandlers[eventName])
				handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		// bubble pass: walk from the target back up, unless a capturing handler stopped propagation
		isBubbling = true;
		if(!propagationStopped)
		foreach(e; chain) {
			if(eventName in e.bubblingEventHandlers)
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);

			if(propagationStopped)
				break;
		}

		// default handlers run for every element in the chain; only preventDefault() suppresses
		// them, stopPropagation() does not
		if(!defaultPrevented)
		foreach(e; chain) {
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
		}
	}
}

/// A small bundle of validation-related attributes (`pattern`, `required`, `placeholder`) that can be read from or written to an element.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// Returns a default-initialized set of options (nothing required, no pattern, no placeholder).
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	/// Returns options with only `isRequired` set.
	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	/// Returns options with the given `pattern`; `required` is copied into `isRequired`.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Builds options from the `required`, `pattern` and `placeholder` attributes present on `e`.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Writes the options that are set onto `e` and returns `e`. Options that are not set are skipped, so existing attributes on `e` are never cleared here.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
//
// Indices handed to this class are absolute positions in the whole stream. `data` only holds
// the part starting at `virtualStartIndex`, so every access subtracts that offset.
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a complete string; no helpers are set, so `hasMore` is always false.
		this(string d) {
			this.data = d;
		}

		/// Pulls data on demand from `getMoreHelper` for as long as `hasMoreHelper` returns true. The first chunk is fetched immediately.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();

			// stdout.flush();
		}

		/// Number of bytes before the requested position that `markDataDiscardable` always retains.
		enum contextToKeep = 100;

		/++
			Tells the stream that everything more than `contextToKeep` bytes before absolute
			position `p` may be dropped. Later indices stay absolute.

			A request that would not drop anything beyond what is already gone is a no-op.
		+/
		void markDataDiscardable(size_t p) {

			if(p < contextToKeep)
				return;
			p -= contextToKeep;

			// Everything before virtualStartIndex is already gone. Without this guard a call
			// with p closer than contextToKeep to the previous discard point would make
			// `p - virtualStartIndex` underflow (assert failure, or a bad slice in release builds).
			if(p <= virtualStartIndex)
				return;

			// pretends data[0 .. p] is gone and adjusts future things as if it was still there
			startingLineNumber = getLineNumber(p);
			data = data[p - virtualStartIndex .. $];
			virtualStartIndex = p;
		}

		/// Returns the 1-based line number of absolute position `p`, counting '\n' in the retained data on top of `startingLineNumber`. `p` must not be before `virtualStartIndex`.
		int getLineNumber(size_t p) {
			int line = startingLineNumber;
			assert(p >= virtualStartIndex);
			foreach(c; data[0 .. p - virtualStartIndex])
				if(c == '\n')
					line++;
			return line;
		}


		/// Absolute length of the stream so far (retained data plus the discarded prefix). May append more data first.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= (data.length + virtualStartIndex) && hasMore()) {
				data ~= getMore();
			}
			return data.length + virtualStartIndex;
		}

		/// Reads the byte at absolute index `idx` and remembers the highest index touched so `length` knows when to fetch more.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx - virtualStartIndex];
		}

		/// Slices by absolute indices; both ends must be at or after `virtualStartIndex`.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			// writeln(virtualStartIndex, " " , start, " ", end);
			assert(start >= virtualStartIndex);
			assert(end >= virtualStartIndex);
			return data[start - virtualStartIndex .. end - virtualStartIndex];
		}

		final size_t opDollar() {
			return length();
		}

		// NOTE(review): unlike string concatenation, `stream ~ s` appends to this object in
		// place and returns it, exactly like `~=` below.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Replaces the whole content with `rhs`, which starts again at absolute index 0.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			// The old offsets described the buffer that was just replaced; keeping them
			// would shift every index into the new string by the discarded amount.
			this.lastIdx = 0;
			this.virtualStartIndex = 0;
			this.startingLineNumber = 1;
			return this;
		}
	private:
		string data;

		// highest absolute index read so far
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;

		// line number of the first retained byte
		int startingLineNumber = 1;
		// absolute index of data[0]
		size_t virtualStartIndex = 0;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

/// Passes `obj` and `name` to `fillData` from `arsd.database`, with a setter that calls `form.setValue(k, v)`.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((k, v) => form.setValue(k, v), obj, name);
}

/++
	Normalizes the whitespace in the given text according to HTML rules.

	History:
		Added March 25, 2022 (dub v10.8)

		The `stripLeadingAndTrailing` argument was added September 13, 2024 (dub v11.6).
+/
/++
	Params:
		text = the text to normalize
		stripLeadingAndTrailing = if true, whitespace at the very start and end is removed
			entirely; if false, it is collapsed to a single space like any other run

	Returns:
		`text` with every run of space, tab, line feed and carriage return replaced by one
		space. No other character is treated as whitespace, so a non-breaking space
		(U+00A0) is always preserved, wherever it appears.
+/
string normalizeWhitespace(string text, bool stripLeadingAndTrailing = true) {
	string ret;
	ret.reserve(text.length);
	// starting out "already inside whitespace" makes the loop drop a leading run when stripping
	bool lastWasWhite = stripLeadingAndTrailing;
	foreach(char ch; text) {
		if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
			if(lastWasWhite)
				continue;
			lastWasWhite = true;
			ch = ' ';
		} else {
			lastWasWhite = false;
		}

		ret ~= ch;
	}

	if(stripLeadingAndTrailing) {
		// The loop leaves at most one collapsed ' ' at the end. Remove only that, so that
		// characters the loop does not treat as whitespace (U+00A0, form feed, ...) are kept
		// at the end just as they are at the start.
		if(ret.length && ret[$ - 1] == ' ')
			ret = ret[0 .. $ - 1];
		return ret;
	} else {
		/+
		if(lastWasWhite && (ret.length == 0 || ret[$-1] != ' '))
			ret ~= ' ';
		+/
		return ret;
	}
}

unittest {
	assert(normalizeWhitespace(" foo ") == "foo");
	assert(normalizeWhitespace("    foo   ") == "foo");
	assert(normalizeWhitespace(" f\n \t oo ") == "f oo");
	assert(normalizeWhitespace(" foo ", false) == " foo ");
	assert(normalizeWhitespace("    foo   ", false) == " foo ");
	assert(normalizeWhitespace("\nfoo", false) == " foo");

	assert(normalizeWhitespace("") == "");
	assert(normalizeWhitespace(" \t\r\n ") == "");

	// a non-breaking space is not collapsible whitespace, at either end
	assert(normalizeWhitespace("\u00a0foo\u00a0") == "\u00a0foo\u00a0");
	assert(normalizeWhitespace("foo \u00a0 ") == "foo \u00a0");
}

unittest {
	Document document;

	document = new Document("<test> foo \r </test>");
	assert(document.root.visibleText == "foo");

	document = new Document("<test> foo \r <br>hi</test>");
	assert(document.root.visibleText == "foo\nhi");

	document = new Document("<test> foo \r <br>hi<pre>hi\nthere\n indent<br />line</pre></test>");
	assert(document.root.visibleText == "foo\nhihi\nthere\n indent\nline", document.root.visibleText);
}

/+
/+
	Syntax:

		Tag: tagname#id.class
		Tree: Tag(Children, comma, separated...)
		Children: Tree or Variable
		Variable: $varname with optional |funcname following.
9052 9053 If a variable has a tree after it, it breaks the variable down: 9054 * if array, foreach it does the tree 9055 * if struct, it breaks down the member variables 9056 9057 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 9058 +/ 9059 struct Stringplate { 9060 /++ 9061 9062 +/ 9063 this(string s) { 9064 9065 } 9066 9067 /++ 9068 9069 +/ 9070 Element expand(T...)(T vars) { 9071 return null; 9072 } 9073 } 9074 /// 9075 unittest { 9076 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 9077 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 9078 } 9079 +/ 9080 9081 /* Returns true only if every node in `children` passes one of two checks: it is a text node whose stripped value is non-empty, or its tag name is in `inlineElements` and its own children pass this same test recursively. Any other node makes the result false. An empty `children` array yields true. NOTE(review): a whitespace-only text node fails the first check and is then judged by its tag name like an element - confirm that is intended. */ bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) { 9082 foreach(child; children) { 9083 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 9084 // cool 9085 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) { 9086 // cool, this is an inline element and none of its children contradict that 9087 } else { 9088 // prolly block 9089 return false; 9090 } 9091 } 9092 return true; 9093 } 9094 9095 /* True for exactly four characters: space, carriage return, line feed and tab. */ private bool isSimpleWhite(dchar c) { 9096 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 9097 } 9098 9099 unittest { 9100 // Test for issue #120 9101 string s = `<html> 9102 <body> 9103 <P>AN 9104 <P>bubbles</P> 9105 <P>giggles</P> 9106 </body> 9107 </html>`; 9108 auto doc = new Document(); 9109 doc.parseUtf8(s, false, false); 9110 auto s2 = doc.toString(); 9111 assert( 9112 s2.indexOf("bubbles") < s2.indexOf("giggles"), 9113 "paragraph order incorrect:\n" ~ s2); 9114 } 9115 9116 unittest { 9117 // test for suncarpet email dec 24 2019 9118 // arbitrary id asduiwh 9119 auto document = new Document("<html> 9120 <head> 9121 <meta charset=\"utf-8\"></meta> 9122 <title>Element.querySelector Test</title> 9123 </head> 9124 <body> 9125 <div id=\"foo\"> 9126 <div>Foo</div> 9127 <div>Bar</div> 9128 </div> 9129 
<div id=\"empty\"></div> 9130 <div id=\"empty-but-text\">test</div> 9131 </body> 9132 </html>"); 9133 9134 auto doc = document; 9135 9136 { 9137 auto empty = doc.requireElementById("empty"); 9138 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 9139 } 9140 { 9141 auto empty = doc.requireElementById("empty-but-text"); 9142 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 9143 } 9144 9145 assert(doc.querySelectorAll("div div").length == 2); 9146 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 9147 assert(doc.querySelectorAll("> html").length == 0); 9148 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 9149 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 9150 9151 9152 assert(doc.root.matches("html")); 9153 assert(!doc.root.matches("nothtml")); 9154 assert(doc.querySelector("#foo > div").matches("div")); 9155 assert(doc.querySelector("body > #foo").matches("#foo")); 9156 9157 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 9158 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 9159 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 9160 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 9161 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 9162 9163 // also confirming the querySelector works via the mdn definition 9164 auto foo = doc.requireSelector("#foo"); 9165 assert(foo.querySelector("#foo > div") !is null); 9166 assert(foo.querySelector("body #foo > div") !is null); 9167 9168 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 9169 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 
9170 //assert(foo.querySelectorAll("#foo > div").length == 2); 9171 } 9172 9173 unittest { 9174 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 9175 auto document = new Document(`<article> 9176 <div id="div-01">Here is div-01 9177 <div id="div-02">Here is div-02 9178 <div id="div-03">Here is div-03</div> 9179 </div> 9180 </div> 9181 </article>`, true, true); 9182 9183 auto el = document.getElementById("div-03"); 9184 assert(el.closest("#div-02").id == "div-02"); 9185 assert(el.closest("div div").id == "div-03"); 9186 assert(el.closest("article > div").id == "div-01"); 9187 assert(el.closest(":not(div)").tagName == "article"); 9188 9189 assert(el.closest("p") is null); 9190 assert(el.closest("p, div") is el); 9191 } 9192 9193 unittest { 9194 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 9195 auto document = new Document(`<test> 9196 <div class="foo"><p>cool</p><span>bar</span></div> 9197 <main><p>two</p></main> 9198 </test>`); 9199 9200 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 9201 assert(document.querySelector("div:where(.foo)") !is null); 9202 } 9203 9204 unittest { 9205 immutable string html = q{ 9206 <root> 9207 <div class="roundedbox"> 9208 <table> 9209 <caption class="boxheader">Recent Reviews</caption> 9210 <tr> 9211 <th>Game</th> 9212 <th>User</th> 9213 <th>Rating</th> 9214 <th>Created</th> 9215 </tr> 9216 9217 <tr> 9218 <td>June 13, 2020 15:10</td> 9219 <td><a href="/reviews/8833">[Show]</a></td> 9220 </tr> 9221 9222 <tr> 9223 <td>June 13, 2020 15:02</td> 9224 <td><a href="/reviews/8832">[Show]</a></td> 9225 </tr> 9226 9227 <tr> 9228 <td>June 13, 2020 14:41</td> 9229 <td><a href="/reviews/8831">[Show]</a></td> 9230 </tr> 9231 </table> 9232 </div> 9233 </root> 9234 }; 9235 9236 auto doc = new Document(cast(string)html); 9237 // this should select the second table row, but... 
9238 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9239 assert(rd !is null); 9240 assert(rd.href == "/reviews/8832"); 9241 9242 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9243 assert(rd !is null); 9244 assert(rd.href == "/reviews/8832"); 9245 } 9246 9247 /* Constructing an XmlDocument from this malformed input must throw an Exception; reaching the assert(0) after the constructor means nothing was thrown. */ unittest { 9248 try { 9249 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 9250 assert(0); 9251 } catch(Exception e) { 9252 // good; it should throw an exception, not an error. 9253 } 9254 } 9255 9256 /* Every comparison in this test is an exact string match, so the whitespace inside the expected literals is significant. */ unittest { 9257 // toPrettyString is not stable, but these are some best-effort attempts 9258 // despite these being in a test, I might change these anyway! 9259 assert(Element.make("a").toPrettyString == "<a></a>"); 9260 assert(Element.make("a", "").toPrettyString(false, 0, " ") == "<a></a>"); 9261 assert(Element.make("a", " ").toPrettyString(false, 0, " ") == "<a> </a>");//, Element.make("a", " ").toPrettyString(false, 0, " ")); 9262 assert(Element.make("a", "b").toPrettyString == "<a>b</a>"); 9263 assert(Element.make("a", "b").toPrettyString(false, 0, "") == "<a>b</a>"); 9264 9265 { 9266 auto document = new Document("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9267 auto pretty = document.toPrettyString(false, 0, " "); 9268 assert(pretty == 9269 `<!DOCTYPE html> 9270 <html> 9271 <body> 9272 <p>hello <a href="world">world</a></p> 9273 </body> 9274 </html>`, pretty); 9275 } 9276 9277 { 9278 auto document = new XmlDocument("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9279 assert(document.toPrettyString(false, 0, " ") == 9280 `<?xml version="1.0" encoding="UTF-8"?> 9281 <html> 9282 <body> 9283 <p> 9284 hello 9285 <a href="world">world</a> 9286 </p> 9287 </body> 9288 </html>`); 9289 } 9290 9291 foreach(test; [ 9292 "<a att=\"http://ele\"><b><ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n 
<ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>", 9293 "<a att=\"http://ele\"><b><ele1>Hello</ele1><c><d><ele2>How are you?</ele2></d><e><ele3>Good & you?</ele3></e></c></b></a>", 9294 ] ) 9295 { 9296 auto document = new XmlDocument(test); 9297 assert(document.root.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9298 assert(document.toPrettyString(false, 0, " ") == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9299 auto omg = document.root; 9300 omg.parent_ = null; 9301 assert(omg.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9302 } 9303 9304 { 9305 auto document = new XmlDocument(`<a><b>toto</b><c></c></a>`); 9306 assert(document.root.toPrettyString(false, 0, null) == `<a><b>toto</b><c></c></a>`); 9307 assert(document.root.toPrettyString(false, 0, " ") == `<a> 9308 <b>toto</b> 9309 <c></c> 9310 </a>`); 9311 } 9312 9313 { 9314 auto str = `<!DOCTYPE html> 9315 <html> 9316 <head> 9317 <title>Test</title> 9318 </head> 9319 <body> 9320 <p>Hello there</p> 9321 <p>I like <a href="">Links</a></p> 9322 <div> 9323 this is indented since there's a block inside 9324 <p>this is the block</p> 9325 and this gets its own line 9326 </div> 9327 </body> 9328 </html>`; 9329 auto doc = new Document(str, true, true); 9330 assert(doc.toPrettyString == str); 9331 } 9332 } 9333 9334 /* Checks the child (>) and general sibling (~) combinators and the :has() pseudo-class through requireSelector and querySelector. */ unittest { 9335 auto document = new Document("<foo><items><item><title>test</title><desc>desc</desc></item></items></foo>"); 9336 auto items = document.root.requireSelector("> items"); 9337 auto item = items.requireSelector("> item"); 9338 auto title = 
item.requireSelector("> title"); 9339 9340 // this not actually implemented at this point but i might want to later. it prolly should work as an extension of the standard behavior 9341 // assert(title.requireSelector("~ desc").innerText == "desc"); 9342 9343 assert(item.requireSelector("title ~ desc").innerText == "desc"); 9344 9345 assert(items.querySelector("item:has(title)") !is null); 9346 assert(items.querySelector("item:has(nothing)") is null); 9347 9348 assert(title.innerText == "test"); 9349 } 9350 9351 unittest { 9352 auto document = new Document("broken"); // just ensuring it doesn't crash 9353 } 9354 9355 9356 /* 9357 Copyright: Adam D. Ruppe, 2010 - 2023 9358 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 9359 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 9360 */