1 // FIXME: i want css nesting via the new standard now. 2 3 // FIXME: xml namespace support??? 4 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 5 // FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility... 6 7 // FIXME: the scriptable list is quite arbitrary 8 9 10 // xml entity references?! 11 12 /++ 13 This is an html DOM implementation, started with cloning 14 what the browser offers in Javascript, but going well beyond 15 it in convenience. 16 17 If you can do it in Javascript, you can probably do it with 18 this module, and much more. 19 20 --- 21 import arsd.dom; 22 23 void main() { 24 auto document = new Document("<html><p>paragraph</p></html>"); 25 writeln(document.querySelector("p")); 26 document.root.innerHTML = "<p>hey</p>"; 27 writeln(document); 28 } 29 --- 30 31 BTW: this file optionally depends on `arsd.characterencodings`, to 32 help it correctly read files from the internet. You should be able to 33 get characterencodings.d from the same place you got this file. 34 35 If you want it to stand alone, just always use the `Document.parseUtf8` 36 function or the constructor that takes a string. 37 38 Symbol_groups: 39 40 core_functionality = 41 42 These members provide core functionality. The members on these classes 43 will provide most your direct interaction. 44 45 bonus_functionality = 46 47 These provide additional functionality for special use cases. 48 49 implementations = 50 51 These provide implementations of other functionality. 52 53 History: 54 The `toString` methods used to optionally take a Phobos `appender`, 55 but now it takes a private internal implementation as of August 26, 2025. This may change again. 
+/
module arsd.dom;

static import arsd.core;
import arsd.core : encodeUriComponent, decodeUriComponent;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes that may be reached directly, without going through .attr.
// The match is exact and case-sensitive.
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/++
	The main document interface, including a html or xml parser.
There are three main ways to create a Document:

	If you want to parse something and inspect the tags, you can use the [this|constructor]:
	---
		// create and parse some HTML in one call
		auto document = new Document("<html></html>");

		// or some XML
		auto document = new Document("<xml></xml>", true, true); // strict mode enabled

		// or better yet:
		auto document = new XmlDocument("<xml></xml>"); // specialized subclass
	---

	If you want to download something and parse it in one call, the [fromUrl] static function can help:
	---
		auto document = Document.fromUrl("http://dlang.org/");
	---
	(note that this requires my [arsd.characterencodings] and [arsd.http2] libraries)

	And, if you need to inspect things like `<%= foo %>` tags and comments, you can add them to the dom like this, with the [enableAddingSpecialTagsToDom]
	and [parseUtf8] or [parseGarbage] functions:
	---
		auto document = new Document();
		document.enableAddingSpecialTagsToDom();
		document.parseUtf8("<example></example>", true, true); // change the trues to false to switch from xml to html mode
	---

	You can also modify things like [selfClosedElements] and [rawSourceElements] before calling the `parse` family of functions to do further advanced tasks.

	However you parse it, it will put a few things into special variables.

	[root] contains the root element.
	[prolog] contains the instructions before the root (like `<!DOCTYPE html>`). To keep the original things, you will need to [enableAddingSpecialTagsToDom] first, otherwise the library will return generic strings in there. [piecesBeforeRoot] will have other parsed instructions, if [enableAddingSpecialTagsToDom] is called.
	[piecesAfterRoot] will contain any xml-looking data after the root tag is closed.
141 142 Most often though, you will not need to look at any of that data, since `Document` itself has methods like [querySelector], [appendChild], and more which will forward to the root [Element] for you. 143 +/ 144 /// Group: core_functionality 145 class Document : FileResource, DomParent { 146 inout(Document) asDocument() inout { return this; } 147 inout(Element) asElement() inout { return null; } 148 149 /++ 150 These three functions, `processTagOpen`, `processTagClose`, and `processNodeWhileParsing`, allow you to process elements as they are parsed and choose to not append them to the dom tree. 151 152 153 `processTagOpen` is called as soon as it reads the tag name and attributes into the passed `Element` structure, in order 154 of appearance in the file. `processTagClose` is called similarly, when that tag has been closed. In between, all descendant 155 nodes - including tags as well as text and other nodes - are passed to `processNodeWhileParsing`. Finally, after `processTagClose`, 156 the node itself is passed to `processNodeWhileParsing` only after its children. 
So, given:

		```xml
		<thing>
			<child>
				<grandchild></grandchild>
			</child>
		</thing>
		```

		It would call:

		$(NUMBERED_LIST
			* processTagOpen(thing)
			* processNodeWhileParsing(thing, whitespace text) // the newlines, spaces, and tabs between the thing tag and child tag
			* processTagOpen(child)
			* processNodeWhileParsing(child, whitespace text)
			* processTagOpen(grandchild)
			* processTagClose(grandchild)
			* processNodeWhileParsing(child, grandchild)
			* processNodeWhileParsing(child, whitespace text) // whitespace after the grandchild
			* processTagClose(child)
			* processNodeWhileParsing(thing, child)
			* processNodeWhileParsing(thing, whitespace text)
			* processTagClose(thing)
		)

		The Element objects passed to those functions are the same ones you'd see; the tag open and tag close calls receive the same
		object, so you can compare them with the `is` operator if you want.

		The default behavior of each function is that `processTagOpen` and `processTagClose` do nothing.
		`processNodeWhileParsing`'s default behavior is to call `parent.appendChild(child)`, in order to
		build the dom tree. If you do not want the dom tree, you can override this function to do nothing.

		If you do not choose to append child to parent in `processNodeWhileParsing`, the garbage collector is free to clean up
		the node even as the document is not finished parsing, allowing memory use to stay lower. Memory use will tend to scale
		approximately with the max depth in the element tree rather than the entire document size.

		To cancel processing before the end of a document, you'll have to throw an exception and catch it at your call to parse.
		There is no other way to stop early and there are no concrete plans to add one.
There are several approaches to use this: you might use `processTagOpen` and `processTagClose` to keep a stack or
		other state variables to process nodes as they come and never add them to the actual tree. You might also build partial
		subtrees to use all the convenient methods in `processTagClose`, but then not add that particular node to the rest of the
		tree to keep memory usage down.

		Examples:

		Suppose you have a large array of items under the root element you'd like to process individually, without
		taking all the items into memory at once. You can do that with code like this:
		---
		import arsd.dom;
		class MyStream : XmlDocument {
			this(string s) { super(s); } // need to forward the constructor we use

			override void processNodeWhileParsing(Element parent, Element child) {
				// don't append anything to the root node, since we don't need them
				// all in the tree - that'd take too much memory -
				// but still build any subtree for each individual item for ease of processing
				if(parent is root)
					return;
				else
					super.processNodeWhileParsing(parent, child);
			}

			int count;
			override void processTagClose(Element element) {
				if(element.tagName == "item") {
					// process the element here with all the regular dom functions on `element`
					count++;
					// can still use dom functions on the subtree we built
					assert(element.requireSelector("name").textContent == "sample");
				}
			}
		}

		void main() {
			// generate an example file with a million items
			string xml = "<list>";
			foreach(i; 0 .. 1_000_000) {
				xml ~= "<item><name>sample</name><type>example</type></item>";
			}
			xml ~= "</list>";

			auto document = new MyStream(xml);
			assert(document.count == 1_000_000);
		}
		---

		This example runs in about 1/10th of the memory and 2/3 of the time on my computer relative to a default [XmlDocument] full tree dom.
By overriding these three functions to fit the specific document and processing requirements you have, you might realize even bigger
		gains over the normal full document tree while still getting most of the benefits of the convenient dom functions.

		Tip: if you use a [Utf8Stream] instead of a string, you might be able to bring the memory use further down. The easiest way to do that
		is something like this when loading from a file:

		---
		import std.stdio;
		auto file = File("filename.xml", "rb");
		auto textStream = new Utf8Stream(() {
			// get more
			auto buffer = new char[](32 * 1024);
			return cast(string) file.rawRead(buffer);
		}, () {
			// has more
			return !file.eof;
		});

		auto document = new XmlDocument(textStream);
		---

		You'll need to forward a constructor in your subclasses that takes `Utf8Stream` too if you want to subclass to override the streaming parsing functions.

		Note that if you do save parts of the document strings or objects, it might prevent the GC from freeing that string block anyway, since dom.d will often slice into its buffer while parsing instead of copying strings. It will depend on your specific case to know if this actually saves memory or not for you.

		Bugs:
			Even if you use a [Utf8Stream] to feed data and decline to append to the tree, the entire xml text is likely to
			end up in memory anyway.

		See_Also:
			[Document#examples]'s high level streaming example.

		History:
			`processNodeWhileParsing` was added January 6, 2023.

			`processTagOpen` and `processTagClose` were added February 21, 2025.
	+/
	void processTagOpen(Element what) {
	}

	/// ditto
	void processTagClose(Element what) {
	}

	/// ditto
	void processNodeWhileParsing(Element parent, Element child) {
		parent.appendChild(child);
	}

	/++
		Convenience method for web scraping.
Requires [arsd.http2] to be
		included in the build as well as [arsd.characterencodings].

		This will download the file from the given url and create a document
		off it, using a strict constructor or a [parseGarbage], depending on
		the value of `strictMode`.
	+/
	static Document fromUrl()(string url, bool strictMode = false) {
		import arsd.http2;
		auto client = new HttpClient();

		auto req = client.navigateTo(Uri(url), HttpVerb.GET);
		auto res = req.waitForCompletion();

		auto document = new Document();
		if(strictMode) {
			// strict: case sensitive, and trust the charset named in the response's content type
			document.parse(cast(string) res.content, true, true, res.contentTypeCharset);
		} else {
			// loose: parseGarbage determines the encoding from the data itself
			document.parseGarbage(cast(string) res.content);
		}

		return document;
	}

	/++
		Creates a document with the given source data. If you want HTML behavior, use `caseSensitive` and `strict` set to `false`. For XML mode, set them to `true`.

		Please note that anything after the root element will be found in [piecesAfterRoot]. Comments, processing instructions, and other special tags will be stripped out by default. You can customize this by using the zero-argument constructor and setting callbacks on the [parseSawComment], [parseSawBangInstruction], [parseSawAspCode], [parseSawPhpCode], and [parseSawQuestionInstruction] members, then calling one of the [parseUtf8], [parseGarbage], or [parse] functions. Calling the convenience method, [enableAddingSpecialTagsToDom], will enable all those things at once.

		See_Also:
			[parseGarbage]
			[parseUtf8]
			[parseUrl]
	+/
	this(string data, bool caseSensitive = false, bool strict = false) {
		parseUtf8(data, caseSensitive, strict);
	}

	/**
		Creates an empty document. It has *nothing* in it at all, ready.
	*/
	this() {

	}

	/++
		This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
It returns a struct that forwards calls to all elements it holds, and returns itself so you
		can chain it.

		Example: document["p"].innerText("hello").addClass("modified");

		Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }

		Note: always use function calls (not property syntax) and don't use toString in there for best results.

		You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
		you could put in some kind of custom filter function tho.
	+/
	ElementCollection opIndex(string selector) {
		// start from a collection holding the root, then narrow it with the selector
		auto e = ElementCollection(this.root);
		return e[selector];
	}

	string _contentType = "text/html; charset=utf-8";

	/// If you're using this for some other kind of XML, you can
	/// set the content type here.
	///
	/// Note: this has no impact on the function of this class.
	/// It is only used if the document is sent via a protocol like HTTP.
	///
	/// This may be called by parse() if it recognizes the data. Otherwise,
	/// if you don't set it, it assumes text/html; charset=utf-8.
	@property string contentType(string mimeType) {
		_contentType = mimeType;
		return _contentType;
	}

	/// implementing the FileResource interface, useful for sending via
	/// http automatically.
	@property string filename() const { return null; }

	/// implementing the FileResource interface, useful for sending via
	/// http automatically.
	override @property string contentType() const {
		return _contentType;
	}

	/// implementing the FileResource interface; it calls toString.
override immutable(ubyte)[] getData() const {
		return cast(immutable(ubyte)[]) this.toString();
	}


	/*
	/// Concatenates any consecutive text nodes
	void normalize() {

	}
	*/

	/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
	/// Call this before calling parse().

	/++
		Adds objects to the dom representing things normally stripped out during the default parse, like comments, `<!instructions>`, `<% code%>`, and `<? code?>` all at once.

		Note this will also preserve the prolog and doctype from the original file, if there was one.

		See_Also:
			[parseSawComment]
			[parseSawAspCode]
			[parseSawPhpCode]
			[parseSawQuestionInstruction]
			[parseSawBangInstruction]
	+/
	void enableAddingSpecialTagsToDom() {
		// every callback unconditionally answers "yes, keep this node"
		parseSawComment = (string) => true;
		parseSawAspCode = (string) => true;
		parseSawPhpCode = (string) => true;
		parseSawQuestionInstruction = (string) => true;
		parseSawBangInstruction = (string) => true;
	}

	/// If the parser sees a html comment, it will call this callback
	/// <!-- comment --> will call parseSawComment(" comment ")
	/// Return true if you want the node appended to the document. It will be in a [HtmlComment] object.
	bool delegate(string) parseSawComment;

	/// If the parser sees <% asp code... %>, it will call this callback.
	/// It will be passed "% asp code... %" or "%= asp code .. %"
	/// Return true if you want the node appended to the document. It will be in an [AspCode] object.
	bool delegate(string) parseSawAspCode;

	/// If the parser sees <?php php code... ?>, it will call this callback.
	/// It will be passed "?php php code... ?" or "?= php code .. ?"
	/// Note: dom.d cannot identify the other php <? code ?> short format.
	/// Return true if you want the node appended to the document. It will be in a [PhpCode] object.
	bool delegate(string) parseSawPhpCode;

	/// if it sees a <?xxx> that is not php or asp
	/// it calls this function with the contents.
	/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
	/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
	/// Return true if you want the node appended to the document. It will be in a [QuestionInstruction] object.
	bool delegate(string) parseSawQuestionInstruction;

	/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
	/// it calls this function with the contents.
	/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
	/// Return true if you want the node appended to the document. It will be in a [BangInstruction] object.
	bool delegate(string) parseSawBangInstruction;

	/// Given the kind of garbage you find on the Internet, try to make sense of it.
	/// Equivalent to document.parse(data, false, false, null);
	/// (Case-insensitive, non-strict, determine character encoding from the data.)

	/// NOTE: this makes no attempt at added security, but it will try to recover from anything instead of throwing.
	///
	/// It is a template so it lazily imports characterencodings.
	void parseGarbage()(string data) {
		parse(data, false, false, null);
	}

	/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
	/// Will throw exceptions on things like unclosed tags.
	void parseStrict(string data, bool pureXmlMode = false) {
		parseStream(toUtf8Stream(data), true, true, pureXmlMode);
	}

	/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
	/// tag soup, but does NOT try to correct bad character encodings.
	///
	/// They will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
		parseStream(toUtf8Stream(data), caseSensitive, strict);
	}

	// Works out the character encoding of `rawdata`, converts the data to UTF-8
	// and wraps the result for the parser.
	//
	// Params:
	//   rawdata      = the document bytes exactly as received
	//   dataEncoding = the encoding name if known; pass null to have it detected
	//                  (first by tryToDetermineEncoding, then by sniffing an xml
	//                  `encoding="..."` near the start or a `charset=` in the data)
	//   strict       = if true, an undetectable encoding throws MarkupException and
	//                  conversion errors propagate; if false, the label is checked
	//                  against the actual data and everything unknown or broken
	//                  falls back to Windows 1252
	//
	// Returns: a Utf8Stream over the converted data (see toUtf8Stream).
	//
	// this is a template so we get lazy import behavior
	Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
		import arsd.characterencodings;
		// gotta determine the data encoding. If you know it, pass it in above to skip all this.
		if(dataEncoding is null) {
			dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
			// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
			// Now, XML and HTML can both list encoding in the document, but we can't really parse
			// it here without changing a lot of code until we know the encoding. So I'm going to
			// do some hackish string checking.
			if(dataEncoding is null) {
				auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
				// first, look for an XML prolog
				auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
				if(idx != -1) {
					idx += "encoding=\"".length;
					// we're probably past the prolog if it's this far in; we might be looking at
					// content. Forget about it.
					if(idx > 100)
						idx = -1;
				}
				// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
				if(idx == -1) {
					idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
					if(idx != -1) {
						idx += "charset=".length;
						// the value may be wrapped in either kind of quote, or in none at all.
						// The length check matters: "charset=" can be the very last thing in
						// the data, and indexing past the end would kill even garbage mode.
						if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
							idx++;
					}
				}

				// found something in either branch...
				if(idx != -1) {
					// read till the end of the value or about 12 chars, whichever comes first...
					// a quote, tag end, semicolon or whitespace all end the value, so that
					// unquoted and single-quoted forms don't drag markup into the name
					auto end = idx;
					while(end < dataAsBytes.length && end - idx < 12) {
						auto c = dataAsBytes[end];
						if(c == '"' || c == '\'' || c == '>' || c == ';' || c == ' ' || c == '\t' || c == '\r' || c == '\n')
							break;
						end++;
					}

					// an empty value tells us nothing; leave it as "unknown"
					if(end > idx)
						dataEncoding = cast(string) dataAsBytes[idx .. end];
				}
				// otherwise, we just don't know.
			}
		}

		if(dataEncoding is null) {
			if(strict)
				throw new MarkupException("I couldn't figure out the encoding of this document.");
			else
			// if we really don't know by here, it means we already tried UTF-8,
			// looked for utf 16 and 32 byte order marks, and looked for xml or meta
			// tags... let's assume it's Windows-1252, since that's probably the most
			// common aside from utf that wouldn't be labeled.

			dataEncoding = "Windows 1252";
		}

		// and now, go ahead and convert it.

		string data;

		if(!strict) {
			// if we're in non-strict mode, we need to check
			// the document for mislabeling too; sometimes
			// web documents will say they are utf-8, but aren't
			// actually properly encoded. If it fails to validate,
			// we'll assume it's actually Windows encoding - the most
			// likely candidate for mislabeled garbage.
			dataEncoding = dataEncoding.toLower();
			dataEncoding = dataEncoding.replace(" ", "");
			dataEncoding = dataEncoding.replace("-", "");
			dataEncoding = dataEncoding.replace("_", "");
			if(dataEncoding == "utf8") {
				import std.utf;
				try {
					validate(rawdata);
				} catch(UTFException e) {
					dataEncoding = "Windows 1252";
				}
			}
		}

		// note: after the normalization above the name is never spelled "UTF-8" in
		// non-strict mode, so loose parsing always goes through convertToUtf8
		if(dataEncoding != "UTF-8") {
			if(strict)
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			else {
				try {
					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
				} catch(Exception e) {
					// unknown or broken encoding; best effort fallback
					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
				}
			}
		} else
			data = rawdata;

		return toUtf8Stream(data);
	}

	// Wraps already-UTF-8 text in whatever Utf8Stream is in this build:
	// the string itself if Utf8Stream is an alias of string, a new object otherwise.
	private
	Utf8Stream toUtf8Stream(in string rawdata) {
		string data = rawdata;
		static if(is(Utf8Stream == string))
			return data;
		else
			return new Utf8Stream(data);
	}

	/++
		List of elements that can be assumed to be self-closed
		in this document.
The default for a Document is a hard-coded
		list of ones appropriate for HTML. For [XmlDocument], it defaults
		to empty. You can modify this after construction but before parsing.

		History:
			Added February 8, 2021 (included in dub release 9.2)

			Changed from `string[]` to `immutable(string)[]` on
			February 4, 2024 (dub v11.5) to plug a hole discovered
			by the OpenD compiler's diagnostics.
	+/
	immutable(string)[] selfClosedElements = htmlSelfClosedElements;

	/++
		List of elements that contain raw CDATA content for this
		document, e.g. `<script>` and `<style>` for HTML. The parser
		will read until the closing string and put everything else
		in a [RawSource] object for future processing, not trying to
		do any further child nodes or attributes, etc.

		History:
			Added February 4, 2024 (dub v11.5)

	+/
	immutable(string)[] rawSourceElements = htmlRawSourceElements;

	/++
		List of elements that are considered inline for pretty printing.
		The default for a Document is hard-coded to something appropriate
		for HTML. For [XmlDocument], it defaults to empty. You can modify
		this after construction but before parsing.

		History:
			Added June 21, 2021 (included in dub release 10.1)

			Changed from `string[]` to `immutable(string)[]` on
			February 4, 2024 (dub v11.5) to plug a hole discovered
			by the OpenD compiler's diagnostics.
	+/
	immutable(string)[] inlineElements = htmlInlineElements;

	/**
		Take XMLish data and try to make the DOM tree out of it.

		The goal isn't to be perfect, but to just be good enough to
		approximate Javascript's behavior.

		If strict, it throws on something that doesn't make sense.
		(Examples: mismatched tags. It doesn't validate!)
		If not strict, it tries to recover anyway, and only throws
		when something is REALLY unworkable.
637 638 If strict is false, it uses a magic list of tags that needn't 639 be closed. If you are writing a document specifically for this, 640 try to avoid such - use self closed tags at least. Easier to parse. 641 642 The dataEncoding argument can be used to pass a specific 643 charset encoding for automatic conversion. If null (which is NOT 644 the default!), it tries to determine from the data itself, 645 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 646 647 If this assumption is wrong, it can throw on non-ascii 648 characters! 649 650 651 Note that it previously assumed the data was encoded as UTF-8, which 652 is why the dataEncoding argument defaults to that. 653 654 So it shouldn't break backward compatibility. 655 656 But, if you want the best behavior on wild data - figuring it out from the document 657 instead of assuming - you'll probably want to change that argument to null. 658 659 This is a template so it lazily imports arsd.characterencodings, which is required 660 to fix up data encodings. 661 662 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 663 dependency. If it is data from the Internet though, a random website, the encoding 664 is often a lie. This function, if dataEncoding == null, can correct for that, or 665 you can try parseGarbage. In those cases, arsd.characterencodings is required to 666 compile. 667 */ 668 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 669 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 670 parseStream(data, caseSensitive, strict); 671 } 672 673 // note: this work best in strict mode, unless data is just a simple string wrapper 674 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false, bool pureXmlMode = false) { 675 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 676 // of my big app. 
677 678 assert(data !is null); 679 680 // go through character by character. 681 // if you see a <, consider it a tag. 682 // name goes until the first non tagname character 683 // then see if it self closes or has an attribute 684 685 // if not in a tag, anything not a tag is a big text 686 // node child. It ends as soon as it sees a < 687 688 // Whitespace in text or attributes is preserved, but not between attributes 689 690 // & and friends are converted when I know them, left the same otherwise 691 692 693 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 694 //validate(data); // it *must* be UTF-8 for this to work correctly 695 696 sizediff_t pos = 0; 697 698 clear(); 699 700 loose = !caseSensitive; 701 702 bool sawImproperNesting = false; 703 bool nonNestableHackRequired = false; 704 705 int getLineNumber(sizediff_t p) { 706 return data.getLineNumber(p); 707 } 708 709 void parseError(string message) { 710 throw new MarkupException("char "~to!string(pos)~" (line "~to!string(getLineNumber(pos))~"): " ~ message); 711 } 712 713 bool eatWhitespace() { 714 bool ateAny = false; 715 while(pos < data.length && data[pos].isSimpleWhite) { 716 pos++; 717 ateAny = true; 718 } 719 return ateAny; 720 } 721 722 string readTagName() { 723 724 // this messes up the whereThisTagStarted in the error messages if we uncomment..... 
725 //data.markDataDiscardable(pos); 726 727 // remember to include : for namespaces 728 // basically just keep going until >, /, or whitespace 729 auto start = pos; 730 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 731 { 732 pos++; 733 if(pos == data.length) { 734 if(strict) 735 throw new Exception("tag name incomplete when file ended"); 736 else 737 break; 738 } 739 } 740 741 if(!caseSensitive) 742 return toLower(data[start..pos]); 743 else 744 return data[start..pos]; 745 } 746 747 string readAttributeName() { 748 // remember to include : for namespaces 749 // basically just keep going until >, /, or whitespace 750 auto start = pos; 751 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 752 { 753 if(data[pos] == '<') { 754 if(strict) 755 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 756 else 757 break; // e.g. <a href="something" <img src="poo" /></a>. 
The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 758 } 759 pos++; 760 if(pos == data.length) { 761 if(strict) 762 throw new Exception("unterminated attribute name"); 763 else 764 break; 765 } 766 } 767 768 if(!caseSensitive) 769 return toLower(data[start..pos]); 770 else 771 return data[start..pos]; 772 } 773 774 string readAttributeValue() { 775 if(pos >= data.length) { 776 if(strict) 777 throw new Exception("no attribute value before end of file"); 778 else 779 return null; 780 } 781 switch(data[pos]) { 782 case '\'': 783 case '"': 784 auto started = pos; 785 char end = data[pos]; 786 pos++; 787 auto start = pos; 788 while(pos < data.length && data[pos] != end) 789 pos++; 790 if(strict && pos == data.length) 791 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 792 string v = htmlEntitiesDecode(data[start..pos], strict); 793 pos++; // skip over the end 794 return v; 795 default: 796 if(strict) 797 parseError("Attributes must be quoted"); 798 // read until whitespace or terminator (/> or >) 799 auto start = pos; 800 while( 801 pos < data.length && 802 data[pos] != '>' && 803 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 804 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 805 !data[pos].isSimpleWhite) 806 pos++; 807 808 string v = htmlEntitiesDecode(data[start..pos], strict); 809 // don't skip the end - we'll need it later 810 return v; 811 } 812 } 813 814 TextNode readTextNode() { 815 auto start = pos; 816 while(pos < data.length && data[pos] != '<') { 817 pos++; 818 } 819 820 return TextNode.fromUndecodedString(this, data[start..pos]); 821 } 822 823 // this is obsolete! 
// Obsolete helper (see the note preceding it): consumes input up to the next
// '<' (or end of input) and wraps that slice, undecoded, in a RawSource node.
RawSource readCDataNode() {
	auto start = pos;
	while(pos < data.length && data[pos] != '<') {
		pos++;
	}

	return new RawSource(this, data[start..pos]);
}


// Result of one readElement call.
struct Ele {
	int type; // element or closing tag or nothing
		/*
			type == 0 means regular node, self-closed (element is valid)
			type == 1 means closing tag (payload is the tag name, element may be valid)
			type == 2 means you should ignore it completely
			type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
			type == 4 means the document was totally empty
		*/
	Element element; // for type == 0 or type == 3
	string payload; // for type == 1
}

// Recursively reads one item starting at `pos`: a text node, a comment / CDATA /
// <!...> instruction, an asp/php/<?...> section, a closing tag, or a full element
// together with its children.
//
// parentChain is the list of tag names currently open above this call. It is only
// maintained in loose (non-strict) mode, where it is used to decide whether a
// mismatched closing tag closes one of the ancestors.
//
// Returns an Ele whose `type` says how to interpret it (see Ele).
// Throws MarkupException (or whatever parseError throws) on malformed input in strict mode.
Ele readElement(string[] parentChain = null) {
	// FIXME: this is the slowest function in this module, by far, even in strict mode.
	// Loose mode should perform decently, but strict mode is the important one.
	if(!strict && parentChain is null)
		parentChain = [];

	static string[] recentAutoClosedTags;

	if(pos >= data.length)
	{
		if(strict) {
			throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
		} else {
			if(parentChain.length)
				return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
			else
				return Ele(4); // signal emptiness upstream
		}
	}

	// anything not starting with '<' is text; past this point data[pos] is
	// known to be '<', so no further check is needed before skipping it
	if(data[pos] != '<') {
		return Ele(0, readTextNode(), null);
	}

	pos++;
	if(pos == data.length) {
		if(strict)
			throw new MarkupException("Found trailing < at end of file");
		// if not strict, we'll just skip the switch
	} else
	switch(data[pos]) {
		// I don't care about these, so I just want to skip them
		case '!': // might be a comment, a doctype, or a special instruction
			pos++;

			// FIXME: we should store these in the tree too
			// though I like having it stripped out tbh.

			if(pos == data.length) {
				if(strict)
					throw new MarkupException("<! opened at end of file");
			} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
				// comment
				pos += 2;

				// FIXME: technically, a comment is anything
				// between -- and -- inside a <!> block.
				// so in <!-- test -- lol> , the " lol" is NOT a comment
				// and should probably be handled differently in here, but for now
				// I'll just keep running until --> since that's the common way

				auto commentStart = pos;
				// `<=` (not `<`) so that a terminator occupying the final three
				// bytes of the input is still examined; otherwise a comment that
				// closes exactly at end of file is misreported as unclosed
				while(pos + 3 <= data.length && data[pos .. pos + 3] != "-->")
					pos++;

				auto end = commentStart;

				if(pos + 3 > data.length) {
					// no room left for a terminator: the comment never closed
					if(strict)
						throw new MarkupException("unclosed comment");
					end = data.length;
					pos = data.length;
				} else {
					end = pos;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '>');
					pos++;
				}

				// the comment is only kept if the callback exists and asks for it
				if(parseSawComment !is null)
					if(parseSawComment(data[commentStart .. end])) {
						return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
					}
			} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
				pos += 7;

				auto cdataStart = pos;

				ptrdiff_t end = -1;
				typeof(end) cdataEnd;

				if(pos < data.length) {
					// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
					end = data[pos .. $].indexOf("]]>");
				}

				if(end == -1) {
					if(strict)
						throw new MarkupException("Unclosed CDATA section");
					// loose mode: produce an empty text node and leave pos where it
					// is, so whatever follows is parsed as ordinary markup
					end = pos;
					cdataEnd = pos;
				} else {
					cdataEnd = pos + end;
					pos = cdataEnd + 3;
				}

				return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
			} else {
				// some other <!xxx> instruction, e.g. a doctype
				auto start = pos;
				while(pos < data.length && data[pos] != '>')
					pos++;

				auto bangEnds = pos;
				if(pos == data.length) {
					if(strict)
						throw new MarkupException("unclosed processing instruction (<!xxx>)");
				} else pos++; // skipping the >

				if(parseSawBangInstruction !is null)
					if(parseSawBangInstruction(data[start .. bangEnds])) {
						// FIXME: these should be able to modify the parser state,
						// doing things like adding entities, somehow.

						return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
					}
			}

			/*
			if(pos < data.length && data[pos] == '>')
				pos++; // skip the >
			else
				assert(!strict);
			*/
		break;
		case '%':
		case '?':
			/*
				Here's what we want to support:

				<% asp code %>
				<%= asp code %>
				<?php php code ?>
				<?= php code ?>

				The contents don't really matter, just if it opens with
				one of the above forms, it ends on the two char terminator.

				<?something>
					this is NOT php code
					because I've seen this in the wild: <?EM-dummyText>

					This could be php with shorttags which would be cut off
					prematurely because if(a >) - that > counts as the close
					of the tag, but since dom.d can't tell the difference
					between that and the <?EM> real world example, it will
					not try to look for the ?> ending.

				The difference between this and the asp/php stuff is that it
				ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
				on >.
			*/

			char end = data[pos];
			auto started = pos;
			bool isAsp = end == '%';
			int currentIndex = 0;
			bool isPhp = false;
			bool isEqualTag = false;
			int phpCount = 0;

			more:
			pos++; // skip the start
			if(pos == data.length) {
				if(strict)
					throw new MarkupException("Unclosed <"~end~" by end of file");
			} else {
				currentIndex++;
				if(currentIndex == 1 && data[pos] == '=') {
					if(!isAsp)
						isPhp = true;
					isEqualTag = true;
					goto more;
				}
				// recognize the literal "php" in the first three characters
				if(currentIndex == 1 && data[pos] == 'p')
					phpCount++;
				if(currentIndex == 2 && data[pos] == 'h')
					phpCount++;
				if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
					isPhp = true;

				if(data[pos] == '>') {
					// asp/php sections only end on the two char terminator
					if((isAsp || isPhp) && data[pos - 1] != end)
						goto more;
					// otherwise we're done
				} else
					goto more;
			}

			//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
			auto code = data[started .. pos];


			assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
			if(pos < data.length)
				pos++; // get past the >

			if(isAsp && parseSawAspCode !is null) {
				if(parseSawAspCode(code)) {
					return Ele(3, new AspCode(this, code), null);
				}
			} else if(isPhp && parseSawPhpCode !is null) {
				if(parseSawPhpCode(code)) {
					return Ele(3, new PhpCode(this, code), null);
				}
			} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
				if(parseSawQuestionInstruction(code)) {
					return Ele(3, new QuestionInstruction(this, code), null);
				}
			}
		break;
		case '/': // closing an element
			pos++; // skip the start
			auto p = pos;
			while(pos < data.length && data[pos] != '>')
				pos++;
			//writefln("</%s>", data[p..pos]);
			if(pos == data.length && data[pos-1] != '>') {
				if(strict)
					throw new MarkupException("File ended before closing tag had a required >");
				else
					data ~= ">"; // just hack it in
			}
			pos++; // skip the '>'

			string tname = data[p..pos-1];
			if(!strict)
				tname = tname.strip;
			if(!caseSensitive)
				tname = tname.toLower();

			return Ele(1, null, tname); // closing tag reports itself here
		case ' ': // assume it isn't a real element...
			if(strict) {
				parseError("bad markup - improperly placed <");
				assert(0); // parseError always throws
			} else
				return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
		default:

			if(!strict) {
				// what about something that kinda looks like a tag, but isn't?
				auto nextTag = data[pos .. $].indexOf("<");
				auto closeTag = data[pos .. $].indexOf(">");
				if(closeTag != -1 && nextTag != -1)
					if(nextTag < closeTag) {
						// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically

						auto equal = data[pos .. $].indexOf("=\"");
						if(equal != -1 && equal < closeTag) {
							// this MIGHT be ok, soldier on
						} else {
							// definitely no good, this must be a (horribly distorted) text node
							pos++; // skip the < we're on - don't want text node to end prematurely
							auto node = readTextNode();
							node.contents = "<" ~ node.contents; // put this back
							return Ele(0, node, null);
						}
					}
			}

			string tagName = readTagName();
			AttributesHolder attributes;

			// Finishes the opening tag at `pos`, creates the element from tagName
			// and the attributes collected so far, then (unless it is self closed
			// or a raw source element) reads children until the matching close.
			Ele addTag(bool selfClosed) {
				if(selfClosed)
					pos++;
				else {
					if(!strict)
						if(tagName.isInArray(selfClosedElements))
							// these are de-facto self closed
							selfClosed = true;
				}

				if(strict) {
					if(data[pos] != '>')
						throw new MarkupException("got "~data[pos]~" when expecting > (possible missing attribute name)\nContext:\n" ~ data[max(0, pos - data.contextToKeep) .. min(data.length, pos + data.contextToKeep)]);
				} else {
					// if we got here, it's probably because a slash was in an
					// unquoted attribute - don't trust the selfClosed value
					if(!selfClosed)
						selfClosed = tagName.isInArray(selfClosedElements);

					while(pos < data.length && data[pos] != '>')
						pos++;

					if(pos >= data.length) {
						// the tag never closed
						assert(data.length != 0);
						pos = data.length - 1; // rewinding so it hits the end at the bottom..
					}
				}

				auto whereThisTagStarted = pos; // for better error messages

				pos++;

				auto e = createElement(tagName);
				e.attributes = attributes;
				version(dom_node_indexes) {
					if(e.dataset.nodeIndex.length == 0)
						e.dataset.nodeIndex = to!string(&(e.attributes));
				}
				e.selfClosed = selfClosed;
				e.parseAttributes();

				// might temporarily set root to the first element we encounter,
				// then the final root element assignment will be at the end of the parse,
				// when the recursive work is complete.
				if(this.root is null)
					this.root = e;
				this.processTagOpen(e);
				scope(exit)
					this.processTagClose(e);


				// HACK to handle script and style as a raw data section as it is in HTML browsers
				if(!pureXmlMode && tagName.isInArray(rawSourceElements)) {
					if(!selfClosed) {
						string closer = "</" ~ tagName ~ ">";
						// search once, and only when there is input left to search
						ptrdiff_t ending = -1;
						if(pos < data.length)
							ending = indexOf(data[pos..$], closer, (loose ? false : true));
						/*
						if(loose && ending == -1 && pos < data.length)
							ending = indexOf(data[pos..$], closer.toUpper());
						*/
						if(ending == -1) {
							if(strict)
								throw new Exception("tag " ~ tagName ~ " never closed");
							else {
								// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
								if(pos < data.length) {
									e = new TextNode(this, data[pos .. $]);
									pos = data.length;
								}
							}
						} else {
							ending += pos;
							e.innerRawSource = data[pos..ending];
							pos = ending + closer.length;
						}
					}
					return Ele(0, e, null);
				}

				bool closed = selfClosed;

				// Flags the document for the post-parse fixup when `n` may not be
				// nested inside the element currently being read.
				void considerHtmlNonNestableElementHack(Element n) {
					assert(!strict);
					if(!canNestElementsInHtml(e.tagName, n.tagName)) {
						// html lets you write <p> para 1 <p> para 1
						// but in the dom tree, they should be siblings, not children.
						nonNestableHackRequired = true;
					}
				}

				//writef("<%s>", tagName);
				while(!closed) {
					Ele n;
					if(strict)
						n = readElement();
					else
						n = readElement(parentChain ~ tagName);

					if(n.type == 4) return n; // the document is empty

					if(n.type == 3 && n.element !is null) {
						// special node, append if possible
						if(e !is null)
							processNodeWhileParsing(e, n.element);
						else
							piecesBeforeRoot ~= n.element;
					} else if(n.type == 0) {
						if(!strict)
							considerHtmlNonNestableElementHack(n.element);
						processNodeWhileParsing(e, n.element);
					} else if(n.type == 1) {
						bool found = false; // only referenced by the commented out block below
						if(n.payload != tagName) {
							if(strict)
								parseError("mismatched tag: </"~n.payload~"> != <"~tagName~"> (opened on line "~to!string(getLineNumber(whereThisTagStarted))~")");
							else {
								sawImproperNesting = true;
								// this is so we don't drop several levels of awful markup
								if(n.element) {
									if(!strict)
										considerHtmlNonNestableElementHack(n.element);
									processNodeWhileParsing(e, n.element);
									n.element = null;
								}

								// is the element open somewhere up the chain?
								foreach(i, parent; parentChain)
									if(parent == n.payload) {
										recentAutoClosedTags ~= tagName;
										// just rotating it so we don't inadvertently break stuff with vile crap
										if(recentAutoClosedTags.length > 4)
											recentAutoClosedTags = recentAutoClosedTags[1 .. $];

										// hand this element up so the ancestor that owns
										// the closing tag can append it and finish
										n.element = e;
										return n;
									}

								/+
									// COMMENTED OUT BLOCK
									// dom.d used to replace improper close tags with their
									// text so they'd be visible in the output. the html
									// spec says to just ignore them, and browsers do indeed
									// seem to just ignore them, even checking back on IE6.
									// so i guess i was wrong to do this (tho tbh i find it kinda
									// useful to call out an obvious mistake in the source...
									// but for calling out obvious mistakes, just use strict
									// mode.)

									// if not, this is a text node; we can't fix it up...

									// If it's already in the tree somewhere, assume it is closed by algorithm
									// and we shouldn't output it - odds are the user just flipped a couple tags
									foreach(ele; e.tree) {
										if(ele.tagName == n.payload) {
											found = true;
											break;
										}
									}

									foreach(ele; recentAutoClosedTags) {
										if(ele == n.payload) {
											found = true;
											break;
										}
									}

									if(!found) // if not found in the tree though, it's probably just text
									processNodeWhileParsing(e, TextNode.fromUndecodedString(this, "</"~n.payload~">"));

								+/
							}
						} else {
							if(n.element) {
								if(!strict)
									considerHtmlNonNestableElementHack(n.element);
								processNodeWhileParsing(e, n.element);
							}
						}

						if(n.payload == tagName) // in strict mode, this is always true
							closed = true;
					} else { /*throw new Exception("wtf " ~ tagName);*/ }
				}
				//writef("</%s>\n", tagName);
				return Ele(0, e, null);
			}

			// if a tag was opened but not closed by end of file, we can arrive here
			if(!strict && pos >= data.length)
				return addTag(false);
			//else if(strict) assert(0); // should be caught before

			switch(data[pos]) {
				default: assert(0);
				case '/': // self closing tag
					return addTag(true);
				case '>':
					return addTag(false);
				case ' ':
				case '\t':
				case '\n':
				case '\r':
					// there might be attributes...
					moreAttributes:
					eatWhitespace();

					// same deal as above the switch....
					if(!strict && pos >= data.length)
						return addTag(false);

					if(strict && pos >= data.length)
						throw new MarkupException("tag open, didn't find > before end of file");

					switch(data[pos]) {
						case '/': // self closing tag
							return addTag(true);
						case '>': // closed tag; open -- we now read the contents
							return addTag(false);
						default: // it is an attribute
							string attrName = readAttributeName();
							// a valueless attribute gets its own name as the value
							string attrValue = attrName;

							bool ateAny = eatWhitespace();
							// the spec allows this too, sigh https://www.w3.org/TR/REC-xml/#NT-Eq
							//if(strict && ateAny)
								//throw new MarkupException("inappropriate whitespace after attribute name");

							if(pos >= data.length) {
								if(strict)
									assert(0, "this should have thrown in readAttributeName");
								else {
									data ~= ">";
									goto blankValue;
								}
							}
							if(data[pos] == '=') {
								pos++;

								ateAny = eatWhitespace();
								// the spec actually allows this!
								//if(strict && ateAny)
									//throw new MarkupException("inappropriate whitespace after attribute equals");

								attrValue = readAttributeValue();

								eatWhitespace();
							}

							blankValue:

							if(strict && attrName in attributes)
								throw new MarkupException("Repeated attribute: " ~ attrName);

							if(attrName.strip().length)
								attributes[attrName] = attrValue;
							else if(strict) throw new MarkupException("wtf, zero length attribute name");

							if(!strict && pos < data.length && data[pos] == '<') {
								// this is the broken tag that doesn't have a > at the end
								data = data[0 .. pos] ~ ">" ~ data[pos.. $];
								// let's insert one as a hack
								goto case '>';
							}

							goto moreAttributes;
					}
			}
	}

	return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
	//assert(0);
}

// ---- driver: the remainder of the enclosing parse function ----

eatWhitespace();
Ele r;
do {
	r = readElement(); // there SHOULD only be one element...

	if(r.type == 3 && r.element !is null)
		piecesBeforeRoot ~= r.element;

	if(r.type == 4)
		break; // the document is completely empty...
} while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node

root = r.element;
if(root !is null)
	root.parent_ = this;

if(!strict) // in strict mode, we'll just ignore stuff after the xml
	while(r.type != 4) {
		r = readElement();
		if(r.type != 4 && r.type != 2) { // if not empty and not ignored
			if(r.element !is null)
				piecesAfterRoot ~= r.element;
		}
	}

if(root is null)
{
	if(strict)
		assert(0, "empty document should be impossible in strict mode");
	else
		parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
}

if(nonNestableHackRequired) {
	assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag...

	// in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml).
	// It's hard to handle above though because my code sucks. So, we'll fix it here.

	// Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120
	// Kind of inefficient because we can't detect when we recurse back out of a node.
	Element[Element] insertLocations;
	auto iterator = root.tree;
	foreach(ele; iterator) {
		if(ele.parentNode is null)
			continue;

		if(!canNestElementsInHtml(ele.parentNode.tagName, ele.tagName)) {
			auto shouldBePreviousSibling = ele.parentNode;
			auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder...
			if (auto p = holder in insertLocations) {
				shouldBePreviousSibling = *p;
				assert(shouldBePreviousSibling.parentNode is holder);
			}
			ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
			insertLocations[holder] = ele;
			iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
		}
	}
}
}

/* end massive parse function */

/// Gets the <title> element's innerText, if one exists
@property string title() {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);
	if(e)
		return e.innerText();
	return "";
}

/// Sets the title of the page, creating a <title> element if needed.
@property void title(string t) {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);

	if(!e) {
		e = createElement("title");
		// the new element is only attached when a <head> exists; otherwise
		// the text below is set on an element that is not in the tree
		auto heads = getElementsByTagName("head");
		if(heads.length)
			heads[0].appendChild(e);
	}

	if(e)
		e.innerText = t;
}

// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
	// an empty or cleared document has no root; report "not found" instead of dereferencing null
	if(root is null)
		return null;
	return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
do {
	// a failed cast and a missing element are both reported as "not found"
	auto e = cast(SomeElementType) querySelector(selector);
	if(e is null)
		throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
	return e;
}

/// ditto
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	auto e = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(e);
}

/// ditto
@scriptable
Element querySelector(string selector) {
	// see comment below on Document.querySelectorAll
	auto s = Selector(selector);//, !loose);
	foreach(ref comp; s.components)
		if(comp.parts.length && comp.parts[0].separation == 0)
			comp.parts[0].separation = -1;

	// checked after the selector is built so a malformed selector is still
	// reported the same way on an empty document
	if(this.root is null)
		return null;

	// only the first match is wanted
	foreach(e; s.getMatchingElementsLazy(this.root))
		return e;
	return null;

}

/// ditto
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto s = Selector(selector);//, !loose);
	foreach(ref comp; s.components)
		if(comp.parts.length && comp.parts[0].separation == 0)
			comp.parts[0].separation = -1;

	// nothing can match in an empty document
	if(this.root is null)
		return null;

	return s.getMatchingElements(this.root, null);
}

/// ditto
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	if(root is null)
		return null;
	return root.getElementsBySelector(selector);
}

/// ditto
@scriptable
Element[] getElementsByTagName(string tag) {
	if(root is null)
		return null;
	return root.getElementsByTagName(tag);
}

/// ditto
@scriptable
Element[] getElementsByClassName(string tag) {
	if(root is null)
		return null;
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
Element getFirstElementByTagName(string tag) {
	// in loose mode the requested name is lower cased before comparing
	if(loose)
		tag = tag.toLower();
	bool doesItMatch(Element e) {
		return e.tagName == tag;
	}
	return findFirst(&doesItMatch);
}

/++
	This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body used to be a keyword in D.)

	History:
		`body` alias added February 26, 2024
+/
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// ditto
alias body = mainBody;

/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon
string getMeta(string name) {
	string thing = name.indexOf(":") == -1 ? "name" : "property";
	// NOTE(review): name is spliced into the selector unquoted; characters that
	// are special in selectors would change its meaning - confirm callers only pass plain names
	auto e = querySelector("head meta["~thing~"="~name~"]");
	if(e is null)
		return null;
	return e.content;
}

/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
void setMeta(string name, string value) {
	string thing = name.indexOf(":") == -1 ? "name" : "property";
	auto e = querySelector("head meta["~thing~"="~name~"]");
	if(e is null) {
		// no such tag yet: create one inside <head> (requireSelector throws if there is no head)
		e = requireSelector("head").addChild("meta");
		e.setAttribute(thing, name);
	}

	e.content = value;
}

///.
Form[] forms() {
	return cast(Form[]) getElementsByTagName("form");
}

///.
Form createForm()
	out(ret) {
		assert(ret !is null);
	}
do {
	return cast(Form) createElement("form");
}

///.
Element createElement(string name) {
	if(loose)
		name = name.toLower();

	auto e = Element.make(name, null, null, selfClosedElements);

	return e;

//	return new Element(this, name, null, selfClosed);
}

///.
Element createFragment() {
	return new DocumentFragment(this);
}

///.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


///.
Element findFirst(bool delegate(Element) doesItMatch) {
	if(root is null)
		return null;
	Element result;

	// depth first, pre-order walk that stops at the first match
	bool goThroughElement(Element e) {
		if(doesItMatch(e)) {
			result = e;
			return true;
		}

		foreach(child; e.children) {
			if(goThroughElement(child))
				return true;
		}

		return false;
	}

	goThroughElement(root);

	return result;
}

///.
void clear() {
	// drops the tree and resets the loose flag; the pieces before/after the
	// root and the prolog are left untouched
	root = null;
	loose = false;
}

private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/++
	Returns or sets the string before the root element. This is, for example,
	`<!DOCTYPE html>\n` or similar.
+/
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	// otherwise rebuild it from what the parser kept, one piece per line
	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

/// ditto
void setProlog(string d) {
	_prolog = d;
	prologWasSet = true;
}

/++
	Returns the document as string form. Please note that if there is anything in [piecesAfterRoot],
	they are discarded. If you want to add them to the file, loop over that and append it yourself
	(but remember xml isn't supposed to have anything after the root element).

	If the document has no root (it was never parsed, or [clear] was called), only the prolog is returned.
+/
override string toString() const {
	if(root is null)
		return prolog;
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	string s = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	// a document without a root contributes nothing here
	if(root !is null)
		s ~= root.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
	foreach(a; piecesAfterRoot)
		s ~= a.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
	return s;
}

/// The root element, like `<html>`. Most of the methods on Document forward to this object.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

// calls every registered observer, in registration order, with the event
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(o; eventObservers)
		o(e);
}
}

/++
	Basic parsing of HTML tag soup

	If you simply make a `new Document("some string")` or use [Document.fromUrl] to automatically
	download a page (that function is shorthand for `new Document(arsd.http2.get(your_given_url).contentText)`),
	the Document parser will assume it is broken HTML. It will try to fix up things like charset messes, missing
	closing tags, flipped tags, inconsistent letter cases, and other forms of commonly found HTML on the web.

	It isn't exactly the same as what a HTML5 web browser does in all cases, but it usually is, and where it
	disagrees, it is still usually good enough (but sometimes a bug).
1779 +/ 1780 unittest { 1781 auto document = new Document(`<html><body><p>hello <P>there`); 1782 // this will automatically try to normalize the html and fix up broken tags, etc 1783 // so notice how it added the missing closing tags here and made them all lower case 1784 assert(document.toString() == "<!DOCTYPE html>\n<html><body><p>hello </p><p>there</p></body></html>", document.toString()); 1785 } 1786 1787 /++ 1788 Stricter parsing of HTML 1789 1790 When you are writing the HTML yourself, you can remove most ambiguity by making it throw exceptions instead 1791 of trying to automatically fix up things basic parsing tries to do. Using strict mode accomplishes this. 1792 1793 This will help guarantee that you have well-formed HTML, which means it is going to parse a lot more reliably 1794 by all users - browsers, dom.d, other libraries, all behave better with well-formed input... people too! 1795 1796 (note it is not a full *validator*, just a well-formedness checker. Full validation is a lot more work for very 1797 little benefit in my experience, so I stopped here.) 1798 +/ 1799 unittest { 1800 try { 1801 auto document = new Document(`<html><body><p>hello <P>there`, true, true); // turns on strict and case sensitive mode to ctor 1802 assert(0); // never reached, the constructor will throw because strict mode is turned on 1803 } catch(Exception e) { 1804 1805 } 1806 1807 // you can also create the object first, then use the [parseStrict] method 1808 auto document = new Document; 1809 document.parseStrict(`<foo></foo>`); // this is invalid html - no such foo tag - but it is well-formed, since it is opened and closed properly, so it passes 1810 1811 } 1812 1813 /++ 1814 Custom HTML extensions 1815 1816 dom.d is a custom HTML parser, which means you can add custom HTML extensions to it too. It normally reads 1817 and discards things like ASP style `<% ... %>` code as well as XML processing instruction / PHP style embeds `<? ... 
?>` 1818 but you can keep this data if you call a function to opt into it in before parsing. 1819 1820 Additionally, you can add special tags to be read like `<script>` to preserve its insides for future processing 1821 via the `.innerRawSource` member. 1822 +/ 1823 unittest { 1824 auto document = new Document; // construct an empty thing first 1825 document.enableAddingSpecialTagsToDom(); // add the special tags like <% ... %> etc 1826 document.rawSourceElements ~= "embedded-plaintext"; // tell it we want a custom 1827 1828 document.parseStrict(`<html> 1829 <% some asp code %> 1830 <script>embedded && javascript</script> 1831 <embedded-plaintext>my <custom> plaintext & stuff</embedded-plaintext> 1832 </html>`); 1833 1834 // please note that if we did `document.toString()` right now, the original source - almost your same 1835 // string you passed to parseStrict - would be spit back out. Meaning the embedded-plaintext still has its 1836 // special text inside it. Another parser won't understand how to use this! So if you want to pass this 1837 // document somewhere else, you need to do some transformations. 1838 // 1839 // This differs from cases like CDATA sections, which dom.d will automatically convert into plain html entities 1840 // on the output that can be read by anyone. 
1841 1842 assert(document.root.tagName == "html"); // the root element is normal 1843 1844 int foundCount; 1845 // now let's loop through the whole tree 1846 foreach(element; document.root.tree) { 1847 // the asp thing will be in 1848 if(auto asp = cast(AspCode) element) { 1849 // you use the `asp.source` member to get the code for these 1850 assert(asp.source == "% some asp code %"); 1851 foundCount++; 1852 } else if(element.tagName == "script") { 1853 // and for raw source elements - script, style, or the ones you add, 1854 // you use the innerHTML method to get the code inside 1855 assert(element.innerHTML == "embedded && javascript"); 1856 foundCount++; 1857 } else if(element.tagName == "embedded-plaintext") { 1858 // and innerHTML again 1859 assert(element.innerHTML == "my <custom> plaintext & stuff"); 1860 foundCount++; 1861 } 1862 1863 } 1864 1865 assert(foundCount == 3); 1866 1867 // writeln(document.toString()); 1868 } 1869 1870 // FIXME: <textarea> contents are treated kinda special in html5 as well... 1871 1872 /++ 1873 Demoing CDATA, entities, and non-ascii characters. 1874 1875 The previous example mentioned CDATA, let's show you what that does too. These are all read in as plain strings accessible in the DOM - there is no CDATA, no entities once you get inside the object model - but when you convert back into a string, it will normalize them in a particular way. 1876 1877 This is not exactly standards compliant completely in and out thanks to it doing some transformations... but I find it more useful - it reads the data in consistently and writes it out consistently, both in ways that work well for interop. Take a look: 1878 +/ 1879 unittest { 1880 auto document = new Document(`<html> 1881 <p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p> 1882 <p>¤ is the same thing.</p> 1883 <p>¤ is the same thing, but as a named entity. 
It also will be changed to a numbered entity in string output.</p> 1884 <p><![CDATA[xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.]]></p> 1885 </html>`, true, true); // strict mode turned on 1886 1887 // Inside the object model, things are simplified to D strings. 1888 auto paragraphs = document.querySelectorAll("p"); 1889 // no surprise on the first paragraph, we wrote it with the character, and it is still there in the D string 1890 assert(paragraphs[0].textContent == "¤ is a non-ascii character. It will be converted to a numbered entity in string output."); 1891 // but note on the second paragraph, the entity has been converted to the appropriate *character* in the object 1892 assert(paragraphs[1].textContent == "¤ is the same thing."); 1893 assert(paragraphs[2].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output."); 1894 // and the CDATA bit is completely gone from the DOM; it just read it in as a text node. The txt content shows the text as a plain string: 1895 assert(paragraphs[3].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too."); 1896 // and the dom node beneath it is just a single text node; no trace of the original CDATA detail is left after parsing. 1897 assert(paragraphs[3].childNodes.length == 1 && paragraphs[3].childNodes[0].nodeType == NodeType.Text); 1898 1899 // And now, in the output string, we can see they are normalized thusly: 1900 assert(document.toString() == "<!DOCTYPE html>\n<html> 1901 <p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p> 1902 <p>¤ is the same thing.</p> 1903 <p>¤ is the same thing, but as a named entity. 
It also will be changed to a numbered entity in string output.</p> 1904 <p>xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.</p> 1905 </html>"); 1906 } 1907 1908 /++ 1909 Streaming parsing 1910 1911 dom.d normally takes a big string and returns a big DOM object tree - hence its name. This is usually the simplest 1912 code to read and write, so I prefer to stick to that, but if you wanna jump through a few hoops, you can still make 1913 dom.d work with streams. 1914 1915 It is awkward - again, dom.d's whole design is based on building the dom tree, but you can do it if you're willing to 1916 subclass a little and trust the garbage collector. Here's how. 1917 +/ 1918 unittest { 1919 bool encountered; 1920 class StreamDocument : Document { 1921 // the normal behavior for this function is to `parent.appendChild(child)` 1922 // but we can override to read it as it is processed and not append it 1923 override void processNodeWhileParsing(Element parent, Element child) { 1924 if(child.tagName == "bar") 1925 encountered = true; 1926 // note that each element's object is created but then discarded as garbage. 1927 // the GC will take care of it, even with a large document, whereas the normal 1928 // object tree could become quite large. 1929 } 1930 1931 this() { 1932 super("<foo><bar></bar></foo>"); 1933 } 1934 } 1935 1936 auto test = new StreamDocument(); 1937 assert(encountered); // it should have been seen 1938 assert(test.querySelector("bar") is null); // but not appended to the dom node, since we didn't append it 1939 } 1940 1941 /++ 1942 Basic parsing of XML. 1943 1944 dom.d is not technically a standards-compliant xml parser and doesn't implement all xml features, 1945 but its stricter parse options together with turning off HTML's special tag handling (e.g. 
treating
	`<script>` and `<style>` the same as any other tag) gets close enough to work fine for a great many
	use cases.

	For more information, see [XmlDocument].
+/
unittest {
	auto xml = new XmlDocument(`<my-stuff>hello</my-stuff>`);
}

// Reports whether a `childTagName` element may be nested directly inside a `parentTagName`
// element. Only two families of parents are restricted; every other combination is allowed.
bool canNestElementsInHtml(string parentTagName, string childTagName) {
	bool allowed = true;

	switch(parentTagName) {
		case "p", "h1", "h2", "h3", "h4", "h5", "h6":
			// only should include "phrasing content"
			switch(childTagName) {
				case "p", "dl", "dt", "dd", "h1", "h2", "h3", "h4", "h5", "h6":
					allowed = false;
				break;
				default:
				break;
			}
		break;
		case "dt", "dd":
			// a definition term/description cannot directly contain another one
			allowed = !(childTagName == "dd" || childTagName == "dt");
		break;
		default:
		break;
	}

	return allowed;
}

// Common view of whatever sits in an element's parent slot. Each accessor yields the
// object as that type; Element (below) answers `null` for asDocument and itself for asElement.
interface DomParent {
	inout(Document) asDocument() inout;
	inout(Element) asElement() inout;
}

/++
	This represents almost everything in the DOM and offers a lot of inspection and manipulation functions. Element, or its subclasses, are what makes the dom tree.
+/
/// Group: core_functionality
class Element : DomParent {
	inout(Document) asDocument() inout { return null; }
	inout(Element) asElement() inout { return this; }

	/// Returns a collection of elements by selector.
	/// See: [Document.opIndex]
	ElementCollection opIndex(string selector) {
		auto collection = ElementCollection(this);
		return collection[selector];
	}

	/++
		Returns the child node with the particular index, or `null`
		if the index is past the end.

		Be aware that child nodes include text nodes, including
		whitespace-only nodes.
	+/
	Element opIndex(size_t index) {
		return index < children.length ? this.children[index] : null;
	}

	/// Calls getElementById, but throws instead of returning null if the element is not found.
/// You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
	final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
		if(
			is(SomeElementType : Element)
		)
		out(ret) {
			assert(ret !is null);
		}
	do {
		// a failed dynamic cast yields null, so "not found" and "wrong type" both throw
		auto e = cast(SomeElementType) getElementById(id);
		if(e is null)
			throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
		return e;
	}

	/// ditto but with selectors instead of ids
	final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
		if(
			is(SomeElementType : Element)
		)
		out(ret) {
			assert(ret !is null);
		}
	do {
		auto e = cast(SomeElementType) querySelector(selector);
		if(e is null)
			throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
		return e;
	}


	/++
		If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
	+/
	final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
		if(is(SomeElementType : Element))
	{
		// `file` and `line` are accepted for signature parity with the require* functions; they are not used here
		auto e = cast(SomeElementType) querySelector(selector);
		return MaybeNullElement!SomeElementType(e);
	}



	/++
		Get all the classes on this element.

		The `class` attribute is split on single spaces and each piece is stripped.
		Blank pieces (from leading, trailing, or repeated spaces) are dropped, so
		every returned string is a non-empty class name.
	+/
	@property string[] classes() const {
		string[] names;
		foreach(piece; split(className, " ")) {
			auto name = piece.strip();
			if(name.length)
				names ~= name;
		}
		return names;
	}

	/++
		The object [classList] returns.
+/
	static struct ClassListHelper {
		Element this_;
		this(inout(Element) this_) inout {
			this.this_ = this_;
		}

		/// Whether the wrapped element currently has the class `cn`.
		bool contains(string cn) const {
			return this_.hasClass(cn);
		}

		/// Adds the class `cn` to the wrapped element.
		void add(string cn) {
			this_.addClass(cn);
		}

		/// Removes the class `cn` from the wrapped element.
		void remove(string cn) {
			this_.removeClass(cn);
		}

		/// Adds `cn` if it is absent, removes it if it is present.
		void toggle(string cn) {
			if(!contains(cn))
				add(cn);
			else
				remove(cn);
		}

		// this thing supposed to be iterable in javascript but idk how i want to do it in D. meh
		/+
		string[] opIndex() const {
			return this_.classes;
		}
		+/
	}

	/++
		Returns a helper object to work with classes, just like javascript.

		History:
			Added August 25, 2022
	+/
	@property inout(ClassListHelper) classList() inout {
		return inout(ClassListHelper)(this);
	}
	// FIXME: classList is supposed to normalize whitespace and drop duplicates when you use it. need to test.

	unittest {
		Element element = Element.make("div");
		element.classList.add("foo");
		assert(element.classList.contains("foo"));
		element.classList.remove("foo");
		assert(!element.classList.contains("foo"));
		element.classList.toggle("bar");
		assert(element.classList.contains("bar"));
	}

	/// ditto
	alias classNames = classes;


	/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
	/// Does nothing if the class is already present. Returns `this` for chaining.
	@scriptable
	Element addClass(string c) {
		if(hasClass(c))
			return this; // don't add it twice

		string existing = getAttribute("class");
		// an empty attribute takes the new name as-is; otherwise append after a space
		setAttribute("class", existing.length == 0 ? c : existing ~ " " ~ c);

		return this;
	}

	/// Removes a particular class name.
@scriptable
	Element removeClass(string c) {
		if(!hasClass(c))
			return this;

		// rebuild the attribute from every class name except the one being removed
		string remaining;
		foreach(name; classes) {
			if(name != c) {
				if(remaining.length)
					remaining ~= " ";
				remaining ~= name;
			}
		}

		className = remaining.strip();

		return this;
	}

	/// Returns whether the given class appears in this element.
	bool hasClass(string c) const {
		string cn = className;

		// cheap rejection: if `c` is not even a substring, it cannot be one of the names
		auto idx = cn.indexOf(c);
		if(idx == -1)
			return false;

		// a substring hit is not enough ("foo" is inside "foobar"), so compare whole names
		foreach(candidate; cn.split(" ")) {
			if(candidate.strip == c)
				return true;
		}
		return false;

		/*
		int rightSide = idx + c.length;

		bool checkRight() {
			if(rightSide == cn.length)
				return true; // it's the only class
			else if(iswhite(cn[rightSide]))
				return true;
			return false; // this is a substring of something else..
		}

		if(idx == 0) {
			return checkRight();
		} else {
			if(!iswhite(cn[idx - 1]))
				return false; // substring
			return checkRight();
		}

		assert(0);
		*/
	}


	/* *******************************
	          DOM Mutation
	*********************************/
	/++
		Family of convenience functions to quickly add a tag with some text or
		other relevant info (for example, it's a src for an <img> element
		instead of inner text). They forward to [Element.make] then calls [appendChild].

		---
		div.addChild("span", "hello there");
		div.addChild("div", Html("<p>children of the div</p>"));
		---
	+/
	Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
		in {
			assert(tagName !is null);
		}
		out(e) {
			//assert(e.parentNode is this);
			//assert(e.parentDocument is this.parentDocument);
		}
	do {
		// childInfo/childInfo2 are interpreted per tag by Element.make
		auto e = Element.make(tagName, childInfo, childInfo2);
		// FIXME (maybe): if the thing is self closed, we might want to go ahead and
		// return the parent. That will break existing code though.
		return appendChild(e);
	}

	/// ditto
	Element addChild(Element e) {
		// plain alias for appendChild, kept so every addChild overload reads the same at call sites
		return this.appendChild(e);
	}

	/// ditto
	Element addChild(string tagName, Element firstChild, string info2 = null)
	in {
		assert(firstChild !is null);
	}
	out(ret) {
		assert(ret !is null);
		assert(ret.parentNode is this);
		assert(firstChild.parentNode is ret);

		assert(ret.parentDocument is this.parentDocument);
		//assert(firstChild.parentDocument is this.parentDocument);
	}
	do {
		// the empty (but non-null) string makes Element.make run its childInfo handling, so info2 is applied
		auto e = Element.make(tagName, "", info2);
		// nest firstChild inside the new element, then attach the new element to this one
		e.appendChild(firstChild);
		this.appendChild(e);
		return e;
	}

	/// ditto
	Element addChild(string tagName, in Html innerHtml, string info2 = null)
	in {
	}
	out(ret) {
		assert(ret !is null);
		// the parentNode check is waived when `this` is a DocumentFragment
		assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(ret.parentDocument is this.parentDocument);
	}
	do {
		auto e = Element.make(tagName, "", info2);
		// the new element is attached to this one first, and only then is its inner html assigned
		this.appendChild(e);
		e.innerHTML = innerHtml.source;
		return e;
	}


	/// Another convenience function. Adds a child directly after the current one, returning
	/// the new child.
///
	/// Between this, addChild, and parentNode, you can build a tree as a single expression.
	/// See_Also: [addChild]
	Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
		in {
			assert(tagName !is null);
			assert(parentNode !is null);
		}
		out(e) {
			assert(e.parentNode is this.parentNode);
			assert(e.parentDocument is this.parentDocument);
		}
	do {
		auto e = Element.make(tagName, childInfo, childInfo2);
		return parentNode.insertAfter(this, e);
	}

	/// ditto
	Element addSibling(Element e) {
		// NOTE(review): unlike the overload above, this one has no contract checking that parentNode is non-null
		return parentNode.insertAfter(this, e);
	}

	/// Convenience function to append text intermixed with other children.
	/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
	/// or div.addChildren("Hello, ", user.name, "!");
	/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
	///
	/// Each argument must be an [Element] (appended as a child) or a `char` string
	/// (appended as text); anything else is rejected at compile time.
	void addChildren(T...)(T t) {
		foreach(item; t) {
			// the checks must be made on typeof(item): `item` is a value, and is() on a
			// non-type is always false, which sent every argument to the static assert
			static if(is(typeof(item) : Element))
				appendChild(item);
			else static if(is(typeof(item) : string))
				appendText(item);
			else static if(is(typeof(item) : const(char)[]))
				appendText(item.idup); // mutable/const char arrays need an immutable copy
			else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
		}
	}

	/// Appends the list of children to this element.
	void appendChildren(Element[] children) {
		// note: the parameter shadows the `children` member inside this function
		foreach(ele; children)
			appendChild(ele);
	}

	/// Removes this element from its current parent and appends it to the given `newParent`.
void reparent(Element newParent)
		in {
			assert(newParent !is null);
			assert(parentNode !is null);
		}
		out {
			assert(this.parentNode is newParent);
			//assert(isInArray(this, newParent.children));
		}
	do {
		// detach from the current parent first, then attach to the new one
		parentNode.removeChild(this);
		newParent.appendChild(this);
	}

	/**
		Strips this tag out of the document, putting its inner html
		as children of the parent.

		For example, given: `<p>hello <b>there</b></p>`, if you
		call `stripOut` on the `b` element, you'll be left with
		`<p>hello there</p>`.

		The idea here is to make it easy to get rid of garbage
		markup you aren't interested in.
	*/
	void stripOut()
		in {
			assert(parentNode !is null);
		}
		out {
			assert(parentNode is null);
			assert(children.length == 0);
		}
	do {
		// clear each child's parent link before handing the children over to our parent
		foreach(c; children)
			c.parentNode = null; // remove the parent
		if(children.length)
			// swap this element for its whole child list
			parentNode.replaceChild(this, this.children);
		else
			// nothing to hoist, so simply drop this element
			parentNode.removeChild(this);
		this.children.length = 0; // we reparented them all above
	}

	/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
	/// if the element already isn't in a tree, it does nothing.
	/// Returns `this` in both cases.
	Element removeFromTree()
		in {

		}
		out(var) {
			assert(this.parentNode is null);
			assert(var is this);
		}
	do {
		if(this.parentNode is null)
			return this;

		this.parentNode.removeChild(this);

		return this;
	}

	/++
		Wraps this element inside the given element.
		It's like `this.replaceWith(what); what.appendChild(this);`

		Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
		you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
	Element wrapIn(Element what)
		in {
			assert(what !is null);
		}
		out(ret) {
			assert(this.parentNode is what);
			assert(ret is what);
		}
	do {
		// `what` takes this element's place in the tree, then adopts this element
		this.replaceWith(what);
		what.appendChild(this);

		return what;
	}

	/// Replaces this element with something else in the tree.
	/// `e` is first detached from wherever it currently is. Returns `e`.
	Element replaceWith(Element e)
	in {
		assert(this.parentNode !is null);
	}
	do {
		e.removeFromTree();
		this.parentNode.replaceChild(this, e);
		return e;
	}

	/**
		Fetches the first consecutive text nodes concatenated together.


		`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

		See_also: [directText], [innerText]
	*/
	string firstInnerText() const {
		string collected;
		foreach(node; children) {
			if(node.nodeType == NodeType.Text)
				collected ~= node.nodeValue();
			else
				break; // the leading run of text nodes is over
		}
		return collected;
	}


	/**
		Returns the text directly under this element.


		Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
		past child tags. So, `<example>some <b>bold</b> text</example>`
		will return `some  text` because it only gets the text, skipping non-text children.

		See_also: [firstInnerText], [innerText]
	*/
	@property string directText() {
		string collected;
		foreach(node; children) {
			if(node.nodeType != NodeType.Text)
				continue; // skip child tags, but keep looking past them
			collected ~= node.nodeValue();
		}

		return collected;
	}

	/**
		Sets the direct text, without modifying other child nodes.


		Unlike [innerText], this does *not* remove existing elements in the element.

		It only replaces the first text node it sees.

		If there are no text nodes, it calls [appendText].

		So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
	@property void directText(string text) {
		foreach(e; children) {
			if(e.nodeType == NodeType.Text) {
				// NOTE(review): the cast result is not null-checked; this assumes every node
				// reporting NodeType.Text really is a TextNode
				auto it = cast(TextNode) e;
				it.contents = text;
				return; // only the first text node is replaced
			}
		}

		// no text node among the direct children: add one at the end
		appendText(text);
	}

	// do nothing, this is primarily a virtual hook
	// for links and forms
	void setValue(string field, string value) { }
	void setValue(string field, string[] value) { }


	// this is a thing so i can remove observer support if it gets slow
	// I have not implemented all these yet
	//
	// Packs the arguments into a DomMutationEvent targeting this element and hands it to
	// the owning document. Elements not attached to a document send nothing.
	private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
		if(parentDocument is null) return;
		DomMutationEvent me;
		me.operation = operation;
		me.target = this;
		me.relatedString = s1;
		me.relatedString2 = s2;
		me.related = r;
		me.related2 = r2;
		parentDocument.dispatchMutationEvent(me);
	}

	// putting all the members up front

	// this ought to be private. don't use it directly.
	Element[] children;

	/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
	string tagName;

	/++
		This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.

		History:
			`AttributesHolder` replaced `string[string]` on August 22, 2024
	+/
	AttributesHolder attributes;

	/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
	/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
	private bool selfClosed;

	// whatever this element is attached to (an Element or a Document, via DomParent), or null when detached
	private DomParent parent_;

	/// Get the parent Document object that contains this element.
	/// It may be null, so remember to check for that.
@property inout(Document) parentDocument() inout {
		if(this.parent_ is null)
			return null;
		// `cast()` strips the inout qualifier so the walk can use plain mutable locals;
		// it is re-applied on the way out
		auto p = cast() this.parent_.asElement;
		auto prev = cast() this;
		// climb while the parent is an Element; `prev` ends up as the topmost element
		while(p) {
			prev = p;
			if(p.parent_ is null)
				return null; // the chain ends in a detached element: no document
			p = cast() p.parent_.asElement;
		}
		// prev's parent is not an Element, so ask it whether it is the Document
		return cast(inout) prev.parent_.asDocument;
	}

	// Attaches this element directly to `doc` by making the document its parent.
	/*deprecated*/ @property void parentDocument(Document doc) {
		parent_ = doc;
	}

	/// Returns the parent node in the tree this element is attached to.
	inout(Element) parentNode() inout {
		if(parent_ is null)
			return null;

		// null here when the parent is not an Element (for example, the Document itself)
		auto p = parent_.asElement;

		// a DocumentFragment parent is skipped over: report the fragment's own parent element instead
		if(cast(DocumentFragment) p) {
			if(p.parent_ is null)
				return null;
			else
				return p.parent_.asElement;
		}

		return p;
	}

	//protected
	// Sets the parent link only; the new parent's `children` array is not touched here.
	Element parentNode(Element e) {
		parent_ = e;
		return e;
	}

	// these are here for event handlers. Don't forget that this library never fires events.
	// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)

	version(dom_with_events) {
		EventHandler[][string] bubblingEventHandlers;
		EventHandler[][string] capturingEventHandlers;
		EventHandler[string] defaultEventHandlers;

		// Registers `handler` for `event` in the capturing or bubbling table.
		void addEventListener(string event, EventHandler handler, bool useCapture = false) {
			// accept javascript-style "onclick" names by dropping the leading "on"
			if(event.length > 2 && event[0..2] == "on")
				event = event[2 .. $];

			if(useCapture)
				capturingEventHandlers[event] ~= handler;
			else
				bubblingEventHandlers[event] ~= handler;
		}
	}


	// and now methods

	/++
		Convenience function to try to do the right thing for HTML. This is the main way I create elements.

		History:
			On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private
			immutable global list for HTML.
It still defaults to the same list, but you can change it now via
			the parameter.
		See_Also:
			[addChild], [addSibling]
	+/
	static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) {
		bool selfClosed = tagName.isInArray(selfClosedElements);

		Element e;
		// want to create the right kind of object for the given tag...
		switch(tagName) {
			case "#text":
				// text nodes return immediately: childInfo is the text and childInfo2 is unused
				e = new TextNode(null, childInfo);
				return e;
			// break;
			case "table":
				e = new Table(null);
			break;
			case "a":
				e = new Link(null);
			break;
			case "form":
				e = new Form(null);
			break;
			case "tr":
				e = new TableRow(null);
			break;
			case "td", "th":
				e = new TableCell(null, tagName);
			break;
			default:
				e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
		}

		// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
		e.tagName = tagName;
		e.selfClosed = selfClosed;

		// How childInfo / childInfo2 are used depends on the tag:
		//   audio        -> child <source> built from childInfo (if non-empty) / fallback text
		//   source, img  -> src / type (source) or alt (img)
		//   link         -> href / rel
		//   option       -> inner text / value
		//   input        -> name / value, with type forced to "hidden"
		//   button       -> inner text / type
		//   a            -> inner text / href
		//   script,style -> raw inner source (childInfo2 unused)
		//   meta         -> name / content
		//   anything else-> inner text / class name
		// The test is `!is null`, not `.length`: an empty but non-null childInfo still enters the switch.
		if(childInfo !is null)
			switch(tagName) {
				/* html5 convenience tags */
				case "audio":
					if(childInfo.length)
						e.addChild("source", childInfo);
					if(childInfo2 !is null)
						e.appendText(childInfo2);
				break;
				case "source":
					e.src = childInfo;
					if(childInfo2 !is null)
						e.type = childInfo2;
				break;
				/* regular html 4 stuff */
				case "img":
					e.src = childInfo;
					if(childInfo2 !is null)
						e.alt = childInfo2;
				break;
				case "link":
					e.href = childInfo;
					if(childInfo2 !is null)
						e.rel = childInfo2;
				break;
				case "option":
					e.innerText = childInfo;
					if(childInfo2 !is null)
						e.value = childInfo2;
				break;
				case "input":
					e.type = "hidden";
					e.name = childInfo;
					if(childInfo2 !is null)
						e.value = childInfo2;
				break;
				case "button":
					e.innerText = childInfo;
					if(childInfo2 !is null)
						e.type = childInfo2;
				break;
				case "a":
					e.innerText = childInfo;
					if(childInfo2 !is null)
						e.href = childInfo2;
				break;
				case "script":
				case "style":
					e.innerRawSource = childInfo;
				break;
				case "meta":
					e.name = childInfo;
					if(childInfo2 !is null)
						e.content = childInfo2;
				break;
				/* generically, assume we were passed text and perhaps class */
				default:
					e.innerText = childInfo;
					if(childInfo2.length)
						e.className = childInfo2;
			}

		return e;
	}

	/// ditto
	static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
		// FIXME: childInfo2 is ignored when info1 is null
		// `"not null"[0..0]` is an empty string whose pointer is non-null, so the
		// `childInfo !is null` test in the main overload passes and childInfo2 is applied
		auto m = Element.make(tagName, "not null"[0..0], childInfo2);
		m.innerHTML = innerHtml.source;
		return m;
	}

	/// ditto
	static Element make(string tagName, Element child, string childInfo2 = null) {
		// a null childInfo skips the per-tag handling in the main overload, so childInfo2
		// has no effect here (see the FIXME on the Html overload)
		auto m = Element.make(tagName, cast(string) null, childInfo2);
		m.appendChild(child);
		return m;
	}

	/++
		Makes an element from an interpolated sequence.

		FIXME: add a type interpolator thing that can be replaced
		FIXME: syntax check at compile time?
FIXME: allow a DocumentFragment in some cases
	+/
	static Element make(Args...)(arsd.core.InterpolationHeader head, Args args, arsd.core.InterpolationFooter foot) {
		string html;

		import arsd.core;
		// build one html string out of the interpolation pieces, deciding per piece
		// (at compile time) whether it is trusted markup or data that must be escaped
		foreach(arg; args) {
			// header/footer markers carry no content
			static if(is(typeof(arg) == InterpolationHeader))
				{}
			else
			static if(is(typeof(arg) == InterpolationFooter))
				{}
			else
			// literal text written in the source: appended as-is
			static if(is(typeof(arg) == InterpolatedLiteral!h, string h))
				html ~= h;
			else
			// the source text of an interpolated expression: ignored, only its value (a later arg) matters
			static if(is(typeof(arg) == InterpolatedExpression!code, string code))
				{}
			else
			// explicitly raw strings are appended without escaping
			static if(is(typeof(arg) : iraw))
				html ~= arg.s;
			else
			// FIXME: what if we are inside a <script> ? or an attribute etc
			// Html values are already markup: appended without escaping
			static if(is(typeof(arg) : Html))
				html ~= arg.source;
			else
			// elements are serialized through toString
			static if(is(typeof(arg) : Element))
				html ~= arg.toString();
			else
				// everything else is converted to a string and entity-encoded
				html ~= htmlEntitiesEncode(toStringInternal(arg));
		}

		// parse the assembled markup (strictly) inside a throwaway <root>, then return
		// the first element matching "> *" under it
		auto root = Element.make("root");
		root.innerHTML(html, true /* strict mode */);
		return root.querySelector(" > *");
	}

	/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
	this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
		// note: _parentDocument is not stored or otherwise used in this constructor
		tagName = _tagName;
		foreach(k, v; _attributes)
			attributes[k] = v;
		selfClosed = _selfClosed;

		version(dom_node_indexes)
			this.dataset.nodeIndex = to!string(&(this.attributes));

		// a tag name containing a space is rejected (assert only, so not checked in release builds)
		assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
	}

	/++
		Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
		Note also that without a parent document, elements are always in strict, case-sensitive mode.

		History:
			On February 8, 2021, the `selfClosedElements` parameter was added.
It defaults to the same behavior as
			before: using the hard-coded list of HTML elements, but it can now be overridden. If you use
			[Document.createElement], it will use the list set for the current document. Otherwise, you can pass
			something here if you like.
	+/
	this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
		tagName = _tagName;
		foreach(key, value; _attributes)
			attributes[key] = value;
		// self-closed status comes purely from membership in the supplied list
		selfClosed = tagName.isInArray(selfClosedElements);

		// this is meant to reserve some memory. It makes a small, but consistent improvement.
		//children.length = 8;
		//children.length = 0;

		version(dom_node_indexes)
			this.dataset.nodeIndex = to!string(&(this.attributes));
	}

	// Bare constructor: sets no tag name or attributes; the document argument is not used.
	private this(Document _parentDocument) {
		version(dom_node_indexes)
			this.dataset.nodeIndex = to!string(&(this.attributes));
	}


	/* *******************************
	       Navigating the DOM
	*********************************/

	/// Returns the first child of this element. If it has no children, returns null.
	/// Remember, text nodes are children too.
	@property Element firstChild() {
		if(children.length == 0)
			return null;
		return children[0];
	}

	/// Returns the last child of the element, or null if it has no children. Remember, text nodes are children too.
	@property Element lastChild() {
		if(children.length == 0)
			return null;
		return children[$ - 1];
	}

	// FIXME UNTESTED
	/// the next or previous element you would encounter if you were reading it in the source. May be a text node or other special non-tag object if you enabled them.
Element nextInSource() {
		// source order: first child, else next sibling, else the next sibling of the
		// nearest ancestor that has one
		auto n = firstChild;
		if(n is null)
			n = nextSibling();
		if(n is null) {
			auto p = this.parentNode;
			while(p !is null && n is null) {
				n = p.nextSibling;
				// move up one level each pass; without this the loop never terminated
				// when the parent had no next sibling
				p = p.parentNode;
			}
		}

		return n;
	}

	/// ditto
	Element previousInSource() {
		auto p = previousSibling;
		if(p is null) {
			auto par = parentNode;
			// NOTE(review): when this element has a parent, `par.lastChild` is this element's
			// last sibling (or this element itself), not something earlier in the source;
			// returning `par` looks like the intent. Left as-is pending confirmation.
			if(par)
				p = par.lastChild;
			if(p is null)
				p = par;
		}
		return p;
	}

	/++
		Returns the next or previous sibling that is not a text node. Please note: the behavior with comments is subject to change. Currently, it will return a comment or other nodes if it is in the tree (if you enabled it with [Document.enableAddingSpecialTagsToDom] or [Document.parseSawComment]) and not if you didn't, but the implementation will probably change at some point to skip them regardless.

		Equivalent to [previousSibling]/[nextSibling]("*").

		Please note it may return `null`.
	+/
	@property Element previousElementSibling() {
		return previousSibling("*");
	}

	/// ditto
	@property Element nextElementSibling() {
		return nextSibling("*");
	}

	/++
		Returns the next or previous sibling matching the `tagName` filter. The default filter of `null` will return the first sibling it sees, even if it is a comment or text node, or anything else. A filter of `"*"` will match any tag with a name. Otherwise, the string must match the [tagName] of the sibling you want to find.
+/
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	// Scan the parent's children up to (not including) this node and
	// remember the last one that satisfies the filter.
	Element ps = null;
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		// "*" accepts any non-text node; null accepts anything;
		// otherwise the tag name must match exactly.
		const wildcardHit = tagName == "*" && e.nodeType != NodeType.Text;
		if(wildcardHit || tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

/// ditto
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	// Skip everything up to and including this node, then return the
	// first following sibling that satisfies the filter.
	bool passedSelf = false;
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			passedSelf = true;
			continue;
		}
		if(!passedSelf)
			continue;

		const wildcardHit = tagName == "*" && e.nodeType != NodeType.Text;
		if(wildcardHit || tagName is null || e.tagName == tagName)
			return e;
	}

	return null;
}


/++
	Gets the nearest node, going up the chain, with the given tagName
	May return null or throw. The type `T` will specify a subclass like
	[Form], [Table], or [Link], which it will cast for you when found.

	When `tagName` is null and `T` is [Form], [Table], or [Link], the filter
	defaults to `"form"`, `"table"`, or `"a"` respectively.

	Throws:
		[ElementNotFoundException] when `T` is not plain [Element] and no
		ancestor could be cast to `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // must be an assignment, not the comparison `==`
	}

	// walk up until a matching ancestor is found or the chain ends
	auto par = this.parentNode;
	while(par !is null && !(tagName is null || par.tagName == tagName))
		par = par.parentNode;

	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/++
	Searches this element and the tree of elements under it for one matching the given `id` attribute.
+/
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(candidate; tree) {
		if(candidate.id == id)
			return candidate;
	}
	return null;
}

/++
	Returns a child element that matches the given `selector`.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.

	A selector component that begins with a combinator (for example `> p`)
	is accepted as an extension; it is matched relative to this element.

	Tip: to use namespaces, escape the colon in the name:

	---
		element.querySelector(`ns\:tag`); // the backticks are raw strings then the backslash is interpreted by querySelector
	---
+/
@scriptable
Element querySelector(string selector) {
	Selector s = Selector(selector);

	foreach(ref comp; s.components) {
		// only components whose first part carries a leading combinator need patching
		if(!comp.parts.length || !(comp.parts[0].separation > 0))
			continue;

		// this is illegal in standard dom, but i use it a lot
		// gonna insert a :scope thing
		SelectorPart scopePart;
		scopePart.separation = -1;
		scopePart.scopeElement = true;
		comp.parts = scopePart ~ comp.parts;
	}

	foreach(ele; tree) {
		if(s.matchesElement(ele, this))
			return ele;
	}
	return null;
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	auto compiled = Selector(selector);
	return compiled.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	// start at this element and climb one parent at a time
	for(Element e = this; e !is null; e = e.parentNode) {
		if(e.matches(selector))
			return e;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector?
and given a selector, give me all the elements

	The name `getElementsBySelector` was the original name, written back before the name `querySelector` was standardized (this library is older than you might think!), but they do the same thing..
*/
@scriptable
Element[] querySelectorAll(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.

	// tag names are matched case-insensitively only for loose documents
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;

	// each comma-separated selector contributes its matches, in order
	Element[] ret;
	foreach(sel; parseSelectorString(selector, caseSensitiveTags))
		ret ~= sel.getElements(this, null);
	return ret;
}

/// ditto
alias getElementsBySelector = querySelectorAll;

/++
	Returns child elements that have the given class name or tag name.

	Please note the standard specifies this should return a live node list. This means, in Javascript for example, if you loop over the value returned by getElementsByTagName and getElementsByClassName and remove the elements, the length of the list will decrease. When I implemented this, I figured that was more trouble than it was worth and returned a plain array instead. By the time I had the infrastructure to make it simple, I didn't want to do the breaking change.

	So these are incompatible with Javascript in the face of live dom mutation and will likely remain so.
+/
Element[] getElementsByClassName(string cn) {
	// is this correct?
	return getElementsBySelector("." ~ cn);
}

/// ditto
Element[] getElementsByTagName(string tag) {
	if(parentDocument && parentDocument.loose)
		tag = tag.toLower();
	Element[] ret;
	foreach(e; tree)
		if(e.tagName == tag || tag == "*")
			ret ~= e;
	return ret;
}


/* *******************************
          Attributes
*********************************/

/**
	Gets the given attribute value, or null if the
	attribute is not set.

	In a loose document the name is lower-cased before lookup.

	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	return attributes.get(name, null);
}

/**
	Sets an attribute. Returns this for easy chaining

	For `href` and `src` (compared case-insensitively), any leading
	`javascript:` or `vbscript:` scheme is removed from the value before it
	is stored. Surrounding whitespace is ignored when detecting the scheme,
	and repeated schemes (`javascript:javascript:...`) are all removed.
*/
@scriptable
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// script-scheme URLs are never wanted in link/source attributes
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		for(;;) {
			// Detect and slice on the SAME trimmed string, so leading
			// whitespace cannot shift the offsets.
			auto trimmed = value.strip();
			auto v = trimmed.toLower();
			if(v.startsWith("vbscript:"))
				value = trimmed[9 .. $];
			else if(v.startsWith("javascript:"))
				value = trimmed[11 .. $];
			else
				break; // nothing left to strip; value is kept as-is
		}
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	return (name in attributes) ? true : false;
}

/**
	Removes the given attribute from the element.
*/
@scriptable
Element removeAttribute(string name)
out(ret) {
	assert(ret is this);
}
do {
	// loose documents store attribute names lower-cased
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(name in attributes) {
		attributes.remove(name);
	}

	// observers are notified whether or not the attribute was present
	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets or sets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto c = getAttribute("class");
	return c is null ? "" : c;
}

/// ditto
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

	Only names accepted by `isConvenientAttribute` are available this way.
*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	// pure getter when no value was supplied
	if(v is null)
		return getAttribute(name);

	setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children (the same array, not a copy).
*/
@property inout(Element[]) childNodes() inout {
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	auto view = DataSet(this);
	return view;
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	auto view = AttributeSet(this);
	return view;
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	auto view = ElementStyle(this);
	return view;
}

/++
	This sets the style attribute with a string, then returns the same
	view the getter gives.
+/
@property ElementStyle style(string s) {
	setAttribute("style", s);
	return this.style;
}

// Currently a no-op: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;

/// Don't use this. It can try to parse out the style element but it isn't complete and if I get back to it, it won't be for a while.
@property CssStyle computedStyle() {
	// computed once, then cached
	if(_computedStyle !is null)
		return _computedStyle;

	// NOTE(review): this string is assembled but not passed to anything
	// visible in this function; only `computedStyleFactory(this)` feeds the result.
	auto style = this.getAttribute("style");
	/* legacy presentational html attributes are treated as css here */
	if(this.hasAttribute("width"))
		style ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		style ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		style ~= "; background-color: " ~ this.attrs.bgcolor;
	if(this.tagName == "body" && this.hasAttribute("text"))
		style ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		style ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = computedStyleFactory(this);
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// own offset plus the offsets of every element on the offsetParent chain
		int total = offsetLeft;
		for(auto p = offsetParent; p; p = p.offsetParent)
			total += p.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		int total = offsetTop;
		for(auto p = offsetParent; p; p = p.offsetParent)
			total += p.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
/// Each removed child has its `parentNode` reset to null.
void removeAllChildren()
out {
	assert(this.children.length == 0);
}
do {
	foreach(child; children)
		child.parentNode = null;
	children = null;
}

/++
	Adds a sibling element before or after this one in the dom.
	This element must currently have a parent; the work is delegated to
	the parent's [insertAfter] / [insertBefore].

	History: added June 13, 2020
+/
Element appendSibling(Element e) {
	parentNode.insertAfter(this, e);
	return e;
}

/// ditto
Element prependSibling(Element e) {
	parentNode.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	If `e` is a [DocumentFragment], its children are appended in its place
	and reparented to this element.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
in {
	assert(e !is null);
	assert(e !is this);
}
out (ret) {
	assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(e.parentDocument is this.parentDocument);
	assert(e is ret);
}
do {
	// moving, not copying: detach from the old parent first
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	// an element with children can no longer be written as <tag />
	selfClosed = false;
	if(auto frag = cast(DocumentFragment) e) {
		children ~= frag.children;
		// reparent the spliced nodes, as insertBefore, insertAfter and
		// prependChild do, so they do not keep a stale parent
		foreach(child; frag.children)
			child.parentNode = this;
	} else
		children ~= e;

	e.parentNode = this;

	/+
	foreach(item; e.tree)
		item.parentDocument = this.parentDocument;
	+/

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param.
/// `where` must be a child of this element and `what` must have no parent.
/// A [DocumentFragment] contributes its children instead of itself.
Element insertBefore(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);

	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
do {
	foreach(i, e; children) {
		if(e !is where)
			continue;

		if(auto frag = cast(DocumentFragment) what) {
			children = children[0..i] ~ frag.children ~ children[i..$];
			foreach(child; frag.children)
				child.parentNode = this;
		} else {
			children = children[0..i] ~ what ~ children[i..$];
		}
		what.parentNode = this;
		return what;
	}

	// `where` was not found; the in-contract rules this out in checked builds.
	// (The unreachable assert(0) that used to follow this return was removed.)
	return what;
}

/++
	Inserts the given element `what` as a child of `this`, immediately after the existing child `where`.
+/
Element insertAfter(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);
	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
do {
	foreach(i, e; children) {
		if(e !is where)
			continue;

		// splice point: just past `where`
		const cut = i + 1;
		if(auto frag = cast(DocumentFragment) what) {
			// a fragment contributes its children rather than itself
			children = children[0 .. cut] ~ what.children ~ children[cut .. $];
			foreach(child; frag.children)
				child.parentNode = this;
		} else
			children = children[0 .. cut] ~ what ~ children[cut .. $];
		what.parentNode = this;
		return what;
	}

	// `where` not found among our children; nothing is inserted
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
in {
	assert(child !is null);
	assert(replacement !is null);
	assert(child.parentNode is this);
}
out(ret) {
	assert(ret is child);
	assert(ret.parentNode is null);
	assert(replacement.parentNode is this);
	assert(replacement.parentDocument is this.parentDocument);
}
do {
	foreach(ref slot; this.children) {
		if(slot !is child)
			continue;

		// overwrite the slot in place so the position in the list is kept
		child.parentNode = null;
		slot = replacement;
		replacement.parentNode = this;
		return child;
	}
	assert(0);
}


/++
	Appends the given text to the node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
See_Also:
		[firstInnerText], [directText], [innerText], [appendChild]
+/
@scriptable
Element appendText(string text) {
	// the text is wrapped in a TextNode, so no entity encoding is needed here
	appendChild(new TextNode(parentDocument, text));
	return this;
}

/++
	Returns child elements which are of a tag type (excludes text, comments, etc.).


	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
+/
@property Element[] childElements(string tagName = null) {
	Element[] ret;
	foreach(c; children) {
		// node type 1 is what the base Element.nodeType reports
		if(c.nodeType != 1)
			continue;
		if(tagName !is null && c.tagName != tagName)
			continue;
		ret ~= c;
	}
	return ret;
}

/++
	Appends the given html to the element, returning the elements appended


	This is similar to `element.innerHTML += "html string";` in Javascript.
+/
@scriptable
Element[] appendHtml(string html) {
	// parse inside a throwaway wrapper, then move the parsed nodes over
	auto parsed = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(parsed.root);
}

/++
	Returns `this` for use inside `with` expressions.

	History:
		Added December 20, 2024
+/
inout(Element) self() inout pure @nogc nothrow @safe scope return {
	return this;
}

/++
	Inserts a child under this element after the element `where`.
+/
void insertChildAfter(Element child, Element where)
in {
	assert(child !is null);
	assert(where !is null);
	assert(where.parentNode is this);
	assert(!selfClosed);
	//assert(isInArray(where, children));
}
out {
	assert(child.parentNode is this);
	assert(where.parentNode is this);
	//assert(isInArray(where, children));
	//assert(isInArray(child, children));
}
do {
	foreach(i, c; children) {
		if(c !is where)
			continue;

		// splice point: just past `where`
		const at = i + 1;
		if(auto frag = cast(DocumentFragment) child) {
			children = children[0..at] ~ child.children ~ children[at..$];
			//foreach(child; frag.children)
				//child.parentNode = this;
		} else
			children = children[0..at] ~ child ~ children[at..$];
		child.parentNode = this;
		break;
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children. If it is not null but is not one of our children, the stolen children are also appended at the end, so they are never lost.

	Returns: the stolen children.
+/
Element[] stealChildren(Element e, Element position = null)
in {
	assert(!selfClosed);
	assert(e !is null);
	//if(position !is null)
		//assert(isInArray(position, children));
}
out (ret) {
	assert(e.children.length == 0);
	// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
	version(none)
	debug foreach(child; ret) {
		assert(child.parentNode is this);
		assert(child.parentDocument is this.parentDocument);
	}
}
do {
	foreach(c; e.children)
		c.parentNode = this;

	bool placed = false;
	if(position !is null) {
		foreach(i, child; children) {
			if(child is position) {
				children = children[0..i] ~
					e.children ~
					children[i..$];
				placed = true;
				break;
			}
		}
	}

	// `position` was null or is not one of our children. The nodes were
	// already reparented above and `e` is emptied below, so they must
	// still be attached somewhere: fall back to appending.
	if(!placed)
		children ~= e.children;

	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
/// A [DocumentFragment] contributes its children instead of itself.
Element prependChild(Element e)
in {
	assert(e.parentNode is null);
	assert(!selfClosed);
}
out {
	assert(e.parentNode is this);
	assert(e.parentDocument is this.parentDocument);
	// a fragment is never itself placed in the list, only its children are
	assert((cast(DocumentFragment) e !is null) || children[0] is e);
}
do {
	if(auto frag = cast(DocumentFragment) e) {
		children = e.children ~ children;
		foreach(child; frag.children)
			child.parentNode = this;
	} else
		children = e ~ children;
	e.parentNode = this;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Only the text written by this call is returned, even when `where`
	already held data.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output begins inside the shared appender
	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	selfClosed = false;

	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// Detach the old children only after the parse succeeded. Without this
	// they keep `parentNode == this` while no longer being in `children`,
	// and moving one elsewhere later fails with "no such child".
	removeAllChildren();

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
	}

	// the temporary root must not keep referencing nodes it no longer owns
	doc.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// drop the old children cleanly so none keeps a stale parent
	removeAllChildren();

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
	}

	stripOut();

	return doc.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// Detach the old children properly; merely truncating the array would
	// leave them with a stale `parentNode`.
	removeAllChildren();
	auto rs = new RawSource(parentDocument, rawSource);
	children ~= rs;
	rs.parentNode = this;
}

/++
	Replaces the element `find`, which must be a child of `this`, with the element `replace`, which must have no parent.

	Returns: `replace`.

	Throws: `Exception` if `find` is not among this element's children.
+/
Element replaceChild(Element find, Element replace)
in {
	assert(find !is null);
	assert(find.parentNode is this);
	assert(replace !is null);
	assert(replace.parentNode is null);
}
out(ret) {
	assert(ret is replace);
	assert(replace.parentNode is this);
	assert(replace.parentDocument is this.parentDocument);
	assert(find.parentNode is null);
}
do {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(ref slot; children) {
		if(slot !is find)
			continue;

		// swap in place so the position in the child list is preserved
		replace.parentNode = this;
		find.parentNode = null;
		slot = replace;
		return replace;
	}

	throw new Exception("no such child ");// ~  find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString);
}

/**
	Replaces the given element with a whole group.
*/
void replaceChild(Element find, Element[] replace)
in {
	assert(find !is null);
	assert(replace !is null);
	assert(find.parentNode is this);
	debug foreach(r; replace)
		assert(r.parentNode is null);
}
out {
	assert(find.parentNode is null);
	assert(children.length >= replace.length);
	debug foreach(child; children)
		assert(child !is find);
	debug foreach(r; replace)
		assert(r.parentNode is this);
}
do {
	// an empty replacement list is simply a removal
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);

	for(int i = 0; i < children.length; i++) {
		if(children[i] !is find)
			continue;

		children[i].parentNode = null; // this element should now be dead
		// the first replacement takes the old slot...
		children[i] = replace[0];
		foreach(e; replace)
			e.parentNode = this;

		// ...and the remainder is spliced in right after it
		children = .insertAfter(children, i, replace[1..$]);
		return;
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element.

	Throws: `Exception` if `c` is not among this element's children.
*/
Element removeChild(Element c)
in {
	assert(c !is null);
	assert(c.parentNode is this);
}
out {
	debug foreach(child; children)
		assert(child !is c);
	assert(c.parentNode is null);
}
do {
	foreach(i, e; children) {
		if(e !is c)
			continue;

		// rebuild the list without slot i, then orphan the child
		children = children[0..i] ~ children [i+1..$];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
out (ret) {
	assert(children.length == 0);
	debug foreach(r; ret)
		assert(r.parentNode is null);
}
do {
	// hand back a copy so truncating our own list cannot affect the result
	Element[] oldChildren = children.dup;
	foreach(c; oldChildren)
		c.parentNode = null;

	children.length = 0;

	return oldChildren;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> &amp; code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like [textContent].

	See_Also:
		[visibleText], which is closer to what the real `innerText`
		does.
*/
@scriptable
@property string innerText() const {
	string s;
	foreach(child; children) {
		// text nodes contribute their value; everything else recurses
		if(child.nodeType != NodeType.Text)
			s ~= child.innerText;
		else
			s ~= child.nodeValue();
	}
	return s;
}

/// ditto
alias textContent = innerText;

/++
	Gets the element's visible text, similar to how it would look assuming
	the document was HTML being displayed by a browser. This means it will
	attempt whitespace normalization (unless it is a `<pre>` tag), add `\n`
	characters for `<br>` tags, and I reserve the right to make it process
	additional css and tags in the future.

	If you need specific output, use the more stable [textContent] property
	or iterate yourself with [tree] or a recursive function with [children].

	History:
		Added March 25, 2022 (dub v10.8)
+/
string visibleText() const {
	return this.visibleTextHelper(this.tagName == "pre");
}

// Recursive worker for visibleText. `pre` is true once we are inside a
// <pre> element, which turns whitespace normalization off for the subtree.
private string visibleTextHelper(bool pre) const {
	string result;
	foreach(thing; this.children) {
		if(thing.nodeType == NodeType.Text)
			result ~= pre ? thing.nodeValue : normalizeWhitespace(thing.nodeValue);
		else if(thing.tagName == "br")
			result ~= "\n";
		else
			result ~= thing.visibleTextHelper(pre || thing.tagName == "pre");
	}
	return result;
}

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;

	// Detach the old children first. Overwriting the list alone would leave
	// them claiming this element as their parent, and moving one elsewhere
	// later fails with "no such child".
	removeAllChildren();

	Element e = new TextNode(parentDocument, text);
	children = [e];
	e.parentNode = this;
}

/**
	Strips this node out of the document, replacing it with the given text.
	This element must currently have a parent.
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
do {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	auto e = Element.make(this.tagName);
	// duplicate the attributes so the clone never shares storage with us
	e.attributes = this.attributes.aadup;
	e.selfClosed = this.selfClosed;

	if(deepClone) {
		foreach(child; children) {
			e.appendChild(child.cloneNode(true));
		}
	}


	return e;
}

/// W3C DOM interface.
/// Only really meaningful on [TextNode] instances, but the interface is present on the base class.
/// This base implementation always returns the empty string.
string nodeValue() const {
	return "";
}

// The declared return type is a plain int rather than the NodeType enum.
/// The numeric node type. This base implementation always returns 1.
@property int nodeType() const {
	return 1;
}


// Class invariant. The only live check is debug-only: a tag name must not contain a space.
// The remaining blocks are historical checks that were disabled and are kept for reference.
invariant () {
	debug assert(tagName.indexOf(" ") == -1);

	// commented cuz it gets into recursive pain and eff dat.
	/+
	if(children !is null)
	foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
		assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}
	+/

	/+
	// this isn't helping
	if(parent_ && parent_.asElement) {
		bool found = false;
		foreach(child; parent_.asElement.children)
			if(child is this)
				found = true;
		assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement)));
	}
	+/

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	// all of the real work lives in writeToAppender
	return this.writeToAppender();
}

/++
	Returns if the node would be printed to string as `<tag />` or `<tag></tag>`. In other words, if it has no non-empty text nodes and no element nodes. Please note that whitespace text nodes are NOT considered empty; `Html("<tag> </tag>").isEmpty == false`.


	The value is undefined if there are comment or processing instruction nodes. The current implementation returns false if it sees those, assuming the nodes haven't been stripped out during parsing. But I'm not married to the current implementation and reserve the right to change it without notice.

	History:
		Added December 3, 2021 (dub v10.5)

+/
public bool isEmpty() const {
	foreach(node; this.children) {
		// anything that is not a text node is a tag of some kind, and a text
		// node only counts as empty when its value has zero length (null or "")
		const isText = node.nodeType == NodeType.Text;
		if(!isText || node.nodeValue.length != 0)
			return false;
	}

	return true;
}

// Builds the line break plus indentation that precedes a tag in pretty
// printed output. A null `indentWith` turns indentation off entirely and
// yields null. With `insertComments` the whitespace is wrapped in `<!--` and `-->`.
protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
	if(indentWith is null)
		return null;

	// at the top we don't have anything to really do
	//if(parent_ is null)
		//return null;

	// I've used isEmpty before but this other check seems better....
	//|| this.isEmpty())

	string indentation;

	if(insertComments)
		indentation ~= "<!--";
	indentation ~= "\n";

	for(int depth = 0; depth < indentationLevel; depth++)
		indentation ~= indentWith;

	if(insertComments)
		indentation ~= "-->";

	return indentation;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.

	$(PITFALL
		This function is not stable. Its interface and output may change without
		notice.
The only promise I make is that it will continue to make a best- 4114 effort attempt at being useful for debugging by human eyes. 4115 4116 I have used it in the past for diffing html documents, but even then, it 4117 might change between versions. If it is useful, great, but beware; this 4118 use is at your own risk. 4119 ) 4120 4121 History: 4122 On November 19, 2021, I changed this to `final`. If you were overriding it, 4123 change our override to `toPrettyStringImpl` instead. It now just calls 4124 `toPrettyStringImpl.strip` to be an entry point for a stand-alone call. 4125 4126 If you are calling it as part of another implementation, you might want to 4127 change that call to `toPrettyStringImpl` as well. 4128 4129 I am NOT considering this a breaking change since this function is documented 4130 to only be used for eyeball debugging anyway, which means the exact format is 4131 not specified and the override behavior can generally not be relied upon. 4132 4133 (And I find it extremely unlikely anyone was subclassing anyway, but if you were, 4134 email me, and we'll see what we can do. I'd like to know at least.) 4135 4136 I reserve the right to make future changes in the future without considering 4137 them breaking as well. 4138 +/ 4139 final string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 4140 return toPrettyStringImpl(insertComments, indentationLevel, indentWith).strip; 4141 } 4142 4143 string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 4144 4145 // first step is to concatenate any consecutive text nodes to simplify 4146 // the white space analysis. this changes the tree! 
but i'm allowed since 4147 // the comment always says it changes the comments 4148 // 4149 // actually i'm not allowed cuz it is const so i will cheat and lie 4150 /+ 4151 TextNode lastTextChild = null; 4152 for(int a = 0; a < this.children.length; a++) { 4153 auto child = this.children[a]; 4154 if(auto tn = cast(TextNode) child) { 4155 if(lastTextChild) { 4156 lastTextChild.contents ~= tn.contents; 4157 for(int b = a; b < this.children.length - 1; b++) 4158 this.children[b] = this.children[b + 1]; 4159 this.children = this.children[0 .. $-1]; 4160 } else { 4161 lastTextChild = tn; 4162 } 4163 } else { 4164 lastTextChild = null; 4165 } 4166 } 4167 +/ 4168 4169 auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements); 4170 4171 const(Element)[] children; 4172 4173 TextNode lastTextChild = null; 4174 for(int a = 0; a < this.children.length; a++) { 4175 auto child = this.children[a]; 4176 if(auto tn = cast(const(TextNode)) child) { 4177 if(lastTextChild !is null) { 4178 lastTextChild.contents ~= tn.contents; 4179 } else { 4180 lastTextChild = new TextNode(""); 4181 lastTextChild.parentNode = cast(Element) this; 4182 lastTextChild.contents ~= tn.contents; 4183 children ~= lastTextChild; 4184 } 4185 } else { 4186 lastTextChild = null; 4187 children ~= child; 4188 } 4189 } 4190 4191 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 4192 4193 s ~= "<"; 4194 s ~= tagName; 4195 4196 // i sort these for consistent output. might be more legible 4197 // but especially it keeps it the same for diff purposes. 
4198 auto keys = sortStrings(attributes.keys); 4199 foreach(n; keys) { 4200 auto v = attributes[n]; 4201 s ~= " "; 4202 s ~= n; 4203 s ~= "=\""; 4204 s ~= htmlEntitiesEncode(v); 4205 s ~= "\""; 4206 } 4207 4208 if(selfClosed){ 4209 s ~= " />"; 4210 return s; 4211 } 4212 4213 s ~= ">"; 4214 4215 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 4216 // just keep them on the same line 4217 4218 if(isEmpty) { 4219 // no work needed, this is empty so don't indent just for a blank line 4220 } else if(children.length == 1 && children[0].isEmpty) { 4221 // just one empty one, can put it inline too 4222 s ~= children[0].toString(); 4223 } else if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) { 4224 foreach(child; children) { 4225 s ~= child.toString();//toPrettyString(false, 0, null); 4226 } 4227 } else { 4228 foreach(child; children) { 4229 assert(child !is null); 4230 4231 s ~= child.toPrettyStringImpl(insertComments, indentationLevel + 1, indentWith); 4232 } 4233 4234 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 4235 } 4236 4237 s ~= "</"; 4238 s ~= tagName; 4239 s ~= ">"; 4240 4241 return s; 4242 } 4243 4244 /+ 4245 /// Writes out the opening tag only, if applicable. 4246 string writeTagOnly(Appender!string where = appender!string()) const { 4247 +/ 4248 4249 /++ 4250 This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 4251 Note: the ordering of attributes in the string is undefined. 4252 Returns the string it creates. 4253 4254 Implementation_Notes: 4255 The order of attributes printed by this function is undefined, as permitted by the XML spec. You should NOT rely on any implementation detail noted here. 4256 4257 However, in practice, between June 14, 2019 and August 22, 2024, it actually did sort attributes by key name. 
After August 22, 2024, it changed to track attribute append order and will print them back out in the order in which the keys were first seen. 4258 4259 This is subject to change again at any time. Use [toPrettyString] if you want a defined output (toPrettyString always sorts by name for consistent diffing). 4260 +/ 4261 string writeToAppender(Appender!string where = appender!string()) const { 4262 assert(tagName !is null); 4263 4264 where.reserve((this.children.length + 1) * 512); 4265 4266 auto start = where.data.length; 4267 4268 where.put("<"); 4269 where.put(tagName); 4270 4271 /+ 4272 import std.algorithm : sort; 4273 auto keys = sort(attributes.keys); 4274 foreach(n; keys) { 4275 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 4276 +/ 4277 foreach(n, v; attributes) { 4278 //assert(v !is null); 4279 where.put(" "); 4280 where.put(n); 4281 where.put("=\""); 4282 htmlEntitiesEncode(v, where); 4283 where.put("\""); 4284 } 4285 4286 if(selfClosed){ 4287 where.put(" />"); 4288 return where.data[start .. $]; 4289 } 4290 4291 where.put('>'); 4292 4293 innerHTML(where); 4294 4295 where.put("</"); 4296 where.put(tagName); 4297 where.put('>'); 4298 4299 return where.data[start .. $]; 4300 } 4301 4302 /** 4303 Returns a lazy range of all its children, recursively. 4304 */ 4305 @property ElementStream tree() { 4306 return new ElementStream(this); 4307 } 4308 4309 // I moved these from Form because they are generally useful. 4310 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 4311 // FIXME: add overloads for other label types... 4312 /++ 4313 Adds a form field to this element, normally a `<input>` but `type` can also be `"textarea"`. 4314 4315 This is fairly html specific and the label uses my style. 
I recommend you view the source before you use it to better understand what it does. 4316 +/ 4317 /// Tags: HTML, HTML5 4318 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 4319 auto fs = this; 4320 auto i = fs.addChild("label"); 4321 4322 if(!(type == "checkbox" || type == "radio")) 4323 i.addChild("span", label); 4324 4325 Element input; 4326 if(type == "textarea") 4327 input = i.addChild("textarea"). 4328 setAttribute("name", name). 4329 setAttribute("rows", "6"); 4330 else 4331 input = i.addChild("input"). 4332 setAttribute("name", name). 4333 setAttribute("type", type); 4334 4335 if(type == "checkbox" || type == "radio") 4336 i.addChild("span", label); 4337 4338 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 4339 fieldOptions.applyToElement(input); 4340 return i; 4341 } 4342 4343 /// ditto 4344 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 4345 auto fs = this; 4346 auto i = fs.addChild("label"); 4347 i.addChild(label); 4348 Element input; 4349 if(type == "textarea") 4350 input = i.addChild("textarea"). 4351 setAttribute("name", name). 4352 setAttribute("rows", "6"); 4353 else 4354 input = i.addChild("input"). 4355 setAttribute("name", name). 4356 setAttribute("type", type); 4357 4358 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 
4359 fieldOptions.applyToElement(input); 4360 return i; 4361 } 4362 4363 /// ditto 4364 Element addField(string label, string name, FormFieldOptions fieldOptions) { 4365 return addField(label, name, "text", fieldOptions); 4366 } 4367 4368 /// ditto 4369 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 4370 auto fs = this; 4371 auto i = fs.addChild("label"); 4372 i.addChild("span", label); 4373 auto sel = i.addChild("select").setAttribute("name", name); 4374 4375 foreach(k, opt; options) 4376 sel.addChild("option", opt, k); 4377 4378 // FIXME: implement requirements somehow 4379 4380 return i; 4381 } 4382 4383 /// ditto 4384 Element addSubmitButton(string label = null) { 4385 auto t = this; 4386 auto holder = t.addChild("div"); 4387 holder.addClass("submit-holder"); 4388 auto i = holder.addChild("input"); 4389 i.type = "submit"; 4390 if(label.length) 4391 i.value = label; 4392 return holder; 4393 } 4394 4395 } 4396 4397 // computedStyle could argubaly be removed to bring size down 4398 //pragma(msg, __traits(classInstanceSize, Element)); 4399 //pragma(msg, Element.tupleof); 4400 4401 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 4402 /++ 4403 Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header) 4404 4405 History: 4406 On December 16, 2022, it disabled the special case treatment of `<script>` and `<style>` that [Document] 4407 does for HTML. To get the old behavior back, add `, true` to your constructor call. 4408 +/ 4409 /// Group: core_functionality 4410 class XmlDocument : Document { 4411 /++ 4412 Constructs a stricter-mode XML parser and parses the given data source. 4413 4414 History: 4415 The `Utf8Stream` version of the constructor was added on February 22, 2025. 
4416 +/ 4417 this(string data, bool enableHtmlHacks = false) { 4418 this(new Utf8Stream(data), enableHtmlHacks); 4419 } 4420 4421 /// ditto 4422 this(Utf8Stream data, bool enableHtmlHacks = false) { 4423 selfClosedElements = null; 4424 inlineElements = null; 4425 rawSourceElements = null; 4426 contentType = "text/xml; charset=utf-8"; 4427 _prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n"; 4428 4429 parseStream(data, true, true, !enableHtmlHacks); 4430 } 4431 } 4432 4433 unittest { 4434 // FIXME: i should also make XmlDocument do different entities than just html too. 4435 auto str = "<html><style>foo {}</style><script>void function() { a < b; }</script></html>"; 4436 auto document = new Document(str, true, true); 4437 assert(document.requireSelector("style").children[0].tagName == "#raw"); 4438 assert(document.requireSelector("script").children[0].tagName == "#raw"); 4439 try { 4440 auto xml = new XmlDocument(str); 4441 assert(0); 4442 } catch(MarkupException e) { 4443 // failure expected, script special case is not valid XML without a dtd (which isn't here) 4444 } 4445 //assert(xml.requireSelector("style").children[0].tagName == "#raw"); 4446 //assert(xml.requireSelector("script").children[0].tagName == "#raw"); 4447 } 4448 4449 4450 4451 /* domconvenience follows { */ 4452 4453 /// finds comments that match the given txt. Case insensitive, strips whitespace. 4454 /// Group: core_functionality 4455 Element[] findComments(Document document, string txt) { 4456 return findComments(document.root, txt); 4457 } 4458 4459 /// ditto 4460 Element[] findComments(Element element, string txt) { 4461 txt = txt.strip().toLower(); 4462 Element[] ret; 4463 4464 foreach(comment; element.getElementsByTagName("#comment")) { 4465 string t = comment.nodeValue().strip().toLower(); 4466 if(t == txt) 4467 ret ~= comment; 4468 } 4469 4470 return ret; 4471 } 4472 4473 /// An option type that propagates null. 
/// See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	/// Only methods returning an Element type, `string`, or `void` are supported;
	/// anything else fails the `static assert`.
	auto opDispatch(string method, T...)(T args) {
		alias type = typeof(__traits(getMember, element, method)(args));
		static if(is(type : Element)) {
			if(element is null)
				return MaybeNullElement!type(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(type == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if(is(type == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// Wraps a single element.
	this(Element e) {
		elements = [e];
	}

	/// Collects the results of `e.querySelectorAll(selector)`.
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Wraps an existing array; the array is not copied.
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `getElementsBySelector(selector)` on every member and concatenates the results.
	ElementCollection opIndex(string selector) {
		ElementCollection ec;
		foreach(e; elements)
			ec.elements ~= e.getElementsBySelector(selector);
		return ec;
	}

	/// Returns the `i`th member.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1..$];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return !elements.length;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		Note that `separator` is appended after every member, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string text;
		foreach(e; elements) {
			text ~= mixin("e." ~ method);
			text ~= separator;
		}
		return text;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(e; elements) {
			// shallow clone: each member gets its own wrapper without contents
			e.wrapIn(what.cloneNode(false));
		}

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// The host type must provide `get(string)` and `set(string, string)`.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// Property syntax: reads with `get` when `v` is null, otherwise writes with `set`.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		if(v !is null)
			return set(name, v);
		return get(name);
	}

	/// Index syntax for reading, forwards to `get`.
	string opIndex(string key) const {
		return get(key);
	}

	/// Index syntax for writing, forwards to `set`.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): this refers to a `fields` member that none of the hosts visible here declare — confirm.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	/// Binds the proxy to `e`.
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/// Sets the `data-` attribute whose suffix is `unCamelCase(name)`; returns `value`.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Reads the `data-` attribute whose suffix is `unCamelCase(name)`.
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Generally, you shouldn't create this yourself, since you can use [Element.attrs] instead.
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/++
		Sets a `value` for attribute with `name`. If the attribute doesn't exist, this will create it, even if `value` is `null`.
	+/
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/++
		Provides support for testing presence of an attribute with the `in` operator.

		History:
			Added December 16, 2020 (dub v10.10)
	+/
	auto opBinaryRight(string op : "in")(string name) const
	{
		return name in _element.attributes;
	}
	///
	unittest
	{
		auto doc = new XmlDocument(`<test attr="test"/>`);
		assert("attr" in doc.root.attrs);
		assert("test" !in doc.root.attrs);
	}

	/++
		Returns the value of attribute `name`, or `null` if doesn't exist
	+/
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}

// One node of the singly linked attribute list used by AttributesHolder.
// The key and value bytes are stored back to back right after the header.
private struct InternalAttribute {
	// variable length structure
	private InternalAttribute* next;
	private uint totalLength; // key length + value length
	private ushort keyLength;
	private char[0] chars; // marks where the trailing key/value bytes start

	// this really should be immutable tbh
	// the first keyLength bytes of the trailing storage
	inout(char)[] key() inout return {
		return chars.ptr[0 .. keyLength];
	}

	// the bytes from keyLength up to totalLength
	inout(char)[] value() inout return {
		return chars.ptr[keyLength .. totalLength];
	}

	// Allocates a node holding copies of `key` and `value`.
	// Throws: Exception if the key length does not fit in a ushort or the
	// combined length does not fit in a uint.
	static InternalAttribute* make(in char[] key, in char[] value) {
		// old code was
		//auto data = new ubyte[](InternalAttribute.sizeof + key.length + value.length);
		//GC.addRange(data.ptr, data.length); // MUST add the range to scan it!

		import core.memory;
		// but this code is a bit better, notice we did NOT set the NO_SCAN attribute because of the presence of the next pointer
		// (this can sometimes be a pessimization over the separate strings but meh, most of these attributes are supposed to be small)
		auto obj = cast(InternalAttribute*) GC.calloc(InternalAttribute.sizeof + key.length + value.length);

		// assert(key.length > 0);

		// store the truncated lengths, then compare against the real ones to detect overflow
		obj.totalLength = cast(uint) (key.length + value.length);
		obj.keyLength = cast(ushort) key.length;
		if(key.length != obj.keyLength)
			throw new Exception("attribute key overflow");
		if(key.length + value.length != obj.totalLength)
			throw new Exception("attribute length overflow");

		obj.key[] = key[];
		obj.value[] = value[];

		return obj;
	}

	// FIXME: disable default ctor and op new
}

import core.exception;

// Attribute storage: a singly linked list of InternalAttribute nodes that
// iterates in insertion order (the unittest below checks this).
struct AttributesHolder {
	private @system InternalAttribute* attributes; // head of the list, null when empty

	/+
	invariant() {
		const(InternalAttribute)* wtf = attributes;
		while(wtf) {
			assert(wtf != cast(void*) 1);
			assert(wtf.keyLength != 0);
			import std.stdio; writeln(wtf.key, "=", wtf.value);
			wtf = wtf.next;
		}
	}
	+/

	/+
		It is legal to do foo["key", "default"] to call it with no error...
	+/
	// Strict lookup: throws RangeError when `key` is not present.
	string opIndex(scope const char[] key) const {
		auto found = find(key);
		if(found is null)
			throw new RangeError(key.idup); // FIXME
		return cast(string) found.value;
	}

	// Lenient lookup: returns `returnedIfKeyNotFound` when `key` is not present.
	string get(scope const char[] key, string returnedIfKeyNotFound = null) const {
		auto attr = this.find(key);
		if(attr is null)
			return returnedIfKeyNotFound;
		else
			return cast(string) attr.value;
	}

	// All keys, in iteration order.
	private string[] keys() const {
		string[] ret;
		foreach(k, v; this)
			ret ~= k;
		return ret;
	}

	/+
		If this were to return a string* it'd be tricky cuz someone could try to rebind it, which is impossible.

		This is a breaking change. You can get a similar result though with [get].
	+/
	bool opBinaryRight(string op : "in")(scope const char[] key) const {
		return find(key) !is null;
	}

	// Linear search; returns the node whose key equals `key`, or null.
	private inout(InternalAttribute)* find(scope const char[] key) inout @trusted {
		inout(InternalAttribute)* current = attributes;
		while(current) {
			// assert(current > cast(void*) 1);
			if(current.key == key)
				return current;
			current = current.next;
		}
		return null;
	}

	// Replaces the whole contents with the pairs of `aa`, in the order the
	// associative array yields them.
	void opAssign(string[string] aa) @trusted {
		this.attributes = null;
		foreach(k, v; aa)
			this[k] = v;
	}

	// Unlinks the node for `key`; does nothing when the key is absent.
	void remove(scope const char[] key) @trusted {
		if(attributes is null)
			return;
		auto current = attributes;
		InternalAttribute* previous;
		while(current) {
			if(current.key == key)
				break;
			previous = current;
			current = current.next;
		}
		if(current is null)
			return;
		if(previous is null)
			attributes = current.next; // removing the head
		else
			previous.next = current.next;
		// assert(previous.next != cast(void*) 1);
		// assert(attributes != cast(void*) 1);
	}

	// Sets `key` to `value`. An existing node is never edited in place: a new
	// node is allocated and spliced into the old one's position, so the key
	// keeps its place in the iteration order. New keys are appended at the end.
	void opIndexAssign(scope const char[] value, scope const char[] key) @trusted {
		if(attributes is null) {
			attributes = InternalAttribute.make(key, value);
			return;
		}
		auto current = attributes;

		// the head has no predecessor, so it is handled separately
		if(current.key == key) {
			if(current.value != value) {
				auto replacement = InternalAttribute.make(key, value);
				attributes = replacement;
				replacement.next = current.next;
		// assert(replacement.next != cast(void*) 1);
		// assert(attributes != cast(void*) 1);
			}
			return;
		}

		// stop with `current` on the node just before the match, or on the last node
		while(current.next) {
			if(current.next.key == key) {
				if(current.next.value == value)
					return; // replacing immutable value with self, no change
				break;
			}
			current = current.next;
		}
		assert(current !is null);

		auto replacement = InternalAttribute.make(key, value);
		if(current.next !is null)
			replacement.next = current.next.next; // skip over the node being replaced
		current.next = replacement;
		// assert(current.next != cast(void*) 1);
		// assert(replacement.next != cast(void*) 1);
	}

	// foreach support; a non-zero result from the delegate stops the iteration.
	int opApply(int delegate(string key, string value) dg) const @trusted {
		const(InternalAttribute)* current = attributes;
		while(current !is null) {
			if(auto res = dg(cast(string) current.key, cast(string) current.value))
				return res;
			current = current.next;
		}
		return 0;
	}

	// Space separated `key="value"` pairs. Values are written as-is, without entity encoding.
	string toString() {
		string ret;
		foreach(k, v; this) {
			if(ret.length)
				ret ~= " ";
			ret ~= k;
			ret ~= `="`;
			ret ~= v;
			ret ~= `"`;
		}
		return ret;
	}

	bool empty() const @trusted {
		return attributes is null;
	}
}

// checks insertion order, replacement in place, appending, and whole-AA assignment
unittest {
	AttributesHolder holder;
	assert(holder.empty);
	holder["one"] = "1";
	assert(!holder.empty);
	holder["two"] = "2";
	holder["three"] = "3";

	{
		assert("one" in holder);
		assert("two" in holder);
		assert("three" in holder);
		assert("four" !in holder);

		int count;
		foreach(k, v; holder) {
			switch(count) {
				case 0: assert(k == "one" && v == "1"); break;
				case 1: assert(k == "two" && v == "2"); break;
				case 2: assert(k == "three" && v == "3"); break;
				default: assert(0);
			}
			count++;
		}
	}

	holder["two"] = "dos";

	{
		assert("one" in holder);
		assert("two" in holder);
		assert("three" in holder);
		assert("four" !in holder);

		int count;
		foreach(k, v; holder) {
			switch(count) {
				case 0: assert(k == "one" && v == "1"); break;
				case 1: assert(k == "two" && v == "dos"); break;
				case 2: assert(k == "three" && v == "3"); break;
				default: assert(0);
			}
			count++;
		}
	}

	holder["four"] = "4";

	{
		assert("one" in holder);
		assert("two" in holder);
		assert("three" in holder);
		assert("four" in holder);

		int count;
		foreach(k, v; holder) {
			switch(count) {
				case 0: assert(k == "one" && v == "1"); break;
				case 1: assert(k == "two" && v == "dos"); break;
				case 2: assert(k == "three" && v == "3"); break;
				case 3: assert(k == "four" && v == "4"); break;
				default: assert(0);
			}
			count++;
		}
	}

	holder = ["foo": "bar"];
	assert("foo" in holder);
	assert(holder["foo"] == "bar");
	assert("four" !in holder);
}

/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	// Captures the element's current `style` attribute so the destructor
	// can tell whether it was changed.
	this(Element parent) {
		_element = parent;
		_attribute = _element.getAttribute("style");
		originalAttribute = _attribute;
	}

	// Writes the text back if `_attribute` no longer refers to the string we started with.
	~this() {
		if(_attribute !is originalAttribute)
			_element.setAttribute("style", _attribute);
	}

	Element _element;
	string _attribute;
	string originalAttribute;

	/+
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}
	+/

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	// Sets one css property and rewrites the whole attribute from the parsed
	// rules. `name` may be camelCased; "cssFloat" maps to "float". An empty
	// `value` removes the property. Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;

		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto declarations = rules();
		declarations[name] = value;

		_attribute = "";
		foreach(property, setting; declarations) {
			/* css can't do empty rules anyway so we'll use that to remove */
			const hasSetting = setting !is null && setting.length != 0;
			if(!hasSetting)
				continue;

			if(_attribute.length)
				_attribute ~= " ";
			_attribute ~= property ~ ": " ~ setting ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	// Reads one css property, using the same name translation as `set`.
	// Returns null when the property is not present.
	string get(string name) const {
		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto declarations = rules();
		auto found = name in declarations;
		return (found is null) ? null : *found;
	}

	// Parses the attribute text into a property => value map. A rule that
	// has no colon is stored under its own text with an empty value.
	string[string] rules() const {
		string[string] parsed;

		foreach(declaration; _attribute.split(";")) {
			declaration = declaration.strip();
			if(declaration.length == 0)
				continue;

			auto colon = declaration.indexOf(":");
			if(colon == -1) {
				parsed[declaration] = "";
				continue;
			}

			auto property = declaration[0 .. colon].strip();
			auto setting = declaration[colon + 1 .. $].strip();
			parsed[property] = setting;
		}

		return parsed;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
string unCamelCase(string a) {
	string dashed;
	foreach(ch; a) {
		// only ASCII capitals start a new dashed word
		const isCapital = (ch >= 'A' && ch <= 'Z');
		if(isCapital)
			dashed ~= "-" ~ toLower("" ~ ch)[0];
		else
			dashed ~= ch;
	}
	return dashed;
}

/// Translates a css style property-name to a camel cased propertyName
string camelCase(string a) {
	string result;
	bool capitalizeNext = false;
	foreach(ch; a) {
		if(ch == '-') {
			// the dash is dropped; it only marks the next character
			capitalizeNext = true;
			continue;
		}

		if(capitalizeNext) {
			capitalizeNext = false;
			result ~= toUpper("" ~ ch);
		} else {
			result ~= ch;
		}
	}
	return result;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.)


/++
	This might belong in another module, but it represents a file with a mime type and some data.
Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}


///.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Throws ElementNotFoundException when `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	auto ret = cast(T) e;
	if(ret is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return ret;
}


///.
/// Group: core_functionality
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// A fragment has no markup of its own; it serializes as just its inner html.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty-prints each child at the caller's indentation level, adding nothing for the fragment itself.
	override string toPrettyStringImpl(bool insertComments, int indentationLevel, string indentWith) const {
		string s;
		foreach(child; children)
			s ~= child.toPrettyStringImpl(insertComments, indentationLevel, indentWith);
		return s;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	/+
	override Element parentNode(Element p) {
		this.parentNode = p;
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
	+/
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress this by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	// remember where our part starts so only the newly encoded text is returned
	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (!encodeNonAscii || (d < 128 && d > 0))
			output.put(d);
		else
			output.put("&#" ~ to!string(cast(int) d) ~ ";"); // numeric character reference (this branch also takes NUL)
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
/// Group: core_functionality
///
/// `entity` is the whole reference including the leading `&` and one trailing
/// character (normally `;`); that last character is always ignored. Returns
/// U+FFFD when the reference is not recognized.
dchar parseEntity(in dchar[] entity) {
	// we slice off the first and last character below, so there must be two of them
	if(entity.length < 2)
		return '\ufffd';

	char[128] buffer;
	int bpos;
	bool nameFits = true;
	foreach(char c; entity[1 .. $-1]) {
		if(bpos == buffer.length) {
			// far too long to be a named entity; don't run off the end of the buffer
			nameFits = false;
			break;
		}
		buffer[bpos++] = c;
	}

	if(nameFits) {
		char[] entityAsString = buffer[0 .. bpos];

		// binary search over the half-open range [min, max) so that
		// every index, including the first one, can be probed
		int min = 0;
		int max = cast(int) availableEntities.length;

		while(min < max) {
			int spot = (max - min) / 2 + min;
			if(availableEntities[spot] == entityAsString)
				return availableEntitiesValues[spot];
			else if(entityAsString < availableEntities[spot])
				max = spot;
			else
				min = spot + 1;
		}
	}

	switch(entity[1..$-1]) {
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		case "amp":
			return '&';
		// the next are html rather than xml

		// and handling numeric entities
		default:
			if(entity.length >= 3 && entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					if(entity.length < 4)
						return '\ufffd'; // "&#x" with nothing after it

					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					while(decimal.length && (decimal[$-1] < '0' || decimal[$-1] > '9'))
						decimal = decimal[0 .. $ - 1];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

unittest {
	// not in the binary search
	assert(parseEntity("&quot;"d) == '"');

	// numeric value
	assert(parseEntity("&#x0534;") == '\u0534');

	// not found at all
	assert(parseEntity("&asdasdasd;"d) == '\ufffd');

	// random values in the bin search
	assert(parseEntity("&Tab;"d) == '\t');
	assert(parseEntity("&raquo;"d) == '\&raquo;');

	// near the middle and edges of the bin search
	assert(parseEntity("&ascr;"d) == '\U0001d4b6');
	assert(parseEntity("&ast;"d) == '\u002a');
	assert(parseEntity("&AElig;"d) == '\u00c6');
	assert(parseEntity("&zwnj;"d) == '\u200c');
}

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer; // scratch space for utf_encode

	bool tryingEntity = false;
	bool tryingNumericEntity = false;
	bool tryingHexEntity = false;
	dchar[16] entityBeingTried; // the candidate reference, starting with its '&'
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0; // characters consumed since the '&'; candidates are abandoned at 9

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			if(entityBeingTriedLength == 2 && ch == '#') {
				tryingNumericEntity = true;
				continue;
			} else if(tryingNumericEntity && entityBeingTriedLength == 3 && ch == 'x') {
				tryingHexEntity = true;
				continue;
			}

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") {
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				} else {
					auto ch2 = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
					if(ch2 == '\ufffd') { // either someone put this in intentionally (lol) or we failed to get it
						// but either way, just abort and keep the plain text
						foreach(char c; entityBeingTried[0 .. entityBeingTriedLength - 1]) // cut off the & we're on now
							a ~= c;
					} else {
						a ~= buffer[0.. utf_encode(buffer, ch2)];
					}
				}

				// tryingEntity is still true
				goto new_entity;
			} else
			if(ch == ';') {
				tryingEntity = false;
				a ~= buffer[0.. utf_encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
			} else if(ch == ' ') {
				// e.g. you &amp i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]);
					a ~= buffer[0 .. utf_encode(buffer, ch)];
				}
			} else {
				if(tryingNumericEntity) {
					if(ch < '0' || ch > '9') {
						if(tryingHexEntity) {
							if(ch < 'A')
								goto trouble;
							if(ch > 'Z' && ch < 'a')
								goto trouble;
							if(ch > 'z')
								goto trouble;
						} else {
							trouble:
							if(strict)
								throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
							tryingEntity = false;
							// parseEntity ignores the last character, which is the offending one here
							a ~= buffer[0.. utf_encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
							a ~= ch;
							continue;
						}
					}
				}


				if(entityAttemptIndex >= 9) {
					done:
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				new_entity:
				tryingEntity = true;
				tryingNumericEntity = false;
				tryingHexEntity = false;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. utf_encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

unittest {
	// error recovery
	assert(htmlEntitiesDecode("&lt;&foo") == "<&foo"); // unterminated turned back to thing
	assert(htmlEntitiesDecode("&lt&foo") == "<&foo"); // semi-terminated... parse and carry on (is this really sane?)
	assert(htmlEntitiesDecode("loc&#61en_us&tracknum&#61;111") == "loc=en_us&tracknum=111"); // a bit of both, seen in a real life email
	assert(htmlEntitiesDecode("&amp test") == "&amp test"); // unterminated, just abort

	// in strict mode all of these should fail
	try { assert(htmlEntitiesDecode("&lt;&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&lt&foo", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("loc&#61en_us&tracknum&#61;111", true) == "<&foo"); assert(0); } catch(Exception e) { }
	try { assert(htmlEntitiesDecode("&amp test", true) == "& test"); assert(0); } catch(Exception e) { }

	// correct cases that should pass the same in strict or loose mode
	foreach(strict; [false, true]) {
		assert(htmlEntitiesDecode("&amp;hello&raquo; win", strict) == "&hello\&raquo; win");
	}
}

/// Base for nodes that are not ordinary tags; they refuse children and report nodeType 100.
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	///.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	///.
	@property override int nodeType() const {
		return 100;
	}
}

/// Holds text that is written to the output exactly as given, with no entity encoding.
/// Group: implementations
class RawSource : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	///.
	override string nodeValue() const {
		return this.toString();
	}

	///.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return source;
	}


	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	///.
	string source;
}

/// Base for embedded server-side code; serialized as `<` ~ source ~ `>`.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	///.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		return "<" ~ source ~ ">";
	}

	///.
	string source;
}

///.
/// Group: implementations
class PhpCode : ServerSideCode {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		source = s;
	}

	override PhpCode cloneNode(bool deep) {
		return new PhpCode(parentDocument, source);
	}
}

///.
/// Group: implementations
class AspCode : ServerSideCode {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		source = s;
	}

	override AspCode cloneNode(bool deep) {
		return new AspCode(parentDocument, source);
	}
}

/// A `<!...>` instruction; serialized as `<!` ~ source ~ `>`.
/// Group: implementations
class BangInstruction : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#bpi";
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	override BangInstruction cloneNode(bool deep) {
		return new BangInstruction(parentDocument, source);
	}

	///.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<!";
		s ~= source;
		s ~= ">";
		return s;
	}

	///.
	string source;
}

/// Serialized as `<` ~ source ~ `>`; the tag name is "#qpi".
/// Group: implementations
class QuestionInstruction : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#qpi";
	}

	override QuestionInstruction cloneNode(bool deep) {
		return new QuestionInstruction(parentDocument, source);
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	///.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<";
		s ~= source;
		s ~= ">";
		return s;
	}


	///.
	string source;
}

/// An html comment; serialized as `<!--` ~ source ~ `-->`.
/// Group: implementations
class HtmlComment : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	override HtmlComment cloneNode(bool deep) {
		return new HtmlComment(parentDocument, source);
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	///.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyStringImpl(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	///.
	string source;
}




///.
/// Group: implementations
class TextNode : Element {
  public:
	///.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	///
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Builds a text node from html source text, decoding its entities. Decoding is
	/// strict only when a parent document is given and that document is not loose.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	///.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	///.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	///.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Appends the entity-encoded contents to `where` and returns the encoded text.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	override string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		bool first = true;
		foreach(line; LineSplitter(e)) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// indent continuation lines with the caller's indent string,
				// the same one handed to toPrettyStringIndent for the first line
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	///.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	///.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

/++
	Represents a HTML link.
This provides some convenience methods for manipulating query strings, but otherwise is sthe same Element interface. 5877 5878 Please note this object may not be used for all `<a>` tags. 5879 +/ 5880 /// Group: implementations 5881 class Link : Element { 5882 5883 /++ 5884 Constructs `<a href="that href">that text</a>`. 5885 +/ 5886 this(string href, string text) { 5887 super("a"); 5888 setAttribute("href", href); 5889 innerText = text; 5890 } 5891 5892 /// ditto 5893 this(Document _parentDocument) { 5894 super(_parentDocument); 5895 this.tagName = "a"; 5896 } 5897 5898 /+ 5899 /// Returns everything in the href EXCEPT the query string 5900 @property string targetSansQuery() { 5901 5902 } 5903 5904 ///. 5905 @property string domainName() { 5906 5907 } 5908 5909 ///. 5910 @property string path 5911 +/ 5912 /// This gets a variable from the URL's query string. 5913 string getValue(string name) { 5914 auto vars = variablesHash(); 5915 if(name in vars) 5916 return vars[name]; 5917 return null; 5918 } 5919 5920 private string[string] variablesHash() { 5921 string href = getAttribute("href"); 5922 if(href is null) 5923 return null; 5924 5925 auto ques = href.indexOf("?"); 5926 string str = ""; 5927 if(ques != -1) { 5928 str = href[ques+1..$]; 5929 5930 auto fragment = str.indexOf("#"); 5931 if(fragment != -1) 5932 str = str[0..fragment]; 5933 } 5934 5935 string[] variables = str.split("&"); 5936 5937 string[string] hash; 5938 5939 foreach(var; variables) { 5940 auto index = var.indexOf("="); 5941 if(index == -1) 5942 hash[var] = ""; 5943 else { 5944 hash[decodeUriComponent(var[0..index])] = decodeUriComponent(var[index + 1 .. $]); 5945 } 5946 } 5947 5948 return hash; 5949 } 5950 5951 /// Replaces all the stuff after a ? in the link at once with the given assoc array values. 
5952 /*private*/ void updateQueryString(string[string] vars) { 5953 string href = getAttribute("href"); 5954 5955 auto question = href.indexOf("?"); 5956 if(question != -1) 5957 href = href[0..question]; 5958 5959 string frag = ""; 5960 auto fragment = href.indexOf("#"); 5961 if(fragment != -1) { 5962 frag = href[fragment..$]; 5963 href = href[0..fragment]; 5964 } 5965 5966 string query = "?"; 5967 bool first = true; 5968 foreach(name, value; vars) { 5969 if(!first) 5970 query ~= "&"; 5971 else 5972 first = false; 5973 5974 query ~= encodeUriComponent(name); 5975 if(value.length) 5976 query ~= "=" ~ encodeUriComponent(value); 5977 } 5978 5979 if(query != "?") 5980 href ~= query; 5981 5982 href ~= frag; 5983 5984 setAttribute("href", href); 5985 } 5986 5987 /// Sets or adds the variable with the given name to the given value 5988 /// It automatically URI encodes the values and takes care of the ? and &. 5989 override void setValue(string name, string variable) { 5990 auto vars = variablesHash(); 5991 vars[name] = variable; 5992 5993 updateQueryString(vars); 5994 } 5995 5996 override void setValue(string name, string[] variable) { 5997 assert(0, "not implemented FIXME"); 5998 } 5999 6000 /// Removes the given variable from the query string 6001 void removeValue(string name) { 6002 auto vars = variablesHash(); 6003 vars.remove(name); 6004 6005 updateQueryString(vars); 6006 } 6007 6008 /* 6009 ///. 6010 override string toString() { 6011 6012 } 6013 6014 ///. 6015 override string getAttribute(string name) { 6016 if(name == "href") { 6017 6018 } else 6019 return super.getAttribute(name); 6020 } 6021 */ 6022 } 6023 6024 /++ 6025 Represents a HTML form. This slightly specializes Element to add a few more convenience methods for adding and extracting form data. 6026 6027 Please note this object may not be used for all `<form>` tags. 6028 +/ 6029 /// Group: implementations 6030 class Form : Element { 6031 6032 ///. 
6033 this(Document _parentDocument) { 6034 super(_parentDocument); 6035 tagName = "form"; 6036 } 6037 6038 /// Overrides of the base class implementations that more confirm to *my* conventions when writing form html. 6039 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 6040 auto t = this.querySelector("fieldset div"); 6041 if(t is null) 6042 return super.addField(label, name, type, fieldOptions); 6043 else 6044 return t.addField(label, name, type, fieldOptions); 6045 } 6046 6047 /// ditto 6048 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 6049 auto type = "text"; 6050 auto t = this.querySelector("fieldset div"); 6051 if(t is null) 6052 return super.addField(label, name, type, fieldOptions); 6053 else 6054 return t.addField(label, name, type, fieldOptions); 6055 } 6056 6057 /// ditto 6058 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 6059 auto t = this.querySelector("fieldset div"); 6060 if(t is null) 6061 return super.addField(label, name, options, fieldOptions); 6062 else 6063 return t.addField(label, name, options, fieldOptions); 6064 } 6065 6066 /// ditto 6067 override void setValue(string field, string value) { 6068 setValue(field, value, true); 6069 } 6070 6071 override void setValue(string name, string[] variable) { 6072 assert(0, "not implemented FIXME"); 6073 } 6074 6075 // FIXME: doesn't handle arrays; multiple fields can have the same name 6076 6077 /// Set's the form field's value. For input boxes, this sets the value attribute. For 6078 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 6079 /// the checked/selected attribute from all, and adds it to the one matching the value. 6080 /// For checkboxes, if the value is non-null and not empty, it checks the box. 
6081 6082 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 6083 /// Otherwise, it makes a new input with type=hidden to keep the value. 6084 void setValue(string field, string value, bool makeNew) { 6085 auto eles = getField(field); 6086 if(eles.length == 0) { 6087 if(makeNew) { 6088 addInput(field, value); 6089 return; 6090 } else 6091 throw new Exception("form field does not exist"); 6092 } 6093 6094 if(eles.length == 1) { 6095 auto e = eles[0]; 6096 switch(e.tagName) { 6097 default: assert(0); 6098 case "textarea": 6099 e.innerText = value; 6100 break; 6101 case "input": 6102 string type = e.getAttribute("type"); 6103 if(type is null) { 6104 e.value = value; 6105 return; 6106 } 6107 switch(type) { 6108 case "checkbox": 6109 case "radio": 6110 if(value.length && value != "false") 6111 e.setAttribute("checked", "checked"); 6112 else 6113 e.removeAttribute("checked"); 6114 break; 6115 default: 6116 e.value = value; 6117 return; 6118 } 6119 break; 6120 case "select": 6121 bool found = false; 6122 foreach(child; e.tree) { 6123 if(child.tagName != "option") 6124 continue; 6125 string val = child.getAttribute("value"); 6126 if(val is null) 6127 val = child.innerText; 6128 if(val == value) { 6129 child.setAttribute("selected", "selected"); 6130 found = true; 6131 } else 6132 child.removeAttribute("selected"); 6133 } 6134 6135 if(!found) { 6136 e.addChild("option", value) 6137 .setAttribute("selected", "selected"); 6138 } 6139 break; 6140 } 6141 } else { 6142 // assume radio boxes 6143 foreach(e; eles) { 6144 string val = e.getAttribute("value"); 6145 //if(val is null) 6146 // throw new Exception("don't know what to do with radio boxes with null value"); 6147 if(val == value) 6148 e.setAttribute("checked", "checked"); 6149 else 6150 e.removeAttribute("checked"); 6151 } 6152 } 6153 } 6154 6155 /// This takes an array of strings and adds hidden <input> elements for each one of them. 
Unlike setValue, 6156 /// it makes no attempt to find and modify existing elements in the form to the new values. 6157 void addValueArray(string key, string[] arrayOfValues) { 6158 foreach(arr; arrayOfValues) 6159 addChild("input", key, arr); 6160 } 6161 6162 /// Gets the value of the field; what would be given if it submitted right now. (so 6163 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 6164 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 6165 string getValue(string field) { 6166 auto eles = getField(field); 6167 if(eles.length == 0) 6168 return ""; 6169 if(eles.length == 1) { 6170 auto e = eles[0]; 6171 switch(e.tagName) { 6172 default: assert(0); 6173 case "input": 6174 if(e.type == "checkbox") { 6175 if(e.checked) 6176 return e.value.length ? e.value : "checked"; 6177 return ""; 6178 } else 6179 return e.value; 6180 case "textarea": 6181 return e.innerText; 6182 case "select": 6183 foreach(child; e.tree) { 6184 if(child.tagName != "option") 6185 continue; 6186 if(child.selected) 6187 return child.value; 6188 } 6189 break; 6190 } 6191 } else { 6192 // assuming radio 6193 foreach(e; eles) { 6194 if(e.checked) 6195 return e.value; 6196 } 6197 } 6198 6199 return ""; 6200 } 6201 6202 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 6203 /++ 6204 Returns the form's contents in application/x-www-form-urlencoded format. 6205 6206 Bugs: 6207 Doesn't handle repeated elements of the same name nor files. 
6208 +/ 6209 string getPostableData() { 6210 bool[string] namesDone; 6211 6212 string ret; 6213 bool outputted = false; 6214 6215 foreach(e; getElementsBySelector("[name]")) { 6216 if(e.name in namesDone) 6217 continue; 6218 6219 if(outputted) 6220 ret ~= "&"; 6221 else 6222 outputted = true; 6223 6224 ret ~= encodeUriComponent(e.name) ~ "=" ~ encodeUriComponent(getValue(e.name)); 6225 6226 namesDone[e.name] = true; 6227 } 6228 6229 return ret; 6230 } 6231 6232 /// Gets the actual elements with the given name 6233 Element[] getField(string name) { 6234 Element[] ret; 6235 foreach(e; tree) { 6236 if(e.name == name) 6237 ret ~= e; 6238 } 6239 return ret; 6240 } 6241 6242 /// Grabs the <label> with the given for tag, if there is one. 6243 Element getLabel(string forId) { 6244 foreach(e; tree) 6245 if(e.tagName == "label" && e.getAttribute("for") == forId) 6246 return e; 6247 return null; 6248 } 6249 6250 /// Adds a new INPUT field to the end of the form with the given attributes. 6251 Element addInput(string name, string value, string type = "hidden") { 6252 auto e = new Element(parentDocument, "input", null, true); 6253 e.name = name; 6254 e.value = value; 6255 e.type = type; 6256 6257 appendChild(e); 6258 6259 return e; 6260 } 6261 6262 /// Removes the given field from the form. It finds the element and knocks it right out. 6263 void removeField(string name) { 6264 foreach(e; getField(name)) 6265 e.parentNode.removeChild(e); 6266 } 6267 6268 /+ 6269 /// Returns all form members. 6270 @property Element[] elements() { 6271 6272 } 6273 6274 ///. 
	string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
			&& name != "style" && name != "name" && name != "id" && name != "class")
	{

	}
	+/
	/+
	void submit() {
		// take its elements and submit them through http
	}
	+/
}

/++
	Represents a HTML table. Has some convenience methods for working with tabular data.
+/
/// Group: implementations
class Table : Element {

	/// You can make this yourself but you'd generally get one of these objects out of a html parse or [Element.make] call.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/++
		Creates an element with the given type and content. The argument can be an Element, Html, or other data which is converted to text with `to!string`

		The element is $(I not) appended to the table.
6306 +/ 6307 Element th(T)(T t) { 6308 Element e; 6309 if(parentDocument !is null) 6310 e = parentDocument.createElement("th"); 6311 else 6312 e = Element.make("th"); 6313 static if(is(T == Html)) 6314 e.innerHTML = t; 6315 else static if(is(T : Element)) 6316 e.appendChild(t); 6317 else 6318 e.innerText = to!string(t); 6319 return e; 6320 } 6321 6322 /// ditto 6323 Element td(T)(T t) { 6324 Element e; 6325 if(parentDocument !is null) 6326 e = parentDocument.createElement("td"); 6327 else 6328 e = Element.make("td"); 6329 static if(is(T == Html)) 6330 e.innerHTML = t; 6331 else static if(is(T : Element)) 6332 e.appendChild(t); 6333 else 6334 e.innerText = to!string(t); 6335 return e; 6336 } 6337 6338 /++ 6339 Passes each argument to the [th] method for `appendHeaderRow` or [td] method for the others, appends them all to the `<tbody>` element for `appendRow`, `<thead>` element for `appendHeaderRow`, or a `<tfoot>` element for `appendFooterRow`, and ensures it is appended it to the table. 6340 +/ 6341 Element appendHeaderRow(T...)(T t) { 6342 return appendRowInternal("th", "thead", t); 6343 } 6344 6345 /// ditto 6346 Element appendFooterRow(T...)(T t) { 6347 return appendRowInternal("td", "tfoot", t); 6348 } 6349 6350 /// ditto 6351 Element appendRow(T...)(T t) { 6352 return appendRowInternal("td", "tbody", t); 6353 } 6354 6355 /++ 6356 Takes each argument as a class name and calls [Element.addClass] for each element in the column associated with that index. 6357 6358 Please note this does not use the html `<col>` element. 6359 +/ 6360 void addColumnClasses(string[] classes...) 
{ 6361 auto grid = getGrid(); 6362 foreach(row; grid) 6363 foreach(i, cl; classes) { 6364 if(cl.length) 6365 if(i < row.length) 6366 row[i].addClass(cl); 6367 } 6368 } 6369 6370 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 6371 Element row = Element.make("tr"); 6372 6373 foreach(e; t) { 6374 static if(is(typeof(e) : Element)) { 6375 if(e.tagName == "td" || e.tagName == "th") 6376 row.appendChild(e); 6377 else { 6378 Element a = Element.make(innerType); 6379 6380 a.appendChild(e); 6381 6382 row.appendChild(a); 6383 } 6384 } else static if(is(typeof(e) == Html)) { 6385 Element a = Element.make(innerType); 6386 a.innerHTML = e.source; 6387 row.appendChild(a); 6388 } else static if(is(typeof(e) == Element[])) { 6389 Element a = Element.make(innerType); 6390 foreach(ele; e) 6391 a.appendChild(ele); 6392 row.appendChild(a); 6393 } else static if(is(typeof(e) == string[])) { 6394 foreach(ele; e) { 6395 Element a = Element.make(innerType); 6396 a.innerText = to!string(ele); 6397 row.appendChild(a); 6398 } 6399 } else { 6400 Element a = Element.make(innerType); 6401 a.innerText = to!string(e); 6402 row.appendChild(a); 6403 } 6404 } 6405 6406 foreach(e; children) { 6407 if(e.tagName == findType) { 6408 e.appendChild(row); 6409 return row; 6410 } 6411 } 6412 6413 // the type was not found if we are here... let's add it so it is well-formed 6414 auto lol = this.addChild(findType); 6415 lol.appendChild(row); 6416 6417 return row; 6418 } 6419 6420 /// Returns the `<caption>` element of the table, creating one if it isn't there. 6421 Element captionElement() { 6422 Element cap; 6423 foreach(c; children) { 6424 if(c.tagName == "caption") { 6425 cap = c; 6426 break; 6427 } 6428 } 6429 6430 if(cap is null) { 6431 cap = Element.make("caption"); 6432 appendChild(cap); 6433 } 6434 6435 return cap; 6436 } 6437 6438 /// Returns or sets the text inside the `<caption>` element, creating that element if it isnt' there. 
	@property string caption() {
		return captionElement().innerText;
	}

	/// ditto
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
		in {
			if(tablePortition is null)
				assert(tablePortition is null);
			else {
				// a portion must be a direct thead/tbody/tfoot child of this table
				assert(tablePortition !is null);
				assert(tablePortition.parentNode is this);
				assert(
					tablePortition.tagName == "tbody"
					||
					tablePortition.tagName == "tfoot"
					||
					tablePortition.tagName == "thead"
				);
			}
		}
	do {
		if(tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		int maxLength = 0;

		// Stores `cell` in grid row `row`. A position of -1 means "the first
		// null slot, or the end of the row"; otherwise the row is padded with
		// nulls up to `position`. Returns the column actually used.
		int insertCell(int row, int position, TableCell cell) {
			if(row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if(position == -1) {
				position++;
				foreach(item; ret[row]) {
					if(item is null)
						break;
					position++;
				}
			}

			if(position < ret[row].length)
				ret[row][position] = cell;
			else
				foreach(i; ret[row].length .. position + 1) {
					if(i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach(i, rowElement; rows) {
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			// NOTE(review): `position` only counts the columns consumed by this
			// row's own cells and the value returned by insertCell is discarded,
			// so slots already taken by a rowspan from an earlier row do not
			// appear to shift the column used for the k > 0 rows - confirm
			// against tables that mix rowspan and colspan.
			int position = 0;
			foreach(cellElement; rowElement.childNodes) {
				// children that are not TableCell objects are skipped
				auto cell = cast(TableCell) cellElement;
				if(cell is null)
					continue;

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				foreach(int j; 0 .. cell.colspan) {
					foreach(int k; 0 .. cell.rowspan)
						// if the first row, always append.
						insertCell(k + cast(int) i, k == 0 ? -1 : position, cell);
					position++;
				}
			}

			if(ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach(ref r; ret) {
			foreach(i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
/// Group: implementations
class TableRow : Element {
	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
/// Group: implementations
class TableCell : Element {
	///.
6564 this(Document _parentDocument, string _tagName) { 6565 super(_parentDocument, _tagName); 6566 } 6567 6568 /// Gets and sets the row/colspan attributes as integers 6569 @property int rowspan() const { 6570 int ret = 1; 6571 auto it = getAttribute("rowspan"); 6572 if(it.length) 6573 ret = to!int(it); 6574 return ret; 6575 } 6576 6577 /// ditto 6578 @property int colspan() const { 6579 int ret = 1; 6580 auto it = getAttribute("colspan"); 6581 if(it.length) 6582 ret = to!int(it); 6583 return ret; 6584 } 6585 6586 /// ditto 6587 @property int rowspan(int i) { 6588 setAttribute("rowspan", to!string(i)); 6589 return i; 6590 } 6591 6592 /// ditto 6593 @property int colspan(int i) { 6594 setAttribute("colspan", to!string(i)); 6595 return i; 6596 } 6597 6598 } 6599 6600 6601 /// This is thrown on parse errors. 6602 /// Group: implementations 6603 class MarkupException : Exception { 6604 6605 ///. 6606 this(string message, string file = __FILE__, size_t line = __LINE__) { 6607 super(message, file, line); 6608 } 6609 } 6610 6611 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6612 /// Group: implementations 6613 class ElementNotFoundException : Exception { 6614 6615 /// type == kind of element you were looking for and search == a selector describing the search. 6616 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6617 this.searchContext = searchContext; 6618 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6619 } 6620 6621 Element searchContext; 6622 } 6623 6624 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6625 /// 6626 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6627 /// Group: core_functionality 6628 struct Html { 6629 /// This string holds the actual html. Use it to retrieve the contents. 
6630 string source; 6631 } 6632 6633 // for the observers 6634 enum DomMutationOperations { 6635 setAttribute, 6636 removeAttribute, 6637 appendChild, // tagname, attributes[], innerHTML 6638 insertBefore, 6639 truncateChildren, 6640 removeChild, 6641 appendHtml, 6642 replaceHtml, 6643 appendText, 6644 replaceText, 6645 replaceTextOnly 6646 } 6647 6648 // and for observers too 6649 struct DomMutationEvent { 6650 DomMutationOperations operation; 6651 Element target; 6652 Element related; // what this means differs with the operation 6653 Element related2; 6654 string relatedString; 6655 string relatedString2; 6656 } 6657 6658 6659 private immutable static string[] htmlSelfClosedElements = [ 6660 // html 4 6661 "area","base","br","col","hr","img","input","link","meta","param", 6662 6663 // html 5 6664 "embed","source","track","wbr" 6665 ]; 6666 6667 private immutable static string[] htmlRawSourceElements = [ 6668 "script", "style" 6669 ]; 6670 6671 private immutable static string[] htmlInlineElements = [ 6672 "span", "strong", "em", "b", "i", "a" 6673 ]; 6674 6675 6676 /// helper function for decoding html entities 6677 int intFromHex(string hex) { 6678 int place = 1; 6679 int value = 0; 6680 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6681 int v; 6682 char q = hex[a]; 6683 if( q >= '0' && q <= '9') 6684 v = q - '0'; 6685 else if (q >= 'a' && q <= 'f') 6686 v = q - 'a' + 10; 6687 else if (q >= 'A' && q <= 'F') 6688 v = q - 'A' + 10; 6689 else throw new Exception("Illegal hex character: " ~ q); 6690 6691 value += v * place; 6692 6693 place *= 16; 6694 } 6695 6696 return value; 6697 } 6698 6699 6700 // CSS selector handling 6701 6702 // EXTENSIONS 6703 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6704 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6705 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 6706 // dt << 
dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 6707 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 6708 6709 6710 6711 // CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it. 6712 // That might be useful to implement, though I do have parent selectors too. 6713 6714 ///. 6715 static immutable string[] selectorTokens = [ 6716 // It is important that the 2 character possibilities go first here for accurate lexing 6717 "~=", "*=", "|=", "^=", "$=", "!=", 6718 "::", ">>", 6719 "<<", // my any-parent extension (reciprocal of whitespace) 6720 // " - ", // previous-sibling extension (whitespace required to disambiguate tag-names) 6721 ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")" 6722 ]; // other is white space or a name. 6723 6724 ///. 6725 sizediff_t idToken(string str, sizediff_t position) { 6726 sizediff_t tid = -1; 6727 char c = str[position]; 6728 foreach(a, token; selectorTokens) 6729 6730 if(c == token[0]) { 6731 if(token.length > 1) { 6732 if(position + 1 >= str.length || str[position+1] != token[1]) 6733 continue; // not this token 6734 } 6735 tid = a; 6736 break; 6737 } 6738 return tid; 6739 } 6740 6741 /// Parts of the CSS selector implementation 6742 // look, ma, no phobos! 
// new lexer by ketmar
//
// Splits a selector string into tokens: the punctuation listed in
// selectorTokens, names (with backslash escapes resolved), and the contents of
// quoted strings. Comments and control characters are dropped; a plain space
// is kept as a " " token because it is the descendant combinator.
//
// Throws: Exception when a quoted string is never closed.
string[] lexSelector (string selstr) {

	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			immutable char end = selstr[0];
			size_t pos = 1;
			bool escaping;
			// Scan to the matching closing quote. A character that follows a
			// backslash never terminates the string; the loop condition used
			// to test !escaping, which ended the scan right after the first
			// backslash instead.
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			// malformed input is the caller's data, not a programming error
			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			// pos + 1 == length is fine: the string was simply the last token
			selstr = selstr[pos + 1.. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me  /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
	// quoted strings: escaped quotes stay inside, and a string may end the input
	assert(lexSelector(`[a="b\"c"]`) == ["[", "a", "=", `b"c`, "]"]);
	assert(lexSelector(`"x"`) == ["x"]);
}

/// ditto
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// :nth-of-type(this)
	ParsedNth[] nthLastOfType; /// :nth-last-of-type(this)
	ParsedNth[] nthChild; /// :nth-child(this)

	bool firstChild; /// :first-child
	bool lastChild; /// :last-child

	bool firstOfType; /// :first-of-type
	bool lastOfType; /// :last-of-type

	bool emptyElement; /// :empty
	bool whitespaceOnly; /// :whitespace-only
	bool oddChild; /// :odd-child
	bool evenChild; /// :even-child

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; /// :root

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True when every field other than `separation` still has its initial value.
	bool isCleanSlateExceptSeparation() {
		// compare a copy with the separation reset against a pristine part
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	///.
6926 string toString() { 6927 string ret; 6928 switch(separation) { 6929 default: assert(0); 6930 case -1: break; 6931 case 0: ret ~= " "; break; 6932 case 1: ret ~= " > "; break; 6933 case 2: ret ~= " + "; break; 6934 case 3: ret ~= " ~ "; break; 6935 case 4: ret ~= " < "; break; 6936 } 6937 ret ~= tagNameFilter; 6938 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 6939 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 6940 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 6941 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 6942 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 6943 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 6944 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 6945 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 6946 6947 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 6948 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 6949 6950 foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")"; 6951 foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")"; 6952 6953 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 6954 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 6955 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 6956 6957 if(firstChild) ret ~= ":first-child"; 6958 if(lastChild) ret ~= ":last-child"; 6959 if(firstOfType) ret ~= ":first-of-type"; 6960 if(lastOfType) ret ~= ":last-of-type"; 6961 if(emptyElement) ret ~= ":empty"; 6962 if(whitespaceOnly) ret ~= ":whitespace-only"; 6963 if(oddChild) ret ~= ":odd-child"; 6964 if(evenChild) ret ~= ":even-child"; 6965 if(rootElement) ret ~= ":root"; 6966 if(scopeElement) ret ~= ":scope"; 6967 6968 return ret; 6969 } 6970 6971 // USEFUL 6972 /// Returns true if the given element matches this part 6973 bool matchElement(Element e, Element 
scopeElementNow = null) { 6974 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 6975 // Each individual call is reasonably fast already, but it adds up. 6976 if(e is null) return false; 6977 if(e.nodeType != 1) return false; 6978 6979 if(tagNameFilter != "" && tagNameFilter != "*") 6980 if(e.tagName != tagNameFilter) 6981 return false; 6982 if(firstChild) { 6983 if(e.parentNode is null) 6984 return false; 6985 if(e.parentNode.childElements[0] !is e) 6986 return false; 6987 } 6988 if(lastChild) { 6989 if(e.parentNode is null) 6990 return false; 6991 auto ce = e.parentNode.childElements; 6992 if(ce[$-1] !is e) 6993 return false; 6994 } 6995 if(firstOfType) { 6996 if(e.parentNode is null) 6997 return false; 6998 auto ce = e.parentNode.childElements; 6999 foreach(c; ce) { 7000 if(c.tagName == e.tagName) { 7001 if(c is e) 7002 return true; 7003 else 7004 return false; 7005 } 7006 } 7007 } 7008 if(lastOfType) { 7009 if(e.parentNode is null) 7010 return false; 7011 auto ce = e.parentNode.childElements; 7012 foreach_reverse(c; ce) { 7013 if(c.tagName == e.tagName) { 7014 if(c is e) 7015 return true; 7016 else 7017 return false; 7018 } 7019 } 7020 } 7021 if(scopeElement) { 7022 if(e !is scopeElementNow) 7023 return false; 7024 } 7025 if(emptyElement) { 7026 if(e.isEmpty()) 7027 return false; 7028 } 7029 if(whitespaceOnly) { 7030 if(e.innerText.strip.length) 7031 return false; 7032 } 7033 if(rootElement) { 7034 if(e.parentNode !is null) 7035 return false; 7036 } 7037 if(oddChild || evenChild) { 7038 if(e.parentNode is null) 7039 return false; 7040 foreach(i, child; e.parentNode.childElements) { 7041 if(child is e) { 7042 if(oddChild && !(i&1)) 7043 return false; 7044 if(evenChild && (i&1)) 7045 return false; 7046 break; 7047 } 7048 } 7049 } 7050 7051 bool matchWithSeparator(string attr, string value, string separator) { 7052 foreach(s; attr.split(separator)) 7053 if(s == value) 7054 return true; 7055 return false; 7056 } 7057 
7058 foreach(a; attributesPresent) 7059 if(a !in e.attributes) 7060 return false; 7061 foreach(a; attributesEqual) 7062 if(a[0] !in e.attributes || e.attributes[a[0]] != a[1]) 7063 return false; 7064 foreach(a; attributesNotEqual) 7065 // FIXME: maybe it should say null counts... this just bit me. 7066 // I did [attr][attr!=value] to work around. 7067 // 7068 // if it's null, it's not equal, right? 7069 //if(a[0] !in e.attributes || e.attributes[a[0]] == a[1]) 7070 if(e.getAttribute(a[0]) == a[1]) 7071 return false; 7072 foreach(a; attributesInclude) 7073 if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1)) 7074 return false; 7075 foreach(a; attributesStartsWith) 7076 if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1])) 7077 return false; 7078 foreach(a; attributesEndsWith) 7079 if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1])) 7080 return false; 7081 foreach(a; attributesIncludesSeparatedBySpaces) 7082 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " ")) 7083 return false; 7084 foreach(a; attributesIncludesSeparatedByDashes) 7085 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-")) 7086 return false; 7087 foreach(a; hasSelectors) { 7088 if(e.querySelector(a) is null) 7089 return false; 7090 } 7091 foreach(a; notSelectors) { 7092 auto sel = Selector(a); 7093 if(sel.matchesElement(e)) 7094 return false; 7095 } 7096 foreach(a; isSelectors) { 7097 auto sel = Selector(a); 7098 if(!sel.matchesElement(e)) 7099 return false; 7100 } 7101 foreach(a; whereSelectors) { 7102 auto sel = Selector(a); 7103 if(!sel.matchesElement(e)) 7104 return false; 7105 } 7106 7107 foreach(a; nthChild) { 7108 if(e.parentNode is null) 7109 return false; 7110 7111 auto among = e.parentNode.childElements; 7112 7113 if(!a.solvesFor(among, e)) 7114 return false; 7115 } 7116 foreach(a; nthOfType) { 7117 if(e.parentNode is null) 7118 return false; 7119 7120 auto among = 
e.parentNode.childElements(e.tagName); 7121 7122 if(!a.solvesFor(among, e)) 7123 return false; 7124 } 7125 foreach(a; nthLastOfType) { 7126 if(e.parentNode is null) 7127 return false; 7128 7129 auto among = Retro!Element(e.parentNode.childElements(e.tagName)); 7130 7131 if(!a.solvesFor(among, e)) 7132 return false; 7133 } 7134 7135 return true; 7136 } 7137 } 7138 7139 struct ParsedNth { 7140 int multiplier; 7141 int adder; 7142 7143 string of; 7144 7145 this(string text) { 7146 auto original = text; 7147 consumeWhitespace(text); 7148 if(text.startsWith("odd")) { 7149 multiplier = 2; 7150 adder = 1; 7151 7152 text = text[3 .. $]; 7153 } else if(text.startsWith("even")) { 7154 multiplier = 2; 7155 adder = 1; 7156 7157 text = text[4 .. $]; 7158 } else { 7159 int n = (text.length && text[0] == 'n') ? 1 : parseNumber(text); 7160 consumeWhitespace(text); 7161 if(text.length && text[0] == 'n') { 7162 multiplier = n; 7163 text = text[1 .. $]; 7164 consumeWhitespace(text); 7165 if(text.length) { 7166 if(text[0] == '+') { 7167 text = text[1 .. $]; 7168 adder = parseNumber(text); 7169 } else if(text[0] == '-') { 7170 text = text[1 .. $]; 7171 adder = -parseNumber(text); 7172 } else if(text[0] == 'o') { 7173 // continue, this is handled below 7174 } else 7175 throw new Exception("invalid css string at " ~ text ~ " in " ~ original); 7176 } 7177 } else { 7178 adder = n; 7179 } 7180 } 7181 7182 consumeWhitespace(text); 7183 if(text.startsWith("of")) { 7184 text = text[2 .. $]; 7185 consumeWhitespace(text); 7186 of = text[0 .. $]; 7187 } 7188 } 7189 7190 string toString() { 7191 return (to!string(multiplier) ~ "n" ~ (adder >= 0 ? "+" : "") ~ to!string(adder) ~ (of.length ? 
" of " : "") ~ of);
	}

	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;
		foreach(ele; elements) {
			if(of.length) {
				auto sel = Selector(of);
				if(!sel.matchesElement(ele))
					continue;
			}
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n, it matches

		idx -= adder;
		if(multiplier) {
			if(idx % multiplier == 0)
				return true;
		} else {
			return idx == 0;
		}
		return false;
	}

	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
// Walks outward from `start`, consuming one selector part per recursion level.
// The first part's `separation` decides which elements are candidates:
//   -1 / 0 : everything in `start.tree` (0 additionally skips `start` itself)
//    1     : the direct child nodes of `start`
//    2     : the next element sibling of `start`
//    3     : every element sibling after `start`
//    4     : the parent of `start` (the non-standard `<` operator)
//    5     : any ancestor - not implemented, yields nothing
// Each candidate that matches the part is recursed into with the remaining
// parts. An empty `parts` list yields `[start]`, which ends the recursion.
// The result may contain duplicates; callers run it through removeDuplicates.
/// ditto
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts, Element scopeElementNow = null) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e, scopeElementNow)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 2: // next-sibling
			auto e = start.nextSibling("*");
			// the last element in a parent has no next sibling; never
			// recurse with a null start point
			if(e !is null && part.matchElement(e, scopeElementNow))
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e, scopeElementNow))
						ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			// a detached or root element has no parent to match against
			if(e !is null && part.matchElement(e, scopeElementNow)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
			// not implemented: contributes no results
		break;
	}

	return ret;
}

/++
	Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	/// One entry per comma-separated alternative in the selector string.
	SelectorComponent[] components;
	/// The string this selector was constructed from, returned by [toString].
	string original;
	/++
		Parses the selector string and constructs the usable structure.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Returns true if the given element matches this selector,
		considered relative to an arbitrary element.

		The element matches if any one of the comma-separated
		components matches it.

		You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this
		with [std.algorithm.iteration.filter]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = element.tree.filter!(e => sel.matchesElement(e, document.root));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		foreach(component; components)
			if(component.matchElement(e, relativeTo))
				return true;

		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]

		Collects the matches of every component starting from `start`
		and returns them with duplicates removed.
	+/
	Element[] getMatchingElements(Element start, Element relativeTo = null) {
		Element[] ret;
		foreach(component; components)
			ret ~= getElementsBySelectorParts(start, component.parts, relativeTo);
		return removeDuplicates(ret);
	}

	/++
		Like [getMatchingElements], but returns a lazy range. Be careful
		about mutating the dom as you iterate through this.
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		return ElementStreamFilter(start.tree, (Element a) => this.matchesElement(a, relativeTo));
	}


	/// Returns the string this was built from
	string toString() {
		return original;
	}

	/++
		Returns a string from the parsed result


		(may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing)
	+/
	string parsedToString() {
		string ret;

		foreach(idx, component; components) {
			if(idx) ret ~= ", ";
			ret ~= component.toString();
		}

		return ret;
	}
}

/// One comma-free selector: a chain of [SelectorPart]s joined by combinators.
struct SelectorComponent {
	/// The parts in source order; each part's `separation` says how it relates to the part before it.
	SelectorPart[] parts;

	/// Concatenates the string form of every part.
	string toString() {
		string ret;
		foreach(part; parts)
			ret ~= part.toString();
		return ret;
	}

	// USEFUL
	/// Returns the elements reachable from `start` that match this component, without duplicates.
	Element[] getElements(Element start, Element relativeTo = null) {
		return removeDuplicates(getElementsBySelectorParts(start, parts, relativeTo));
	}

	// USEFUL (but not implemented)
	/++
		Tests whether `e` matches this component by walking the parts
		right to left: the last part must match `e` itself, then each
		earlier part must match a parent, ancestor or previous sibling
		as dictated by the combinator that followed it.

		If relativeTo == null, it assumes the root of the parent document.

		Throws: Exception if the first part starts with a combinator
		other than descendant (e.g. `"> body"`).
	+/
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null) return false;
		Element where = e;
		int lastSeparation = -1;

		auto lparts = parts;

		if(parts.length && parts[0].separation > 0) {
			throw new Exception("invalid selector");
		/+
			// if it starts with a non-trivial separator, inject
			// a "*" matcher to act as a root. for cases like document.querySelector("> body")
			// which implies html

			// however, if it is a child-matching selector and there are no children,
			// bail out early as it obviously cannot match.
			bool hasNonTextChildren = false;
			foreach(c; e.children)
				if(c.nodeType != 3) {
					hasNonTextChildren = true;
					break;
				}
			if(!hasNonTextChildren)
				return false;

			// there is probably a MUCH better way to do this.
			auto dummy = SelectorPart.init;
			dummy.tagNameFilter = "*";
			dummy.separation = 0;
			lparts = dummy ~ lparts;
		+/
		}

		foreach_reverse(part; lparts) {

			// writeln("matching ", where, " with ", part, " via ", lastSeparation);
			// writeln(parts);

			if(lastSeparation == -1) {
				if(!part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 0) { // generic parent
				// need to go up the whole chain
				where = where.parentNode;

				while(where !is null) {
					if(part.matchElement(where, relativeTo))
						break;

					if(where is relativeTo)
						return false;

					where = where.parentNode;
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 1) { // the > operator
				where = where.parentNode;

				// no parent means the child combinator cannot be satisfied
				if(where is null || !part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 2) { // the + operator
			//writeln("WHERE", where, " ", part);
				where = where.previousSibling("*");

				// no previous element sibling means + cannot be satisfied
				if(where is null || !part.matchElement(where, relativeTo))
					return false;
			} else if(lastSeparation == 3) { // the ~ operator
				where = where.previousSibling("*");
				while(where !is null) {
					if(part.matchElement(where, relativeTo))
						break;

					if(where is relativeTo)
						return false;

					where = where.previousSibling("*");
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore
				// FIXME
			}

			lastSeparation = part.separation;

			/*
				/+
				I commented this to magically make unittest pass and I think the reason it works
				when commented is that I inject a :scope iff there's a selector at top level now
				and if not, it follows the (frankly stupid) w3c standard behavior at arbitrary id
				asduiwh . but me injecting the :scope also acts as a terminating condition.

				tbh this prolly needs like a trillion more tests.
				+/
			if(where is relativeTo)
				return false; // at end of line, if we aren't done by now, the match fails
			*/
		}
		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	/// Lexes and parses a single comma-free selector.
	static SelectorComponent fromString(string selector) {
		return parseSelector(lexSelector(selector));
	}
}

/++
	Lexes `selector` and splits the token stream on top-level commas
	(commas inside parentheses are left alone), parsing each slice into
	its own [SelectorComponent]. Empty slices are skipped.
+/
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

///.
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	// Turns the tokens of one comma-free selector into a SelectorComponent.
	//
	// This is a token-driven state machine: `state` says what the next token
	// means, and `current` accumulates the part being built until an operator
	// (or the end of input) commits it to `s.parts`.
	//
	// When caseSensitiveTags is false, tag-name tokens are lowercased.
	//
	// Throws an Exception("parse error") when a functional pseudo-class is not
	// followed by "(", and an ArsdException for an operator token it does not
	// recognize.
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount; // nesting depth while skipping a functional selector's argument
	foreach(idx, token; tokens) {
		// Returns the concatenated tokens between the parentheses that follow
		// the current token, honoring nested parentheses.
		string readFunctionalSelector() {
			string s;
			// check the bound first so a trailing `:not` reports a parse
			// error instead of failing with an out-of-range index
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				s ~= t;
			}

			return s;
		}

		// tid stays -1 when the token is not one of the known operator tokens
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							import arsd.core;
							throw ArsdException!"CSS Selector Problem"(token, tokens, cast(int) state);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "lang":
						// the argument is skipped, not interpreted
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-child":
						// FIXME
						//current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// these require an attribute named "nothing" to be present
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are silently ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				// back at depth zero means the argument list has been consumed
				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; it is collected into attributeValue,
						// which ReadingAttributeValue overwrites once a
						// comparison operator and value are seen
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	// flush whatever part was still being built when the tokens ran out
	commit();

	return s;
}

///.
Element[] removeDuplicates(Element[] input) {
	// keeps the first occurrence of each element, preserving input order
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling

/++
	This delegate is called if you call [Element.computedStyle] to attach an object to the element
	that holds stylesheet information. You can rebind it to something else to return a subclass
	if you want to hold more per-element extension data than the normal computed style object holds
	(e.g. layout info as well).

	The default is `return new CssStyle(null, element.style);`

	History:
		Added September 13, 2024 (dub v11.6)
+/
CssStyle function(Element e) computedStyleFactory = &defaultComputedStyleFactory;

/// ditto
CssStyle defaultComputedStyleFactory(Element e) {
	return new CssStyle(null, e.style); // gives at least something to work with
}


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Parses a `;`-separated list of `name: value` declarations.

		Params:
			rule = the selector the declarations belong to; empty or null means an inline style
			content = the declaration text; if it is empty after stripping, nothing is stored (not even `rule`)

		Declarations without a colon are skipped. `!important` is stripped from values.
		After parsing, shorthand properties are expanded via [expandShortForm].
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;

		}

		// foreach iterates the slice as it was when the loop started, so the
		// longhand properties appended by the expansion are not revisited
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/++
		Rough specificity estimate: an empty rule counts as inline
		(`important = 2`); otherwise `.`, `#` and spaces are counted up
		to the first comma.
	+/
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
			s.important = 2;
		} else {
			// SO. WRONG.
			foreach(ch; rule) {
				if(ch == '.')
					s.classes++;
				if(ch == '#')
					s.ids++;
				if(ch == ' ')
					s.tags++;
				if(ch == ',')
					break;
			}
			// FIXME
		}

		return s;
	}

	string originatingRule; /// The selector passed to the constructor (stripped).
	Specificity originatingSpecificity; /// Specificity computed from [originatingRule].

	/// Specificity components overlaid on a single `score` so they can be compared as one number.
	union Specificity {
		uint score; /// All four components viewed as one integer.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	/// A single stored declaration.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	/// Every stored property, explicit and inferred.
	Property[] properties;

	/// `style.fooBar` reads `foo-bar`; `style.fooBar = "x"` (or `style.fooBar("x")`) sets it with inline specificity.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);
		else
			return setValue(name, value, Specificity(0x02000000) /* inline specificity */);
	}

	/// takes dash style name
	/// Returns: the value of the first property with that name, or null if there is none
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, takes dash style name.

		An existing property is only overwritten when `newSpecificity`
		is at least as high as the stored one. Shorthand names are
		expanded into their longhand properties using the new value.

		Params:
			name = dash style property name
			value = new value; `!important` is stripped and sets `newSpecificity.important`
			newSpecificity = specificity of the declaration being applied
			explicit = false when the value was inferred from a shorthand

		Returns: the value with `!important` removed, whether or not it was stored
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		// Index instead of iterating by ref: expandShortForm appends to
		// `properties`, which can reallocate the array and leave a ref
		// pointing into the old copy.
		foreach(idx; 0 .. properties.length) {
			if(properties[idx].name != name)
				continue;

			if(newSpecificity.score < properties[idx].specificity.score)
				return value; // do nothing - the specificity is too low

			properties[idx].givenExplicitly = explicit;
			properties[idx].specificity = newSpecificity;
			properties[idx].value = value;

			// expand only after the value is stored so the longhands get
			// the new value rather than the one being replaced
			expandShortForm(properties[idx], newSpecificity);
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must not be reported as explicit
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity; // remember the specificity it was actually set with

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1-4 value box shorthand into -top/-right/-bottom/-left.
	// Any other number of space-separated values is ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		auto parts = value.split(" ");
		switch(parts.length) {
			case 1:
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2:
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3:
				// top, left-and-right, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4:
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// assert(0, value);
		}
	}

	/// Copies a shorthand (`margin`, `padding`, `border`, `outline`) into its four side properties; other names do nothing.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties, wrapped in `rule { ... }` when there is an originating rule.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred shit

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

// Wraps `url` in `url("...")`. The url is inserted verbatim, so a `"` inside it is not escaped.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// One entry per `selector { declarations }` block found in the source.
	CssStyle[] rules;

	/// Parses css source text into [rules]. Comments are dropped and @-rules are skipped.
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two strings is currently being filled:
		// the selector before '{', the declarations after it
		string* currentThing = &currentRule;
		foreach(c; source) {
			handle: switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was a plain slash, not a comment opener
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4: // inside an @-rule prelude
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside an @-rule block; ends at the first '}'
					if(c == '}')
						state = 0; // skipping font face probably
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties) {
					current.setValue(item.name, item.value, item.specificity);
				}
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Pushes `t`, growing the backing array (doubling below 4096 slots, then by 4096) when it is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when nothing is on the stack.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently positioned on.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the first unvisited child if there is one, otherwise back up the stack until an ancestor has one.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			// this element's children are exhausted; resume in the parent
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			// descend into the next child, remembering where we were
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	/// True once the whole tree has been walked.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element plus the index of the child last descended into (-1 = none yet)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
// Returns the offset of the first occurrence of `needle` in `haystack`, or -1 if there is none.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	for(size_t start = 0; start < haystack.length; start++) {
		// once the needle no longer fits, no later offset can match either
		if(start + needle.length > haystack.length)
			break;
		if(haystack[start .. start + needle.length] == needle[])
			return start;
	}
	return -1;
}

// Returns a new array holding `arr` with `what` spliced in right after index `position`.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	T[] combined;
	combined.length = arr.length + what.length;

	int cursor = 0;
	// head of the original, up to and including `position`
	for(int n = 0; n <= position; n++)
		combined[cursor++] = arr[n];
	// the inserted items
	for(size_t n = 0; n < what.length; n++)
		combined[cursor++] = what[n];
	// the rest of the original
	for(size_t n = position + 1; n < arr.length; n++)
		combined[cursor++] = arr[n];

	return combined;
}

// Linear search: true if any entry of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	for(size_t n = 0; n < arr.length; n++) {
		if(item == arr[n])
			return true;
	}
	return false;
}

// Copies an associative array into a new one, entry by entry.
private string[string] aadup(in string[string] arr) {
	string[string] copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}

// Same entry-by-entry copy for AttributesHolder.
private AttributesHolder aadup(const AttributesHolder arr) {
	AttributesHolder copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}















// These MUST be sorted.
See generatedomcases.d for a program to generate it if you need to add more than a few (otherwise maybe you can work it in yourself but yikes) 8456 8457 immutable string[] availableEntities = 8458 ["AElig", "AElig", "AMP", "AMP", "Aacute", "Aacute", "Abreve", "Abreve", "Acirc", "Acirc", "Acy", "Acy", "Afr", "Afr", "Agrave", "Agrave", "Alpha", "Alpha", "Amacr", "Amacr", "And", "And", "Aogon", "Aogon", "Aopf", "Aopf", "ApplyFunction", "ApplyFunction", "Aring", "Aring", "Ascr", "Ascr", "Assign", "Assign", "Atilde", 8459 "Atilde", "Auml", "Auml", "Backslash", "Backslash", "Barv", "Barv", "Barwed", "Barwed", "Bcy", "Bcy", "Because", "Because", "Bernoullis", "Bernoullis", "Beta", "Beta", "Bfr", "Bfr", "Bopf", "Bopf", "Breve", "Breve", "Bscr", "Bscr", "Bumpeq", "Bumpeq", "CHcy", "CHcy", "COPY", "COPY", "Cacute", "Cacute", "Cap", "Cap", "CapitalDifferentialD", 8460 "CapitalDifferentialD", "Cayleys", "Cayleys", "Ccaron", "Ccaron", "Ccedil", "Ccedil", "Ccirc", "Ccirc", "Cconint", "Cconint", "Cdot", "Cdot", "Cedilla", "Cedilla", "CenterDot", "CenterDot", "Cfr", "Cfr", "Chi", "Chi", "CircleDot", "CircleDot", "CircleMinus", "CircleMinus", "CirclePlus", "CirclePlus", "CircleTimes", "CircleTimes", 8461 "ClockwiseContourIntegral", "ClockwiseContourIntegral", "CloseCurlyDoubleQuote", "CloseCurlyDoubleQuote", "CloseCurlyQuote", "CloseCurlyQuote", "Colon", "Colon", "Colone", "Colone", "Congruent", "Congruent", "Conint", "Conint", "ContourIntegral", "ContourIntegral", "Copf", "Copf", "Coproduct", "Coproduct", "CounterClockwiseContourIntegral", 8462 "CounterClockwiseContourIntegral", "Cross", "Cross", "Cscr", "Cscr", "Cup", "Cup", "CupCap", "CupCap", "DD", "DD", "DDotrahd", "DDotrahd", "DJcy", "DJcy", "DScy", "DScy", "DZcy", "DZcy", "Dagger", "Dagger", "Darr", "Darr", "Dashv", "Dashv", "Dcaron", "Dcaron", "Dcy", "Dcy", "Del", "Del", "Delta", "Delta", "Dfr", "Dfr", 8463 "DiacriticalAcute", "DiacriticalAcute", "DiacriticalDot", "DiacriticalDot", "DiacriticalDoubleAcute", 
"DiacriticalDoubleAcute", "DiacriticalGrave", "DiacriticalGrave", "DiacriticalTilde", "DiacriticalTilde", "Diamond", "Diamond", "DifferentialD", "DifferentialD", "Dopf", "Dopf", "Dot", "Dot", "DotDot", "DotDot", "DotEqual", 8464 "DotEqual", "DoubleContourIntegral", "DoubleContourIntegral", "DoubleDot", "DoubleDot", "DoubleDownArrow", "DoubleDownArrow", "DoubleLeftArrow", "DoubleLeftArrow", "DoubleLeftRightArrow", "DoubleLeftRightArrow", "DoubleLeftTee", "DoubleLeftTee", "DoubleLongLeftArrow", "DoubleLongLeftArrow", "DoubleLongLeftRightArrow", 8465 "DoubleLongLeftRightArrow", "DoubleLongRightArrow", "DoubleLongRightArrow", "DoubleRightArrow", "DoubleRightArrow", "DoubleRightTee", "DoubleRightTee", "DoubleUpArrow", "DoubleUpArrow", "DoubleUpDownArrow", "DoubleUpDownArrow", "DoubleVerticalBar", "DoubleVerticalBar", "DownArrow", "DownArrow", "DownArrowBar", "DownArrowBar", 8466 "DownArrowUpArrow", "DownArrowUpArrow", "DownBreve", "DownBreve", "DownLeftRightVector", "DownLeftRightVector", "DownLeftTeeVector", "DownLeftTeeVector", "DownLeftVector", "DownLeftVector", "DownLeftVectorBar", "DownLeftVectorBar", "DownRightTeeVector", "DownRightTeeVector", "DownRightVector", "DownRightVector", "DownRightVectorBar", 8467 "DownRightVectorBar", "DownTee", "DownTee", "DownTeeArrow", "DownTeeArrow", "Downarrow", "Downarrow", "Dscr", "Dscr", "Dstrok", "Dstrok", "ENG", "ENG", "ETH", "ETH", "Eacute", "Eacute", "Ecaron", "Ecaron", "Ecirc", "Ecirc", "Ecy", "Ecy", "Edot", "Edot", "Efr", "Efr", "Egrave", "Egrave", "Element", "Element", "Emacr", "Emacr", 8468 "EmptySmallSquare", "EmptySmallSquare", "EmptyVerySmallSquare", "EmptyVerySmallSquare", "Eogon", "Eogon", "Eopf", "Eopf", "Epsilon", "Epsilon", "Equal", "Equal", "EqualTilde", "EqualTilde", "Equilibrium", "Equilibrium", "Escr", "Escr", "Esim", "Esim", "Eta", "Eta", "Euml", "Euml", "Exists", "Exists", "ExponentialE", "ExponentialE", 8469 "Fcy", "Fcy", "Ffr", "Ffr", "FilledSmallSquare", "FilledSmallSquare", "FilledVerySmallSquare", 
"FilledVerySmallSquare", "Fopf", "Fopf", "ForAll", "ForAll", "Fouriertrf", "Fouriertrf", "Fscr", "Fscr", "GJcy", "GJcy", "GT", "GT", "Gamma", "Gamma", "Gammad", "Gammad", "Gbreve", "Gbreve", "Gcedil", "Gcedil", "Gcirc", "Gcirc", 8470 "Gcy", "Gcy", "Gdot", "Gdot", "Gfr", "Gfr", "Gg", "Gg", "Gopf", "Gopf", "GreaterEqual", "GreaterEqual", "GreaterEqualLess", "GreaterEqualLess", "GreaterFullEqual", "GreaterFullEqual", "GreaterGreater", "GreaterGreater", "GreaterLess", "GreaterLess", "GreaterSlantEqual", "GreaterSlantEqual", "GreaterTilde", "GreaterTilde", 8471 "Gscr", "Gscr", "Gt", "Gt", "HARDcy", "HARDcy", "Hacek", "Hacek", "Hat", "Hat", "Hcirc", "Hcirc", "Hfr", "Hfr", "HilbertSpace", "HilbertSpace", "Hopf", "Hopf", "HorizontalLine", "HorizontalLine", "Hscr", "Hscr", "Hstrok", "Hstrok", "HumpDownHump", "HumpDownHump", "HumpEqual", "HumpEqual", "IEcy", "IEcy", "IJlig", "IJlig", 8472 "IOcy", "IOcy", "Iacute", "Iacute", "Icirc", "Icirc", "Icy", "Icy", "Idot", "Idot", "Ifr", "Ifr", "Igrave", "Igrave", "Im", "Im", "Imacr", "Imacr", "ImaginaryI", "ImaginaryI", "Implies", "Implies", "Int", "Int", "Integral", "Integral", "Intersection", "Intersection", "InvisibleComma", "InvisibleComma", "InvisibleTimes", 8473 "InvisibleTimes", "Iogon", "Iogon", "Iopf", "Iopf", "Iota", "Iota", "Iscr", "Iscr", "Itilde", "Itilde", "Iukcy", "Iukcy", "Iuml", "Iuml", "Jcirc", "Jcirc", "Jcy", "Jcy", "Jfr", "Jfr", "Jopf", "Jopf", "Jscr", "Jscr", "Jsercy", "Jsercy", "Jukcy", "Jukcy", "KHcy", "KHcy", "KJcy", "KJcy", "Kappa", "Kappa", "Kcedil", "Kcedil", 8474 "Kcy", "Kcy", "Kfr", "Kfr", "Kopf", "Kopf", "Kscr", "Kscr", "LJcy", "LJcy", "LT", "LT", "Lacute", "Lacute", "Lambda", "Lambda", "Lang", "Lang", "Laplacetrf", "Laplacetrf", "Larr", "Larr", "Lcaron", "Lcaron", "Lcedil", "Lcedil", "Lcy", "Lcy", "LeftAngleBracket", "LeftAngleBracket", "LeftArrow", "LeftArrow", "LeftArrowBar", 8475 "LeftArrowBar", "LeftArrowRightArrow", "LeftArrowRightArrow", "LeftCeiling", "LeftCeiling", "LeftDoubleBracket", 
"LeftDoubleBracket", "LeftDownTeeVector", "LeftDownTeeVector", "LeftDownVector", "LeftDownVector", "LeftDownVectorBar", "LeftDownVectorBar", "LeftFloor", "LeftFloor", "LeftRightArrow", "LeftRightArrow", "LeftRightVector", 8476 "LeftRightVector", "LeftTee", "LeftTee", "LeftTeeArrow", "LeftTeeArrow", "LeftTeeVector", "LeftTeeVector", "LeftTriangle", "LeftTriangle", "LeftTriangleBar", "LeftTriangleBar", "LeftTriangleEqual", "LeftTriangleEqual", "LeftUpDownVector", "LeftUpDownVector", "LeftUpTeeVector", "LeftUpTeeVector", "LeftUpVector", "LeftUpVector", 8477 "LeftUpVectorBar", "LeftUpVectorBar", "LeftVector", "LeftVector", "LeftVectorBar", "LeftVectorBar", "Leftarrow", "Leftarrow", "Leftrightarrow", "Leftrightarrow", "LessEqualGreater", "LessEqualGreater", "LessFullEqual", "LessFullEqual", "LessGreater", "LessGreater", "LessLess", "LessLess", "LessSlantEqual", "LessSlantEqual", 8478 "LessTilde", "LessTilde", "Lfr", "Lfr", "Ll", "Ll", "Lleftarrow", "Lleftarrow", "Lmidot", "Lmidot", "LongLeftArrow", "LongLeftArrow", "LongLeftRightArrow", "LongLeftRightArrow", "LongRightArrow", "LongRightArrow", "Longleftarrow", "Longleftarrow", "Longleftrightarrow", "Longleftrightarrow", "Longrightarrow", "Longrightarrow", 8479 "Lopf", "Lopf", "LowerLeftArrow", "LowerLeftArrow", "LowerRightArrow", "LowerRightArrow", "Lscr", "Lscr", "Lsh", "Lsh", "Lstrok", "Lstrok", "Lt", "Lt", "Map", "Map", "Mcy", "Mcy", "MediumSpace", "MediumSpace", "Mellintrf", "Mellintrf", "Mfr", "Mfr", "MinusPlus", "MinusPlus", "Mopf", "Mopf", "Mscr", "Mscr", "Mu", "Mu", 8480 "NJcy", "NJcy", "Nacute", "Nacute", "Ncaron", "Ncaron", "Ncedil", "Ncedil", "Ncy", "Ncy", "NegativeMediumSpace", "NegativeMediumSpace", "NegativeThickSpace", "NegativeThickSpace", "NegativeThinSpace", "NegativeThinSpace", "NegativeVeryThinSpace", "NegativeVeryThinSpace", "NestedGreaterGreater", "NestedGreaterGreater", 8481 "NestedLessLess", "NestedLessLess", "NewLine", "NewLine", "Nfr", "Nfr", "NoBreak", "NoBreak", "NonBreakingSpace", 
"NonBreakingSpace", "Nopf", "Nopf", "Not", "Not", "NotCongruent", "NotCongruent", "NotCupCap", "NotCupCap", "NotDoubleVerticalBar", "NotDoubleVerticalBar", "NotElement", "NotElement", "NotEqual", "NotEqual", 8482 "NotExists", "NotExists", "NotGreater", "NotGreater", "NotGreaterEqual", "NotGreaterEqual", "NotGreaterLess", "NotGreaterLess", "NotGreaterTilde", "NotGreaterTilde", "NotLeftTriangle", "NotLeftTriangle", "NotLeftTriangleEqual", "NotLeftTriangleEqual", "NotLess", "NotLess", "NotLessEqual", "NotLessEqual", "NotLessGreater", 8483 "NotLessGreater", "NotLessTilde", "NotLessTilde", "NotPrecedes", "NotPrecedes", "NotPrecedesSlantEqual", "NotPrecedesSlantEqual", "NotReverseElement", "NotReverseElement", "NotRightTriangle", "NotRightTriangle", "NotRightTriangleEqual", "NotRightTriangleEqual", "NotSquareSubsetEqual", "NotSquareSubsetEqual", "NotSquareSupersetEqual", 8484 "NotSquareSupersetEqual", "NotSubsetEqual", "NotSubsetEqual", "NotSucceeds", "NotSucceeds", "NotSucceedsSlantEqual", "NotSucceedsSlantEqual", "NotSupersetEqual", "NotSupersetEqual", "NotTilde", "NotTilde", "NotTildeEqual", "NotTildeEqual", "NotTildeFullEqual", "NotTildeFullEqual", "NotTildeTilde", "NotTildeTilde", "NotVerticalBar", 8485 "NotVerticalBar", "Nscr", "Nscr", "Ntilde", "Ntilde", "Nu", "Nu", "OElig", "OElig", "Oacute", "Oacute", "Ocirc", "Ocirc", "Ocy", "Ocy", "Odblac", "Odblac", "Ofr", "Ofr", "Ograve", "Ograve", "Omacr", "Omacr", "Omega", "Omega", "Omicron", "Omicron", "Oopf", "Oopf", "OpenCurlyDoubleQuote", "OpenCurlyDoubleQuote", "OpenCurlyQuote", 8486 "OpenCurlyQuote", "Or", "Or", "Oscr", "Oscr", "Oslash", "Oslash", "Otilde", "Otilde", "Otimes", "Otimes", "Ouml", "Ouml", "OverBar", "OverBar", "OverBrace", "OverBrace", "OverBracket", "OverBracket", "OverParenthesis", "OverParenthesis", "PartialD", "PartialD", "Pcy", "Pcy", "Pfr", "Pfr", "Phi", "Phi", "Pi", "Pi", "PlusMinus", 8487 "PlusMinus", "Poincareplane", "Poincareplane", "Popf", "Popf", "Pr", "Pr", "Precedes", "Precedes", 
"PrecedesEqual", "PrecedesEqual", "PrecedesSlantEqual", "PrecedesSlantEqual", "PrecedesTilde", "PrecedesTilde", "Prime", "Prime", "Product", "Product", "Proportion", "Proportion", "Proportional", "Proportional", "Pscr", "Pscr", 8488 "Psi", "Psi", "QUOT", "QUOT", "Qfr", "Qfr", "Qopf", "Qopf", "Qscr", "Qscr", "RBarr", "RBarr", "REG", "REG", "Racute", "Racute", "Rang", "Rang", "Rarr", "Rarr", "Rarrtl", "Rarrtl", "Rcaron", "Rcaron", "Rcedil", "Rcedil", "Rcy", "Rcy", "Re", "Re", "ReverseElement", "ReverseElement", "ReverseEquilibrium", "ReverseEquilibrium", 8489 "ReverseUpEquilibrium", "ReverseUpEquilibrium", "Rfr", "Rfr", "Rho", "Rho", "RightAngleBracket", "RightAngleBracket", "RightArrow", "RightArrow", "RightArrowBar", "RightArrowBar", "RightArrowLeftArrow", "RightArrowLeftArrow", "RightCeiling", "RightCeiling", "RightDoubleBracket", "RightDoubleBracket", "RightDownTeeVector", 8490 "RightDownTeeVector", "RightDownVector", "RightDownVector", "RightDownVectorBar", "RightDownVectorBar", "RightFloor", "RightFloor", "RightTee", "RightTee", "RightTeeArrow", "RightTeeArrow", "RightTeeVector", "RightTeeVector", "RightTriangle", "RightTriangle", "RightTriangleBar", "RightTriangleBar", "RightTriangleEqual", 8491 "RightTriangleEqual", "RightUpDownVector", "RightUpDownVector", "RightUpTeeVector", "RightUpTeeVector", "RightUpVector", "RightUpVector", "RightUpVectorBar", "RightUpVectorBar", "RightVector", "RightVector", "RightVectorBar", "RightVectorBar", "Rightarrow", "Rightarrow", "Ropf", "Ropf", "RoundImplies", "RoundImplies", 8492 "Rrightarrow", "Rrightarrow", "Rscr", "Rscr", "Rsh", "Rsh", "RuleDelayed", "RuleDelayed", "SHCHcy", "SHCHcy", "SHcy", "SHcy", "SOFTcy", "SOFTcy", "Sacute", "Sacute", "Sc", "Sc", "Scaron", "Scaron", "Scedil", "Scedil", "Scirc", "Scirc", "Scy", "Scy", "Sfr", "Sfr", "ShortDownArrow", "ShortDownArrow", "ShortLeftArrow", "ShortLeftArrow", 8493 "ShortRightArrow", "ShortRightArrow", "ShortUpArrow", "ShortUpArrow", "Sigma", "Sigma", "SmallCircle", 
"SmallCircle", "Sopf", "Sopf", "Sqrt", "Sqrt", "Square", "Square", "SquareIntersection", "SquareIntersection", "SquareSubset", "SquareSubset", "SquareSubsetEqual", "SquareSubsetEqual", "SquareSuperset", "SquareSuperset", 8494 "SquareSupersetEqual", "SquareSupersetEqual", "SquareUnion", "SquareUnion", "Sscr", "Sscr", "Star", "Star", "Sub", "Sub", "Subset", "Subset", "SubsetEqual", "SubsetEqual", "Succeeds", "Succeeds", "SucceedsEqual", "SucceedsEqual", "SucceedsSlantEqual", "SucceedsSlantEqual", "SucceedsTilde", "SucceedsTilde", "SuchThat", 8495 "SuchThat", "Sum", "Sum", "Sup", "Sup", "Superset", "Superset", "SupersetEqual", "SupersetEqual", "Supset", "Supset", "THORN", "THORN", "TRADE", "TRADE", "TSHcy", "TSHcy", "TScy", "TScy", "Tab", "Tab", "Tau", "Tau", "Tcaron", "Tcaron", "Tcedil", "Tcedil", "Tcy", "Tcy", "Tfr", "Tfr", "Therefore", "Therefore", "Theta", "Theta", 8496 "ThinSpace", "ThinSpace", "Tilde", "Tilde", "TildeEqual", "TildeEqual", "TildeFullEqual", "TildeFullEqual", "TildeTilde", "TildeTilde", "Topf", "Topf", "TripleDot", "TripleDot", "Tscr", "Tscr", "Tstrok", "Tstrok", "Uacute", "Uacute", "Uarr", "Uarr", "Uarrocir", "Uarrocir", "Ubrcy", "Ubrcy", "Ubreve", "Ubreve", "Ucirc", 8497 "Ucirc", "Ucy", "Ucy", "Udblac", "Udblac", "Ufr", "Ufr", "Ugrave", "Ugrave", "Umacr", "Umacr", "UnderBar", "UnderBar", "UnderBrace", "UnderBrace", "UnderBracket", "UnderBracket", "UnderParenthesis", "UnderParenthesis", "Union", "Union", "UnionPlus", "UnionPlus", "Uogon", "Uogon", "Uopf", "Uopf", "UpArrow", "UpArrow", "UpArrowBar", 8498 "UpArrowBar", "UpArrowDownArrow", "UpArrowDownArrow", "UpDownArrow", "UpDownArrow", "UpEquilibrium", "UpEquilibrium", "UpTee", "UpTee", "UpTeeArrow", "UpTeeArrow", "Uparrow", "Uparrow", "Updownarrow", "Updownarrow", "UpperLeftArrow", "UpperLeftArrow", "UpperRightArrow", "UpperRightArrow", "Upsi", "Upsi", "Upsilon", "Upsilon", 8499 "Uring", "Uring", "Uscr", "Uscr", "Utilde", "Utilde", "Uuml", "Uuml", "VDash", "VDash", "Vbar", "Vbar", "Vcy", "Vcy", 
"Vdash", "Vdash", "Vdashl", "Vdashl", "Vee", "Vee", "Verbar", "Verbar", "Vert", "Vert", "VerticalBar", "VerticalBar", "VerticalLine", "VerticalLine", "VerticalSeparator", "VerticalSeparator", "VerticalTilde", 8500 "VerticalTilde", "VeryThinSpace", "VeryThinSpace", "Vfr", "Vfr", "Vopf", "Vopf", "Vscr", "Vscr", "Vvdash", "Vvdash", "Wcirc", "Wcirc", "Wedge", "Wedge", "Wfr", "Wfr", "Wopf", "Wopf", "Wscr", "Wscr", "Xfr", "Xfr", "Xi", "Xi", "Xopf", "Xopf", "Xscr", "Xscr", "YAcy", "YAcy", "YIcy", "YIcy", "YUcy", "YUcy", "Yacute", "Yacute", 8501 "Ycirc", "Ycirc", "Ycy", "Ycy", "Yfr", "Yfr", "Yopf", "Yopf", "Yscr", "Yscr", "Yuml", "Yuml", "ZHcy", "ZHcy", "Zacute", "Zacute", "Zcaron", "Zcaron", "Zcy", "Zcy", "Zdot", "Zdot", "ZeroWidthSpace", "ZeroWidthSpace", "Zeta", "Zeta", "Zfr", "Zfr", "Zopf", "Zopf", "Zscr", "Zscr", "aacute", "aacute", "abreve", "abreve", "ac", 8502 "ac", "acd", "acd", "acirc", "acirc", "acute", "acute", "acy", "acy", "aelig", "aelig", "af", "af", "afr", "afr", "agrave", "agrave", "alefsym", "alefsym", "aleph", "aleph", "alpha", "alpha", "amacr", "amacr", "amalg", "amalg", "and", "and", "andand", "andand", "andd", "andd", "andslope", "andslope", "andv", "andv", "ang", 8503 "ang", "ange", "ange", "angle", "angle", "angmsd", "angmsd", "angmsdaa", "angmsdaa", "angmsdab", "angmsdab", "angmsdac", "angmsdac", "angmsdad", "angmsdad", "angmsdae", "angmsdae", "angmsdaf", "angmsdaf", "angmsdag", "angmsdag", "angmsdah", "angmsdah", "angrt", "angrt", "angrtvb", "angrtvb", "angrtvbd", "angrtvbd", "angsph", 8504 "angsph", "angst", "angst", "angzarr", "angzarr", "aogon", "aogon", "aopf", "aopf", "ap", "ap", "apE", "apE", "apacir", "apacir", "ape", "ape", "apid", "apid", "approx", "approx", "approxeq", "approxeq", "aring", "aring", "ascr", "ascr", "ast", "ast", "asymp", "asymp", "asympeq", "asympeq", "atilde", "atilde", "auml", 8505 "auml", "awconint", "awconint", "awint", "awint", "bNot", "bNot", "backcong", "backcong", "backepsilon", "backepsilon", "backprime", 
"backprime", "backsim", "backsim", "backsimeq", "backsimeq", "barvee", "barvee", "barwed", "barwed", "barwedge", "barwedge", "bbrk", "bbrk", "bbrktbrk", "bbrktbrk", "bcong", "bcong", "bcy", 8506 "bcy", "bdquo", "bdquo", "becaus", "becaus", "because", "because", "bemptyv", "bemptyv", "bepsi", "bepsi", "bernou", "bernou", "beta", "beta", "beth", "beth", "between", "between", "bfr", "bfr", "bigcap", "bigcap", "bigcirc", "bigcirc", "bigcup", "bigcup", "bigodot", "bigodot", "bigoplus", "bigoplus", "bigotimes", "bigotimes", 8507 "bigsqcup", "bigsqcup", "bigstar", "bigstar", "bigtriangledown", "bigtriangledown", "bigtriangleup", "bigtriangleup", "biguplus", "biguplus", "bigvee", "bigvee", "bigwedge", "bigwedge", "bkarow", "bkarow", "blacklozenge", "blacklozenge", "blacksquare", "blacksquare", "blacktriangle", "blacktriangle", "blacktriangledown", 8508 "blacktriangledown", "blacktriangleleft", "blacktriangleleft", "blacktriangleright", "blacktriangleright", "blank", "blank", "blk12", "blk12", "blk14", "blk14", "blk34", "blk34", "block", "block", "bnot", "bnot", "bopf", "bopf", "bot", "bot", "bottom", "bottom", "bowtie", "bowtie", "boxDL", "boxDL", "boxDR", "boxDR", "boxDl", 8509 "boxDl", "boxDr", "boxDr", "boxH", "boxH", "boxHD", "boxHD", "boxHU", "boxHU", "boxHd", "boxHd", "boxHu", "boxHu", "boxUL", "boxUL", "boxUR", "boxUR", "boxUl", "boxUl", "boxUr", "boxUr", "boxV", "boxV", "boxVH", "boxVH", "boxVL", "boxVL", "boxVR", "boxVR", "boxVh", "boxVh", "boxVl", "boxVl", "boxVr", "boxVr", "boxbox", 8510 "boxbox", "boxdL", "boxdL", "boxdR", "boxdR", "boxdl", "boxdl", "boxdr", "boxdr", "boxh", "boxh", "boxhD", "boxhD", "boxhU", "boxhU", "boxhd", "boxhd", "boxhu", "boxhu", "boxminus", "boxminus", "boxplus", "boxplus", "boxtimes", "boxtimes", "boxuL", "boxuL", "boxuR", "boxuR", "boxul", "boxul", "boxur", "boxur", "boxv", 8511 "boxv", "boxvH", "boxvH", "boxvL", "boxvL", "boxvR", "boxvR", "boxvh", "boxvh", "boxvl", "boxvl", "boxvr", "boxvr", "bprime", "bprime", "breve", "breve", 
"brvbar", "brvbar", "bscr", "bscr", "bsemi", "bsemi", "bsim", "bsim", "bsime", "bsime", "bsol", "bsol", "bsolb", "bsolb", "bsolhsub", "bsolhsub", "bull", "bull", "bullet", 8512 "bullet", "bump", "bump", "bumpE", "bumpE", "bumpe", "bumpe", "bumpeq", "bumpeq", "cacute", "cacute", "cap", "cap", "capand", "capand", "capbrcup", "capbrcup", "capcap", "capcap", "capcup", "capcup", "capdot", "capdot", "caret", "caret", "caron", "caron", "ccaps", "ccaps", "ccaron", "ccaron", "ccedil", "ccedil", "ccirc", 8513 "ccirc", "ccups", "ccups", "ccupssm", "ccupssm", "cdot", "cdot", "cedil", "cedil", "cemptyv", "cemptyv", "cent", "cent", "centerdot", "centerdot", "cfr", "cfr", "chcy", "chcy", "check", "check", "checkmark", "checkmark", "chi", "chi", "cir", "cir", "cirE", "cirE", "circ", "circ", "circeq", "circeq", "circlearrowleft", 8514 "circlearrowleft", "circlearrowright", "circlearrowright", "circledR", "circledR", "circledS", "circledS", "circledast", "circledast", "circledcirc", "circledcirc", "circleddash", "circleddash", "cire", "cire", "cirfnint", "cirfnint", "cirmid", "cirmid", "cirscir", "cirscir", "clubs", "clubs", "clubsuit", "clubsuit", "colon", 8515 "colon", "colone", "colone", "coloneq", "coloneq", "comma", "comma", "commat", "commat", "comp", "comp", "compfn", "compfn", "complement", "complement", "complexes", "complexes", "cong", "cong", "congdot", "congdot", "conint", "conint", "copf", "copf", "coprod", "coprod", "copy", "copy", "copysr", "copysr", "crarr", "crarr", 8516 "cross", "cross", "cscr", "cscr", "csub", "csub", "csube", "csube", "csup", "csup", "csupe", "csupe", "ctdot", "ctdot", "cudarrl", "cudarrl", "cudarrr", "cudarrr", "cuepr", "cuepr", "cuesc", "cuesc", "cularr", "cularr", "cularrp", "cularrp", "cup", "cup", "cupbrcap", "cupbrcap", "cupcap", "cupcap", "cupcup", "cupcup", 8517 "cupdot", "cupdot", "cupor", "cupor", "curarr", "curarr", "curarrm", "curarrm", "curlyeqprec", "curlyeqprec", "curlyeqsucc", "curlyeqsucc", "curlyvee", "curlyvee", "curlywedge", 
"curlywedge", "curren", "curren", "curvearrowleft", "curvearrowleft", "curvearrowright", "curvearrowright", "cuvee", "cuvee", "cuwed", "cuwed", 8518 "cwconint", "cwconint", "cwint", "cwint", "cylcty", "cylcty", "dArr", "dArr", "dHar", "dHar", "dagger", "dagger", "daleth", "daleth", "darr", "darr", "dash", "dash", "dashv", "dashv", "dbkarow", "dbkarow", "dblac", "dblac", "dcaron", "dcaron", "dcy", "dcy", "dd", "dd", "ddagger", "ddagger", "ddarr", "ddarr", "ddotseq", 8519 "ddotseq", "deg", "deg", "delta", "delta", "demptyv", "demptyv", "dfisht", "dfisht", "dfr", "dfr", "dharl", "dharl", "dharr", "dharr", "diam", "diam", "diamond", "diamond", "diamondsuit", "diamondsuit", "diams", "diams", "die", "die", "digamma", "digamma", "disin", "disin", "div", "div", "divide", "divide", "divideontimes", 8520 "divideontimes", "divonx", "divonx", "djcy", "djcy", "dlcorn", "dlcorn", "dlcrop", "dlcrop", "dollar", "dollar", "dopf", "dopf", "dot", "dot", "doteq", "doteq", "doteqdot", "doteqdot", "dotminus", "dotminus", "dotplus", "dotplus", "dotsquare", "dotsquare", "doublebarwedge", "doublebarwedge", "downarrow", "downarrow", "downdownarrows", 8521 "downdownarrows", "downharpoonleft", "downharpoonleft", "downharpoonright", "downharpoonright", "drbkarow", "drbkarow", "drcorn", "drcorn", "drcrop", "drcrop", "dscr", "dscr", "dscy", "dscy", "dsol", "dsol", "dstrok", "dstrok", "dtdot", "dtdot", "dtri", "dtri", "dtrif", "dtrif", "duarr", "duarr", "duhar", "duhar", "dwangle", 8522 "dwangle", "dzcy", "dzcy", "dzigrarr", "dzigrarr", "eDDot", "eDDot", "eDot", "eDot", "eacute", "eacute", "easter", "easter", "ecaron", "ecaron", "ecir", "ecir", "ecirc", "ecirc", "ecolon", "ecolon", "ecy", "ecy", "edot", "edot", "ee", "ee", "efDot", "efDot", "efr", "efr", "eg", "eg", "egrave", "egrave", "egs", "egs", "egsdot", 8523 "egsdot", "el", "el", "elinters", "elinters", "ell", "ell", "els", "els", "elsdot", "elsdot", "emacr", "emacr", "empty", "empty", "emptyset", "emptyset", "emptyv", "emptyv", "emsp", 
"emsp", "emsp13", "emsp13", "emsp14", "emsp14", "eng", "eng", "ensp", "ensp", "eogon", "eogon", "eopf", "eopf", "epar", "epar", "eparsl", 8524 "eparsl", "eplus", "eplus", "epsi", "epsi", "epsilon", "epsilon", "epsiv", "epsiv", "eqcirc", "eqcirc", "eqcolon", "eqcolon", "eqsim", "eqsim", "eqslantgtr", "eqslantgtr", "eqslantless", "eqslantless", "equals", "equals", "equest", "equest", "equiv", "equiv", "equivDD", "equivDD", "eqvparsl", "eqvparsl", "erDot", "erDot", 8525 "erarr", "erarr", "escr", "escr", "esdot", "esdot", "esim", "esim", "eta", "eta", "eth", "eth", "euml", "euml", "euro", "euro", "excl", "excl", "exist", "exist", "expectation", "expectation", "exponentiale", "exponentiale", "fallingdotseq", "fallingdotseq", "fcy", "fcy", "female", "female", "ffilig", "ffilig", "fflig", 8526 "fflig", "ffllig", "ffllig", "ffr", "ffr", "filig", "filig", "flat", "flat", "fllig", "fllig", "fltns", "fltns", "fnof", "fnof", "fopf", "fopf", "forall", "forall", "fork", "fork", "forkv", "forkv", "fpartint", "fpartint", "frac12", "frac12", "frac13", "frac13", "frac14", "frac14", "frac15", "frac15", "frac16", "frac16", 8527 "frac18", "frac18", "frac23", "frac23", "frac25", "frac25", "frac34", "frac34", "frac35", "frac35", "frac38", "frac38", "frac45", "frac45", "frac56", "frac56", "frac58", "frac58", "frac78", "frac78", "frasl", "frasl", "frown", "frown", "fscr", "fscr", "gE", "gE", "gEl", "gEl", "gacute", "gacute", "gamma", "gamma", "gammad", 8528 "gammad", "gap", "gap", "gbreve", "gbreve", "gcirc", "gcirc", "gcy", "gcy", "gdot", "gdot", "ge", "ge", "gel", "gel", "geq", "geq", "geqq", "geqq", "geqslant", "geqslant", "ges", "ges", "gescc", "gescc", "gesdot", "gesdot", "gesdoto", "gesdoto", "gesdotol", "gesdotol", "gesles", "gesles", "gfr", "gfr", "gg", "gg", "ggg", 8529 "ggg", "gimel", "gimel", "gjcy", "gjcy", "gl", "gl", "glE", "glE", "gla", "gla", "glj", "glj", "gnE", "gnE", "gnap", "gnap", "gnapprox", "gnapprox", "gne", "gne", "gneq", "gneq", "gneqq", "gneqq", "gnsim", "gnsim", 
"gopf", "gopf", "grave", "grave", "gscr", "gscr", "gsim", "gsim", "gsime", "gsime", "gsiml", "gsiml", "gtcc", 8530 "gtcc", "gtcir", "gtcir", "gtdot", "gtdot", "gtlPar", "gtlPar", "gtquest", "gtquest", "gtrapprox", "gtrapprox", "gtrarr", "gtrarr", "gtrdot", "gtrdot", "gtreqless", "gtreqless", "gtreqqless", "gtreqqless", "gtrless", "gtrless", "gtrsim", "gtrsim", "hArr", "hArr", "hairsp", "hairsp", "half", "half", "hamilt", "hamilt", 8531 "hardcy", "hardcy", "harr", "harr", "harrcir", "harrcir", "harrw", "harrw", "hbar", "hbar", "hcirc", "hcirc", "hearts", "hearts", "heartsuit", "heartsuit", "hellip", "hellip", "hercon", "hercon", "hfr", "hfr", "hksearow", "hksearow", "hkswarow", "hkswarow", "hoarr", "hoarr", "homtht", "homtht", "hookleftarrow", "hookleftarrow", 8532 "hookrightarrow", "hookrightarrow", "hopf", "hopf", "horbar", "horbar", "hscr", "hscr", "hslash", "hslash", "hstrok", "hstrok", "hybull", "hybull", "hyphen", "hyphen", "iacute", "iacute", "ic", "ic", "icirc", "icirc", "icy", "icy", "iecy", "iecy", "iexcl", "iexcl", "iff", "iff", "ifr", "ifr", "igrave", "igrave", "ii", 8533 "ii", "iiiint", "iiiint", "iiint", "iiint", "iinfin", "iinfin", "iiota", "iiota", "ijlig", "ijlig", "imacr", "imacr", "image", "image", "imagline", "imagline", "imagpart", "imagpart", "imath", "imath", "imof", "imof", "imped", "imped", "in", "in", "incare", "incare", "infin", "infin", "infintie", "infintie", "inodot", 8534 "inodot", "int", "int", "intcal", "intcal", "integers", "integers", "intercal", "intercal", "intlarhk", "intlarhk", "intprod", "intprod", "iocy", "iocy", "iogon", "iogon", "iopf", "iopf", "iota", "iota", "iprod", "iprod", "iquest", "iquest", "iscr", "iscr", "isin", "isin", "isinE", "isinE", "isindot", "isindot", "isins", 8535 "isins", "isinsv", "isinsv", "isinv", "isinv", "it", "it", "itilde", "itilde", "iukcy", "iukcy", "iuml", "iuml", "jcirc", "jcirc", "jcy", "jcy", "jfr", "jfr", "jmath", "jmath", "jopf", "jopf", "jscr", "jscr", "jsercy", "jsercy", "jukcy", "jukcy", 
"kappa", "kappa", "kappav", "kappav", "kcedil", "kcedil", "kcy", "kcy", "kfr", 8536 "kfr", "kgreen", "kgreen", "khcy", "khcy", "kjcy", "kjcy", "kopf", "kopf", "kscr", "kscr", "lAarr", "lAarr", "lArr", "lArr", "lAtail", "lAtail", "lBarr", "lBarr", "lE", "lE", "lEg", "lEg", "lHar", "lHar", "lacute", "lacute", "laemptyv", "laemptyv", "lagran", "lagran", "lambda", "lambda", "lang", "lang", "langd", "langd", 8537 "langle", "langle", "lap", "lap", "laquo", "laquo", "larr", "larr", "larrb", "larrb", "larrbfs", "larrbfs", "larrfs", "larrfs", "larrhk", "larrhk", "larrlp", "larrlp", "larrpl", "larrpl", "larrsim", "larrsim", "larrtl", "larrtl", "lat", "lat", "latail", "latail", "late", "late", "lbarr", "lbarr", "lbbrk", "lbbrk", "lbrace", 8538 "lbrace", "lbrack", "lbrack", "lbrke", "lbrke", "lbrksld", "lbrksld", "lbrkslu", "lbrkslu", "lcaron", "lcaron", "lcedil", "lcedil", "lceil", "lceil", "lcub", "lcub", "lcy", "lcy", "ldca", "ldca", "ldquo", "ldquo", "ldquor", "ldquor", "ldrdhar", "ldrdhar", "ldrushar", "ldrushar", "ldsh", "ldsh", "le", "le", "leftarrow", 8539 "leftarrow", "leftarrowtail", "leftarrowtail", "leftharpoondown", "leftharpoondown", "leftharpoonup", "leftharpoonup", "leftleftarrows", "leftleftarrows", "leftrightarrow", "leftrightarrow", "leftrightarrows", "leftrightarrows", "leftrightharpoons", "leftrightharpoons", "leftrightsquigarrow", "leftrightsquigarrow", "leftthreetimes", 8540 "leftthreetimes", "leg", "leg", "leq", "leq", "leqq", "leqq", "leqslant", "leqslant", "les", "les", "lescc", "lescc", "lesdot", "lesdot", "lesdoto", "lesdoto", "lesdotor", "lesdotor", "lesges", "lesges", "lessapprox", "lessapprox", "lessdot", "lessdot", "lesseqgtr", "lesseqgtr", "lesseqqgtr", "lesseqqgtr", "lessgtr", "lessgtr", 8541 "lesssim", "lesssim", "lfisht", "lfisht", "lfloor", "lfloor", "lfr", "lfr", "lg", "lg", "lgE", "lgE", "lhard", "lhard", "lharu", "lharu", "lharul", "lharul", "lhblk", "lhblk", "ljcy", "ljcy", "ll", "ll", "llarr", "llarr", "llcorner", "llcorner", "llhard", 
"llhard", "lltri", "lltri", "lmidot", "lmidot", "lmoust", "lmoust", 8542 "lmoustache", "lmoustache", "lnE", "lnE", "lnap", "lnap", "lnapprox", "lnapprox", "lne", "lne", "lneq", "lneq", "lneqq", "lneqq", "lnsim", "lnsim", "loang", "loang", "loarr", "loarr", "lobrk", "lobrk", "longleftarrow", "longleftarrow", "longleftrightarrow", "longleftrightarrow", "longmapsto", "longmapsto", "longrightarrow", 8543 "longrightarrow", "looparrowleft", "looparrowleft", "looparrowright", "looparrowright", "lopar", "lopar", "lopf", "lopf", "loplus", "loplus", "lotimes", "lotimes", "lowast", "lowast", "lowbar", "lowbar", "loz", "loz", "lozenge", "lozenge", "lozf", "lozf", "lpar", "lpar", "lparlt", "lparlt", "lrarr", "lrarr", "lrcorner", 8544 "lrcorner", "lrhar", "lrhar", "lrhard", "lrhard", "lrm", "lrm", "lrtri", "lrtri", "lsaquo", "lsaquo", "lscr", "lscr", "lsh", "lsh", "lsim", "lsim", "lsime", "lsime", "lsimg", "lsimg", "lsqb", "lsqb", "lsquo", "lsquo", "lsquor", "lsquor", "lstrok", "lstrok", "ltcc", "ltcc", "ltcir", "ltcir", "ltdot", "ltdot", "lthree", 8545 "lthree", "ltimes", "ltimes", "ltlarr", "ltlarr", "ltquest", "ltquest", "ltrPar", "ltrPar", "ltri", "ltri", "ltrie", "ltrie", "ltrif", "ltrif", "lurdshar", "lurdshar", "luruhar", "luruhar", "mDDot", "mDDot", "macr", "macr", "male", "male", "malt", "malt", "maltese", "maltese", "map", "map", "mapsto", "mapsto", "mapstodown", 8546 "mapstodown", "mapstoleft", "mapstoleft", "mapstoup", "mapstoup", "marker", "marker", "mcomma", "mcomma", "mcy", "mcy", "mdash", "mdash", "measuredangle", "measuredangle", "mfr", "mfr", "mho", "mho", "micro", "micro", "mid", "mid", "midast", "midast", "midcir", "midcir", "middot", "middot", "minus", "minus", "minusb", 8547 "minusb", "minusd", "minusd", "minusdu", "minusdu", "mlcp", "mlcp", "mldr", "mldr", "mnplus", "mnplus", "models", "models", "mopf", "mopf", "mp", "mp", "mscr", "mscr", "mstpos", "mstpos", "mu", "mu", "multimap", "multimap", "mumap", "mumap", "nLeftarrow", "nLeftarrow", 
"nLeftrightarrow", "nLeftrightarrow", "nRightarrow", 8548 "nRightarrow", "nVDash", "nVDash", "nVdash", "nVdash", "nabla", "nabla", "nacute", "nacute", "nap", "nap", "napos", "napos", "napprox", "napprox", "natur", "natur", "natural", "natural", "naturals", "naturals", "nbsp", "nbsp", "ncap", "ncap", "ncaron", "ncaron", "ncedil", "ncedil", "ncong", "ncong", "ncup", "ncup", "ncy", 8549 "ncy", "ndash", "ndash", "ne", "ne", "neArr", "neArr", "nearhk", "nearhk", "nearr", "nearr", "nearrow", "nearrow", "nequiv", "nequiv", "nesear", "nesear", "nexist", "nexist", "nexists", "nexists", "nfr", "nfr", "nge", "nge", "ngeq", "ngeq", "ngsim", "ngsim", "ngt", "ngt", "ngtr", "ngtr", "nhArr", "nhArr", "nharr", "nharr", 8550 "nhpar", "nhpar", "ni", "ni", "nis", "nis", "nisd", "nisd", "niv", "niv", "njcy", "njcy", "nlArr", "nlArr", "nlarr", "nlarr", "nldr", "nldr", "nle", "nle", "nleftarrow", "nleftarrow", "nleftrightarrow", "nleftrightarrow", "nleq", "nleq", "nless", "nless", "nlsim", "nlsim", "nlt", "nlt", "nltri", "nltri", "nltrie", "nltrie", 8551 "nmid", "nmid", "nopf", "nopf", "not", "not", "notin", "notin", "notinva", "notinva", "notinvb", "notinvb", "notinvc", "notinvc", "notni", "notni", "notniva", "notniva", "notnivb", "notnivb", "notnivc", "notnivc", "npar", "npar", "nparallel", "nparallel", "npolint", "npolint", "npr", "npr", "nprcue", "nprcue", "nprec", 8552 "nprec", "nrArr", "nrArr", "nrarr", "nrarr", "nrightarrow", "nrightarrow", "nrtri", "nrtri", "nrtrie", "nrtrie", "nsc", "nsc", "nsccue", "nsccue", "nscr", "nscr", "nshortmid", "nshortmid", "nshortparallel", "nshortparallel", "nsim", "nsim", "nsime", "nsime", "nsimeq", "nsimeq", "nsmid", "nsmid", "nspar", "nspar", "nsqsube", 8553 "nsqsube", "nsqsupe", "nsqsupe", "nsub", "nsub", "nsube", "nsube", "nsubseteq", "nsubseteq", "nsucc", "nsucc", "nsup", "nsup", "nsupe", "nsupe", "nsupseteq", "nsupseteq", "ntgl", "ntgl", "ntilde", "ntilde", "ntlg", "ntlg", "ntriangleleft", "ntriangleleft", "ntrianglelefteq", "ntrianglelefteq", 
"ntriangleright", "ntriangleright", 8554 "ntrianglerighteq", "ntrianglerighteq", "nu", "nu", "num", "num", "numero", "numero", "numsp", "numsp", "nvDash", "nvDash", "nvHarr", "nvHarr", "nvdash", "nvdash", "nvinfin", "nvinfin", "nvlArr", "nvlArr", "nvrArr", "nvrArr", "nwArr", "nwArr", "nwarhk", "nwarhk", "nwarr", "nwarr", "nwarrow", "nwarrow", "nwnear", "nwnear", 8555 "oS", "oS", "oacute", "oacute", "oast", "oast", "ocir", "ocir", "ocirc", "ocirc", "ocy", "ocy", "odash", "odash", "odblac", "odblac", "odiv", "odiv", "odot", "odot", "odsold", "odsold", "oelig", "oelig", "ofcir", "ofcir", "ofr", "ofr", "ogon", "ogon", "ograve", "ograve", "ogt", "ogt", "ohbar", "ohbar", "ohm", "ohm", "oint", 8556 "oint", "olarr", "olarr", "olcir", "olcir", "olcross", "olcross", "oline", "oline", "olt", "olt", "omacr", "omacr", "omega", "omega", "omicron", "omicron", "omid", "omid", "ominus", "ominus", "oopf", "oopf", "opar", "opar", "operp", "operp", "oplus", "oplus", "or", "or", "orarr", "orarr", "ord", "ord", "order", "order", 8557 "orderof", "orderof", "ordf", "ordf", "ordm", "ordm", "origof", "origof", "oror", "oror", "orslope", "orslope", "orv", "orv", "oscr", "oscr", "oslash", "oslash", "osol", "osol", "otilde", "otilde", "otimes", "otimes", "otimesas", "otimesas", "ouml", "ouml", "ovbar", "ovbar", "par", "par", "para", "para", "parallel", "parallel", 8558 "parsim", "parsim", "parsl", "parsl", "part", "part", "pcy", "pcy", "percnt", "percnt", "period", "period", "permil", "permil", "perp", "perp", "pertenk", "pertenk", "pfr", "pfr", "phi", "phi", "phiv", "phiv", "phmmat", "phmmat", "phone", "phone", "pi", "pi", "pitchfork", "pitchfork", "piv", "piv", "planck", "planck", 8559 "planckh", "planckh", "plankv", "plankv", "plus", "plus", "plusacir", "plusacir", "plusb", "plusb", "pluscir", "pluscir", "plusdo", "plusdo", "plusdu", "plusdu", "pluse", "pluse", "plusmn", "plusmn", "plussim", "plussim", "plustwo", "plustwo", "pm", "pm", "pointint", "pointint", "popf", "popf", "pound", 
"pound", "pr", 8560 "pr", "prE", "prE", "prap", "prap", "prcue", "prcue", "pre", "pre", "prec", "prec", "precapprox", "precapprox", "preccurlyeq", "preccurlyeq", "preceq", "preceq", "precnapprox", "precnapprox", "precneqq", "precneqq", "precnsim", "precnsim", "precsim", "precsim", "prime", "prime", "primes", "primes", "prnE", "prnE", "prnap", 8561 "prnap", "prnsim", "prnsim", "prod", "prod", "profalar", "profalar", "profline", "profline", "profsurf", "profsurf", "prop", "prop", "propto", "propto", "prsim", "prsim", "prurel", "prurel", "pscr", "pscr", "psi", "psi", "puncsp", "puncsp", "qfr", "qfr", "qint", "qint", "qopf", "qopf", "qprime", "qprime", "qscr", "qscr", 8562 "quaternions", "quaternions", "quatint", "quatint", "quest", "quest", "questeq", "questeq", "rAarr", "rAarr", "rArr", "rArr", "rAtail", "rAtail", "rBarr", "rBarr", "rHar", "rHar", "racute", "racute", "radic", "radic", "raemptyv", "raemptyv", "rang", "rang", "rangd", "rangd", "range", "range", "rangle", "rangle", "raquo", 8563 "raquo", "rarr", "rarr", "rarrap", "rarrap", "rarrb", "rarrb", "rarrbfs", "rarrbfs", "rarrc", "rarrc", "rarrfs", "rarrfs", "rarrhk", "rarrhk", "rarrlp", "rarrlp", "rarrpl", "rarrpl", "rarrsim", "rarrsim", "rarrtl", "rarrtl", "rarrw", "rarrw", "ratail", "ratail", "ratio", "ratio", "rationals", "rationals", "rbarr", "rbarr", 8564 "rbbrk", "rbbrk", "rbrace", "rbrace", "rbrack", "rbrack", "rbrke", "rbrke", "rbrksld", "rbrksld", "rbrkslu", "rbrkslu", "rcaron", "rcaron", "rcedil", "rcedil", "rceil", "rceil", "rcub", "rcub", "rcy", "rcy", "rdca", "rdca", "rdldhar", "rdldhar", "rdquo", "rdquo", "rdquor", "rdquor", "rdsh", "rdsh", "real", "real", "realine", 8565 "realine", "realpart", "realpart", "reals", "reals", "rect", "rect", "reg", "reg", "rfisht", "rfisht", "rfloor", "rfloor", "rfr", "rfr", "rhard", "rhard", "rharu", "rharu", "rharul", "rharul", "rho", "rho", "rhov", "rhov", "rightarrow", "rightarrow", "rightarrowtail", "rightarrowtail", "rightharpoondown", "rightharpoondown", 8566 
"rightharpoonup", "rightharpoonup", "rightleftarrows", "rightleftarrows", "rightleftharpoons", "rightleftharpoons", "rightrightarrows", "rightrightarrows", "rightsquigarrow", "rightsquigarrow", "rightthreetimes", "rightthreetimes", "ring", "ring", "risingdotseq", "risingdotseq", "rlarr", "rlarr", "rlhar", "rlhar", "rlm", 8567 "rlm", "rmoust", "rmoust", "rmoustache", "rmoustache", "rnmid", "rnmid", "roang", "roang", "roarr", "roarr", "robrk", "robrk", "ropar", "ropar", "ropf", "ropf", "roplus", "roplus", "rotimes", "rotimes", "rpar", "rpar", "rpargt", "rpargt", "rppolint", "rppolint", "rrarr", "rrarr", "rsaquo", "rsaquo", "rscr", "rscr", "rsh", 8568 "rsh", "rsqb", "rsqb", "rsquo", "rsquo", "rsquor", "rsquor", "rthree", "rthree", "rtimes", "rtimes", "rtri", "rtri", "rtrie", "rtrie", "rtrif", "rtrif", "rtriltri", "rtriltri", "ruluhar", "ruluhar", "rx", "rx", "sacute", "sacute", "sbquo", "sbquo", "sc", "sc", "scE", "scE", "scap", "scap", "scaron", "scaron", "sccue", 8569 "sccue", "sce", "sce", "scedil", "scedil", "scirc", "scirc", "scnE", "scnE", "scnap", "scnap", "scnsim", "scnsim", "scpolint", "scpolint", "scsim", "scsim", "scy", "scy", "sdot", "sdot", "sdotb", "sdotb", "sdote", "sdote", "seArr", "seArr", "searhk", "searhk", "searr", "searr", "searrow", "searrow", "sect", "sect", "semi", 8570 "semi", "seswar", "seswar", "setminus", "setminus", "setmn", "setmn", "sext", "sext", "sfr", "sfr", "sfrown", "sfrown", "sharp", "sharp", "shchcy", "shchcy", "shcy", "shcy", "shortmid", "shortmid", "shortparallel", "shortparallel", "shy", "shy", "sigma", "sigma", "sigmaf", "sigmaf", "sigmav", "sigmav", "sim", "sim", "simdot", 8571 "simdot", "sime", "sime", "simeq", "simeq", "simg", "simg", "simgE", "simgE", "siml", "siml", "simlE", "simlE", "simne", "simne", "simplus", "simplus", "simrarr", "simrarr", "slarr", "slarr", "smallsetminus", "smallsetminus", "smashp", "smashp", "smeparsl", "smeparsl", "smid", "smid", "smile", "smile", "smt", "smt", "smte", 8572 "smte", "softcy", 
"softcy", "sol", "sol", "solb", "solb", "solbar", "solbar", "sopf", "sopf", "spades", "spades", "spadesuit", "spadesuit", "spar", "spar", "sqcap", "sqcap", "sqcup", "sqcup", "sqsub", "sqsub", "sqsube", "sqsube", "sqsubset", "sqsubset", "sqsubseteq", "sqsubseteq", "sqsup", "sqsup", "sqsupe", "sqsupe", 8573 "sqsupset", "sqsupset", "sqsupseteq", "sqsupseteq", "squ", "squ", "square", "square", "squarf", "squarf", "squf", "squf", "srarr", "srarr", "sscr", "sscr", "ssetmn", "ssetmn", "ssmile", "ssmile", "sstarf", "sstarf", "star", "star", "starf", "starf", "straightepsilon", "straightepsilon", "straightphi", "straightphi", "strns", 8574 "strns", "sub", "sub", "subE", "subE", "subdot", "subdot", "sube", "sube", "subedot", "subedot", "submult", "submult", "subnE", "subnE", "subne", "subne", "subplus", "subplus", "subrarr", "subrarr", "subset", "subset", "subseteq", "subseteq", "subseteqq", "subseteqq", "subsetneq", "subsetneq", "subsetneqq", "subsetneqq", 8575 "subsim", "subsim", "subsub", "subsub", "subsup", "subsup", "succ", "succ", "succapprox", "succapprox", "succcurlyeq", "succcurlyeq", "succeq", "succeq", "succnapprox", "succnapprox", "succneqq", "succneqq", "succnsim", "succnsim", "succsim", "succsim", "sum", "sum", "sung", "sung", "sup", "sup", "sup1", "sup1", "sup2", 8576 "sup2", "sup3", "sup3", "supE", "supE", "supdot", "supdot", "supdsub", "supdsub", "supe", "supe", "supedot", "supedot", "suphsol", "suphsol", "suphsub", "suphsub", "suplarr", "suplarr", "supmult", "supmult", "supnE", "supnE", "supne", "supne", "supplus", "supplus", "supset", "supset", "supseteq", "supseteq", "supseteqq", 8577 "supseteqq", "supsetneq", "supsetneq", "supsetneqq", "supsetneqq", "supsim", "supsim", "supsub", "supsub", "supsup", "supsup", "swArr", "swArr", "swarhk", "swarhk", "swarr", "swarr", "swarrow", "swarrow", "swnwar", "swnwar", "szlig", "szlig", "target", "target", "tau", "tau", "tbrk", "tbrk", "tcaron", "tcaron", "tcedil", 8578 "tcedil", "tcy", "tcy", "tdot", "tdot", "telrec", 
"telrec", "tfr", "tfr", "there4", "there4", "therefore", "therefore", "theta", "theta", "thetasym", "thetasym", "thetav", "thetav", "thickapprox", "thickapprox", "thicksim", "thicksim", "thinsp", "thinsp", "thkap", "thkap", "thksim", "thksim", "thorn", "thorn", "tilde", 8579 "tilde", "times", "times", "timesb", "timesb", "timesbar", "timesbar", "timesd", "timesd", "tint", "tint", "toea", "toea", "top", "top", "topbot", "topbot", "topcir", "topcir", "topf", "topf", "topfork", "topfork", "tosa", "tosa", "tprime", "tprime", "trade", "trade", "triangle", "triangle", "triangledown", "triangledown", 8580 "triangleleft", "triangleleft", "trianglelefteq", "trianglelefteq", "triangleq", "triangleq", "triangleright", "triangleright", "trianglerighteq", "trianglerighteq", "tridot", "tridot", "trie", "trie", "triminus", "triminus", "triplus", "triplus", "trisb", "trisb", "tritime", "tritime", "trpezium", "trpezium", "tscr", 8581 "tscr", "tscy", "tscy", "tshcy", "tshcy", "tstrok", "tstrok", "twixt", "twixt", "twoheadleftarrow", "twoheadleftarrow", "twoheadrightarrow", "twoheadrightarrow", "uArr", "uArr", "uHar", "uHar", "uacute", "uacute", "uarr", "uarr", "ubrcy", "ubrcy", "ubreve", "ubreve", "ucirc", "ucirc", "ucy", "ucy", "udarr", "udarr", "udblac", 8582 "udblac", "udhar", "udhar", "ufisht", "ufisht", "ufr", "ufr", "ugrave", "ugrave", "uharl", "uharl", "uharr", "uharr", "uhblk", "uhblk", "ulcorn", "ulcorn", "ulcorner", "ulcorner", "ulcrop", "ulcrop", "ultri", "ultri", "umacr", "umacr", "uml", "uml", "uogon", "uogon", "uopf", "uopf", "uparrow", "uparrow", "updownarrow", 8583 "updownarrow", "upharpoonleft", "upharpoonleft", "upharpoonright", "upharpoonright", "uplus", "uplus", "upsi", "upsi", "upsih", "upsih", "upsilon", "upsilon", "upuparrows", "upuparrows", "urcorn", "urcorn", "urcorner", "urcorner", "urcrop", "urcrop", "uring", "uring", "urtri", "urtri", "uscr", "uscr", "utdot", "utdot", 8584 "utilde", "utilde", "utri", "utri", "utrif", "utrif", "uuarr", "uuarr", "uuml", 
"uuml", "uwangle", "uwangle", "vArr", "vArr", "vBar", "vBar", "vBarv", "vBarv", "vDash", "vDash", "vangrt", "vangrt", "varepsilon", "varepsilon", "varkappa", "varkappa", "varnothing", "varnothing", "varphi", "varphi", "varpi", "varpi", "varpropto", 8585 "varpropto", "varr", "varr", "varrho", "varrho", "varsigma", "varsigma", "vartheta", "vartheta", "vartriangleleft", "vartriangleleft", "vartriangleright", "vartriangleright", "vcy", "vcy", "vdash", "vdash", "vee", "vee", "veebar", "veebar", "veeeq", "veeeq", "vellip", "vellip", "verbar", "verbar", "vert", "vert", "vfr", 8586 "vfr", "vltri", "vltri", "vopf", "vopf", "vprop", "vprop", "vrtri", "vrtri", "vscr", "vscr", "vzigzag", "vzigzag", "wcirc", "wcirc", "wedbar", "wedbar", "wedge", "wedge", "wedgeq", "wedgeq", "weierp", "weierp", "wfr", "wfr", "wopf", "wopf", "wp", "wp", "wr", "wr", "wreath", "wreath", "wscr", "wscr", "xcap", "xcap", "xcirc", 8587 "xcirc", "xcup", "xcup", "xdtri", "xdtri", "xfr", "xfr", "xhArr", "xhArr", "xharr", "xharr", "xi", "xi", "xlArr", "xlArr", "xlarr", "xlarr", "xmap", "xmap", "xnis", "xnis", "xodot", "xodot", "xopf", "xopf", "xoplus", "xoplus", "xotime", "xotime", "xrArr", "xrArr", "xrarr", "xrarr", "xscr", "xscr", "xsqcup", "xsqcup", "xuplus", 8588 "xuplus", "xutri", "xutri", "xvee", "xvee", "xwedge", "xwedge", "yacute", "yacute", "yacy", "yacy", "ycirc", "ycirc", "ycy", "ycy", "yen", "yen", "yfr", "yfr", "yicy", "yicy", "yopf", "yopf", "yscr", "yscr", "yucy", "yucy", "yuml", "yuml", "zacute", "zacute", "zcaron", "zcaron", "zcy", "zcy", "zdot", "zdot", "zeetrf", 8589 "zeetrf", "zeta", "zeta", "zfr", "zfr", "zhcy", "zhcy", "zigrarr", "zigrarr", "zopf", "zopf", "zscr", "zscr", "zwj", "zwj", "zwnj", "zwnj", ]; 8590 8591 immutable dchar[] availableEntitiesValues = 8592 ['\u00c6', '\u00c6', '\u0026', '\u0026', '\u00c1', '\u00c1', '\u0102', '\u0102', '\u00c2', '\u00c2', '\u0410', '\u0410', '\U0001d504', '\U0001d504', '\u00c0', '\u00c0', '\u0391', '\u0391', '\u0100', '\u0100', '\u2a53', 
'\u2a53', '\u0104', '\u0104', '\U0001d538', '\U0001d538', '\u2061', '\u2061', '\u00c5', '\u00c5', '\U0001d49c', '\U0001d49c', '\u2254', '\u2254', '\u00c3', 8593 '\u00c3', '\u00c4', '\u00c4', '\u2216', '\u2216', '\u2ae7', '\u2ae7', '\u2306', '\u2306', '\u0411', '\u0411', '\u2235', '\u2235', '\u212c', '\u212c', '\u0392', '\u0392', '\U0001d505', '\U0001d505', '\U0001d539', '\U0001d539', '\u02d8', '\u02d8', '\u212c', '\u212c', '\u224e', '\u224e', '\u0427', '\u0427', '\u00a9', '\u00a9', '\u0106', '\u0106', '\u22d2', '\u22d2', '\u2145', 8594 '\u2145', '\u212d', '\u212d', '\u010c', '\u010c', '\u00c7', '\u00c7', '\u0108', '\u0108', '\u2230', '\u2230', '\u010a', '\u010a', '\u00b8', '\u00b8', '\u00b7', '\u00b7', '\u212d', '\u212d', '\u03a7', '\u03a7', '\u2299', '\u2299', '\u2296', '\u2296', '\u2295', '\u2295', '\u2297', '\u2297', 8595 '\u2232', '\u2232', '\u201d', '\u201d', '\u2019', '\u2019', '\u2237', '\u2237', '\u2a74', '\u2a74', '\u2261', '\u2261', '\u222f', '\u222f', '\u222e', '\u222e', '\u2102', '\u2102', '\u2210', '\u2210', '\u2233', 8596 '\u2233', '\u2a2f', '\u2a2f', '\U0001d49e', '\U0001d49e', '\u22d3', '\u22d3', '\u224d', '\u224d', '\u2145', '\u2145', '\u2911', '\u2911', '\u0402', '\u0402', '\u0405', '\u0405', '\u040f', '\u040f', '\u2021', '\u2021', '\u21a1', '\u21a1', '\u2ae4', '\u2ae4', '\u010e', '\u010e', '\u0414', '\u0414', '\u2207', '\u2207', '\u0394', '\u0394', '\U0001d507', '\U0001d507', 8597 '\u00b4', '\u00b4', '\u02d9', '\u02d9', '\u02dd', '\u02dd', '\u0060', '\u0060', '\u02dc', '\u02dc', '\u22c4', '\u22c4', '\u2146', '\u2146', '\U0001d53b', '\U0001d53b', '\u00a8', '\u00a8', '\u20dc', '\u20dc', '\u2250', 8598 '\u2250', '\u222f', '\u222f', '\u00a8', '\u00a8', '\u21d3', '\u21d3', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u2ae4', '\u2ae4', '\u27f8', '\u27f8', '\u27fa', 8599 '\u27fa', '\u27f9', '\u27f9', '\u21d2', '\u21d2', '\u22a8', '\u22a8', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2225', '\u2225', '\u2193', '\u2193', '\u2913', '\u2913', 8600 '\u21f5', 
'\u21f5', '\u0311', '\u0311', '\u2950', '\u2950', '\u295e', '\u295e', '\u21bd', '\u21bd', '\u2956', '\u2956', '\u295f', '\u295f', '\u21c1', '\u21c1', '\u2957', 8601 '\u2957', '\u22a4', '\u22a4', '\u21a7', '\u21a7', '\u21d3', '\u21d3', '\U0001d49f', '\U0001d49f', '\u0110', '\u0110', '\u014a', '\u014a', '\u00d0', '\u00d0', '\u00c9', '\u00c9', '\u011a', '\u011a', '\u00ca', '\u00ca', '\u042d', '\u042d', '\u0116', '\u0116', '\U0001d508', '\U0001d508', '\u00c8', '\u00c8', '\u2208', '\u2208', '\u0112', '\u0112', 8602 '\u25fb', '\u25fb', '\u25ab', '\u25ab', '\u0118', '\u0118', '\U0001d53c', '\U0001d53c', '\u0395', '\u0395', '\u2a75', '\u2a75', '\u2242', '\u2242', '\u21cc', '\u21cc', '\u2130', '\u2130', '\u2a73', '\u2a73', '\u0397', '\u0397', '\u00cb', '\u00cb', '\u2203', '\u2203', '\u2147', '\u2147', 8603 '\u0424', '\u0424', '\U0001d509', '\U0001d509', '\u25fc', '\u25fc', '\u25aa', '\u25aa', '\U0001d53d', '\U0001d53d', '\u2200', '\u2200', '\u2131', '\u2131', '\u2131', '\u2131', '\u0403', '\u0403', '\u003e', '\u003e', '\u0393', '\u0393', '\u03dc', '\u03dc', '\u011e', '\u011e', '\u0122', '\u0122', '\u011c', '\u011c', 8604 '\u0413', '\u0413', '\u0120', '\u0120', '\U0001d50a', '\U0001d50a', '\u22d9', '\u22d9', '\U0001d53e', '\U0001d53e', '\u2265', '\u2265', '\u22db', '\u22db', '\u2267', '\u2267', '\u2aa2', '\u2aa2', '\u2277', '\u2277', '\u2a7e', '\u2a7e', '\u2273', '\u2273', 8605 '\U0001d4a2', '\U0001d4a2', '\u226b', '\u226b', '\u042a', '\u042a', '\u02c7', '\u02c7', '\u005e', '\u005e', '\u0124', '\u0124', '\u210c', '\u210c', '\u210b', '\u210b', '\u210d', '\u210d', '\u2500', '\u2500', '\u210b', '\u210b', '\u0126', '\u0126', '\u224e', '\u224e', '\u224f', '\u224f', '\u0415', '\u0415', '\u0132', '\u0132', 8606 '\u0401', '\u0401', '\u00cd', '\u00cd', '\u00ce', '\u00ce', '\u0418', '\u0418', '\u0130', '\u0130', '\u2111', '\u2111', '\u00cc', '\u00cc', '\u2111', '\u2111', '\u012a', '\u012a', '\u2148', '\u2148', '\u21d2', '\u21d2', '\u222c', '\u222c', '\u222b', '\u222b', '\u22c2', 
'\u22c2', '\u2063', '\u2063', '\u2062', 8607 '\u2062', '\u012e', '\u012e', '\U0001d540', '\U0001d540', '\u0399', '\u0399', '\u2110', '\u2110', '\u0128', '\u0128', '\u0406', '\u0406', '\u00cf', '\u00cf', '\u0134', '\u0134', '\u0419', '\u0419', '\U0001d50d', '\U0001d50d', '\U0001d541', '\U0001d541', '\U0001d4a5', '\U0001d4a5', '\u0408', '\u0408', '\u0404', '\u0404', '\u0425', '\u0425', '\u040c', '\u040c', '\u039a', '\u039a', '\u0136', '\u0136', 8608 '\u041a', '\u041a', '\U0001d50e', '\U0001d50e', '\U0001d542', '\U0001d542', '\U0001d4a6', '\U0001d4a6', '\u0409', '\u0409', '\u003c', '\u003c', '\u0139', '\u0139', '\u039b', '\u039b', '\u27ea', '\u27ea', '\u2112', '\u2112', '\u219e', '\u219e', '\u013d', '\u013d', '\u013b', '\u013b', '\u041b', '\u041b', '\u27e8', '\u27e8', '\u2190', '\u2190', '\u21e4', 8609 '\u21e4', '\u21c6', '\u21c6', '\u2308', '\u2308', '\u27e6', '\u27e6', '\u2961', '\u2961', '\u21c3', '\u21c3', '\u2959', '\u2959', '\u230a', '\u230a', '\u2194', '\u2194', '\u294e', 8610 '\u294e', '\u22a3', '\u22a3', '\u21a4', '\u21a4', '\u295a', '\u295a', '\u22b2', '\u22b2', '\u29cf', '\u29cf', '\u22b4', '\u22b4', '\u2951', '\u2951', '\u2960', '\u2960', '\u21bf', '\u21bf', 8611 '\u2958', '\u2958', '\u21bc', '\u21bc', '\u2952', '\u2952', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u22da', '\u22da', '\u2266', '\u2266', '\u2276', '\u2276', '\u2aa1', '\u2aa1', '\u2a7d', '\u2a7d', 8612 '\u2272', '\u2272', '\U0001d50f', '\U0001d50f', '\u22d8', '\u22d8', '\u21da', '\u21da', '\u013f', '\u013f', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27f6', '\u27f6', '\u27f8', '\u27f8', '\u27fa', '\u27fa', '\u27f9', '\u27f9', 8613 '\U0001d543', '\U0001d543', '\u2199', '\u2199', '\u2198', '\u2198', '\u2112', '\u2112', '\u21b0', '\u21b0', '\u0141', '\u0141', '\u226a', '\u226a', '\u2905', '\u2905', '\u041c', '\u041c', '\u205f', '\u205f', '\u2133', '\u2133', '\U0001d510', '\U0001d510', '\u2213', '\u2213', '\U0001d544', '\U0001d544', '\u2133', '\u2133', '\u039c', '\u039c', 8614 '\u040a', '\u040a', 
'\u0143', '\u0143', '\u0147', '\u0147', '\u0145', '\u0145', '\u041d', '\u041d', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u226b', '\u226b', 8615 '\u226a', '\u226a', '\u000a', '\u000a', '\U0001d511', '\U0001d511', '\u2060', '\u2060', '\u00a0', '\u00a0', '\u2115', '\u2115', '\u2aec', '\u2aec', '\u2262', '\u2262', '\u226d', '\u226d', '\u2226', '\u2226', '\u2209', '\u2209', '\u2260', '\u2260', 8616 '\u2204', '\u2204', '\u226f', '\u226f', '\u2271', '\u2271', '\u2279', '\u2279', '\u2275', '\u2275', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u226e', '\u226e', '\u2270', '\u2270', '\u2278', 8617 '\u2278', '\u2274', '\u2274', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u220c', '\u220c', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u22e2', '\u22e2', '\u22e3', 8618 '\u22e3', '\u2288', '\u2288', '\u2281', '\u2281', '\u22e1', '\u22e1', '\u2289', '\u2289', '\u2241', '\u2241', '\u2244', '\u2244', '\u2247', '\u2247', '\u2249', '\u2249', '\u2224', 8619 '\u2224', '\U0001d4a9', '\U0001d4a9', '\u00d1', '\u00d1', '\u039d', '\u039d', '\u0152', '\u0152', '\u00d3', '\u00d3', '\u00d4', '\u00d4', '\u041e', '\u041e', '\u0150', '\u0150', '\U0001d512', '\U0001d512', '\u00d2', '\u00d2', '\u014c', '\u014c', '\u03a9', '\u03a9', '\u039f', '\u039f', '\U0001d546', '\U0001d546', '\u201c', '\u201c', '\u2018', 8620 '\u2018', '\u2a54', '\u2a54', '\U0001d4aa', '\U0001d4aa', '\u00d8', '\u00d8', '\u00d5', '\u00d5', '\u2a37', '\u2a37', '\u00d6', '\u00d6', '\u203e', '\u203e', '\u23de', '\u23de', '\u23b4', '\u23b4', '\u23dc', '\u23dc', '\u2202', '\u2202', '\u041f', '\u041f', '\U0001d513', '\U0001d513', '\u03a6', '\u03a6', '\u03a0', '\u03a0', '\u00b1', 8621 '\u00b1', '\u210c', '\u210c', '\u2119', '\u2119', '\u2abb', '\u2abb', '\u227a', '\u227a', '\u2aaf', '\u2aaf', '\u227c', '\u227c', '\u227e', '\u227e', '\u2033', '\u2033', '\u220f', '\u220f', '\u2237', '\u2237', '\u221d', '\u221d', '\U0001d4ab', '\U0001d4ab', 8622 '\u03a8', '\u03a8', '\u0022', '\u0022', '\U0001d514', 
'\U0001d514', '\u211a', '\u211a', '\U0001d4ac', '\U0001d4ac', '\u2910', '\u2910', '\u00ae', '\u00ae', '\u0154', '\u0154', '\u27eb', '\u27eb', '\u21a0', '\u21a0', '\u2916', '\u2916', '\u0158', '\u0158', '\u0156', '\u0156', '\u0420', '\u0420', '\u211c', '\u211c', '\u220b', '\u220b', '\u21cb', '\u21cb', 8623 '\u296f', '\u296f', '\u211c', '\u211c', '\u03a1', '\u03a1', '\u27e9', '\u27e9', '\u2192', '\u2192', '\u21e5', '\u21e5', '\u21c4', '\u21c4', '\u2309', '\u2309', '\u27e7', '\u27e7', '\u295d', 8624 '\u295d', '\u21c2', '\u21c2', '\u2955', '\u2955', '\u230b', '\u230b', '\u22a2', '\u22a2', '\u21a6', '\u21a6', '\u295b', '\u295b', '\u22b3', '\u22b3', '\u29d0', '\u29d0', '\u22b5', 8625 '\u22b5', '\u294f', '\u294f', '\u295c', '\u295c', '\u21be', '\u21be', '\u2954', '\u2954', '\u21c0', '\u21c0', '\u2953', '\u2953', '\u21d2', '\u21d2', '\u211d', '\u211d', '\u2970', '\u2970', 8626 '\u21db', '\u21db', '\u211b', '\u211b', '\u21b1', '\u21b1', '\u29f4', '\u29f4', '\u0429', '\u0429', '\u0428', '\u0428', '\u042c', '\u042c', '\u015a', '\u015a', '\u2abc', '\u2abc', '\u0160', '\u0160', '\u015e', '\u015e', '\u015c', '\u015c', '\u0421', '\u0421', '\U0001d516', '\U0001d516', '\u2193', '\u2193', '\u2190', '\u2190', 8627 '\u2192', '\u2192', '\u2191', '\u2191', '\u03a3', '\u03a3', '\u2218', '\u2218', '\U0001d54a', '\U0001d54a', '\u221a', '\u221a', '\u25a1', '\u25a1', '\u2293', '\u2293', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', 8628 '\u2292', '\u2292', '\u2294', '\u2294', '\U0001d4ae', '\U0001d4ae', '\u22c6', '\u22c6', '\u22d0', '\u22d0', '\u22d0', '\u22d0', '\u2286', '\u2286', '\u227b', '\u227b', '\u2ab0', '\u2ab0', '\u227d', '\u227d', '\u227f', '\u227f', '\u220b', 8629 '\u220b', '\u2211', '\u2211', '\u22d1', '\u22d1', '\u2283', '\u2283', '\u2287', '\u2287', '\u22d1', '\u22d1', '\u00de', '\u00de', '\u2122', '\u2122', '\u040b', '\u040b', '\u0426', '\u0426', '\u0009', '\u0009', '\u03a4', '\u03a4', '\u0164', '\u0164', '\u0162', '\u0162', '\u0422', '\u0422', '\U0001d517', 
'\U0001d517', '\u2234', '\u2234', '\u0398', '\u0398', 8630 '\u2009', '\u2009', '\u223c', '\u223c', '\u2243', '\u2243', '\u2245', '\u2245', '\u2248', '\u2248', '\U0001d54b', '\U0001d54b', '\u20db', '\u20db', '\U0001d4af', '\U0001d4af', '\u0166', '\u0166', '\u00da', '\u00da', '\u219f', '\u219f', '\u2949', '\u2949', '\u040e', '\u040e', '\u016c', '\u016c', '\u00db', 8631 '\u00db', '\u0423', '\u0423', '\u0170', '\u0170', '\U0001d518', '\U0001d518', '\u00d9', '\u00d9', '\u016a', '\u016a', '\u005f', '\u005f', '\u23df', '\u23df', '\u23b5', '\u23b5', '\u23dd', '\u23dd', '\u22c3', '\u22c3', '\u228e', '\u228e', '\u0172', '\u0172', '\U0001d54c', '\U0001d54c', '\u2191', '\u2191', '\u2912', 8632 '\u2912', '\u21c5', '\u21c5', '\u2195', '\u2195', '\u296e', '\u296e', '\u22a5', '\u22a5', '\u21a5', '\u21a5', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2196', '\u2196', '\u2197', '\u2197', '\u03d2', '\u03d2', '\u03a5', '\u03a5', 8633 '\u016e', '\u016e', '\U0001d4b0', '\U0001d4b0', '\u0168', '\u0168', '\u00dc', '\u00dc', '\u22ab', '\u22ab', '\u2aeb', '\u2aeb', '\u0412', '\u0412', '\u22a9', '\u22a9', '\u2ae6', '\u2ae6', '\u22c1', '\u22c1', '\u2016', '\u2016', '\u2016', '\u2016', '\u2223', '\u2223', '\u007c', '\u007c', '\u2758', '\u2758', '\u2240', 8634 '\u2240', '\u200a', '\u200a', '\U0001d519', '\U0001d519', '\U0001d54d', '\U0001d54d', '\U0001d4b1', '\U0001d4b1', '\u22aa', '\u22aa', '\u0174', '\u0174', '\u22c0', '\u22c0', '\U0001d51a', '\U0001d51a', '\U0001d54e', '\U0001d54e', '\U0001d4b2', '\U0001d4b2', '\U0001d51b', '\U0001d51b', '\u039e', '\u039e', '\U0001d54f', '\U0001d54f', '\U0001d4b3', '\U0001d4b3', '\u042f', '\u042f', '\u0407', '\u0407', '\u042e', '\u042e', '\u00dd', '\u00dd', 8635 '\u0176', '\u0176', '\u042b', '\u042b', '\U0001d51c', '\U0001d51c', '\U0001d550', '\U0001d550', '\U0001d4b4', '\U0001d4b4', '\u0178', '\u0178', '\u0416', '\u0416', '\u0179', '\u0179', '\u017d', '\u017d', '\u0417', '\u0417', '\u017b', '\u017b', '\u200b', '\u200b', '\u0396', '\u0396', '\u2128', '\u2128', 
'\u2124', '\u2124', '\U0001d4b5', '\U0001d4b5', '\u00e1', '\u00e1', '\u0103', '\u0103', '\u223e', 8636 '\u223e', '\u223f', '\u223f', '\u00e2', '\u00e2', '\u00b4', '\u00b4', '\u0430', '\u0430', '\u00e6', '\u00e6', '\u2061', '\u2061', '\U0001d51e', '\U0001d51e', '\u00e0', '\u00e0', '\u2135', '\u2135', '\u2135', '\u2135', '\u03b1', '\u03b1', '\u0101', '\u0101', '\u2a3f', '\u2a3f', '\u2227', '\u2227', '\u2a55', '\u2a55', '\u2a5c', '\u2a5c', '\u2a58', '\u2a58', '\u2a5a', '\u2a5a', '\u2220', 8637 '\u2220', '\u29a4', '\u29a4', '\u2220', '\u2220', '\u2221', '\u2221', '\u29a8', '\u29a8', '\u29a9', '\u29a9', '\u29aa', '\u29aa', '\u29ab', '\u29ab', '\u29ac', '\u29ac', '\u29ad', '\u29ad', '\u29ae', '\u29ae', '\u29af', '\u29af', '\u221f', '\u221f', '\u22be', '\u22be', '\u299d', '\u299d', '\u2222', 8638 '\u2222', '\u00c5', '\u00c5', '\u237c', '\u237c', '\u0105', '\u0105', '\U0001d552', '\U0001d552', '\u2248', '\u2248', '\u2a70', '\u2a70', '\u2a6f', '\u2a6f', '\u224a', '\u224a', '\u224b', '\u224b', '\u2248', '\u2248', '\u224a', '\u224a', '\u00e5', '\u00e5', '\U0001d4b6', '\U0001d4b6', '\u002a', '\u002a', '\u2248', '\u2248', '\u224d', '\u224d', '\u00e3', '\u00e3', '\u00e4', 8639 '\u00e4', '\u2233', '\u2233', '\u2a11', '\u2a11', '\u2aed', '\u2aed', '\u224c', '\u224c', '\u03f6', '\u03f6', '\u2035', '\u2035', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u22bd', '\u22bd', '\u2305', '\u2305', '\u2305', '\u2305', '\u23b5', '\u23b5', '\u23b6', '\u23b6', '\u224c', '\u224c', '\u0431', 8640 '\u0431', '\u201e', '\u201e', '\u2235', '\u2235', '\u2235', '\u2235', '\u29b0', '\u29b0', '\u03f6', '\u03f6', '\u212c', '\u212c', '\u03b2', '\u03b2', '\u2136', '\u2136', '\u226c', '\u226c', '\U0001d51f', '\U0001d51f', '\u22c2', '\u22c2', '\u25ef', '\u25ef', '\u22c3', '\u22c3', '\u2a00', '\u2a00', '\u2a01', '\u2a01', '\u2a02', '\u2a02', 8641 '\u2a06', '\u2a06', '\u2605', '\u2605', '\u25bd', '\u25bd', '\u25b3', '\u25b3', '\u2a04', '\u2a04', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u290d', '\u290d', '\u29eb', 
'\u29eb', '\u25aa', '\u25aa', '\u25b4', '\u25b4', '\u25be', 8642 '\u25be', '\u25c2', '\u25c2', '\u25b8', '\u25b8', '\u2423', '\u2423', '\u2592', '\u2592', '\u2591', '\u2591', '\u2593', '\u2593', '\u2588', '\u2588', '\u2310', '\u2310', '\U0001d553', '\U0001d553', '\u22a5', '\u22a5', '\u22a5', '\u22a5', '\u22c8', '\u22c8', '\u2557', '\u2557', '\u2554', '\u2554', '\u2556', 8643 '\u2556', '\u2553', '\u2553', '\u2550', '\u2550', '\u2566', '\u2566', '\u2569', '\u2569', '\u2564', '\u2564', '\u2567', '\u2567', '\u255d', '\u255d', '\u255a', '\u255a', '\u255c', '\u255c', '\u2559', '\u2559', '\u2551', '\u2551', '\u256c', '\u256c', '\u2563', '\u2563', '\u2560', '\u2560', '\u256b', '\u256b', '\u2562', '\u2562', '\u255f', '\u255f', '\u29c9', 8644 '\u29c9', '\u2555', '\u2555', '\u2552', '\u2552', '\u2510', '\u2510', '\u250c', '\u250c', '\u2500', '\u2500', '\u2565', '\u2565', '\u2568', '\u2568', '\u252c', '\u252c', '\u2534', '\u2534', '\u229f', '\u229f', '\u229e', '\u229e', '\u22a0', '\u22a0', '\u255b', '\u255b', '\u2558', '\u2558', '\u2518', '\u2518', '\u2514', '\u2514', '\u2502', 8645 '\u2502', '\u256a', '\u256a', '\u2561', '\u2561', '\u255e', '\u255e', '\u253c', '\u253c', '\u2524', '\u2524', '\u251c', '\u251c', '\u2035', '\u2035', '\u02d8', '\u02d8', '\u00a6', '\u00a6', '\U0001d4b7', '\U0001d4b7', '\u204f', '\u204f', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u005c', '\u005c', '\u29c5', '\u29c5', '\u27c8', '\u27c8', '\u2022', '\u2022', '\u2022', 8646 '\u2022', '\u224e', '\u224e', '\u2aae', '\u2aae', '\u224f', '\u224f', '\u224f', '\u224f', '\u0107', '\u0107', '\u2229', '\u2229', '\u2a44', '\u2a44', '\u2a49', '\u2a49', '\u2a4b', '\u2a4b', '\u2a47', '\u2a47', '\u2a40', '\u2a40', '\u2041', '\u2041', '\u02c7', '\u02c7', '\u2a4d', '\u2a4d', '\u010d', '\u010d', '\u00e7', '\u00e7', '\u0109', 8647 '\u0109', '\u2a4c', '\u2a4c', '\u2a50', '\u2a50', '\u010b', '\u010b', '\u00b8', '\u00b8', '\u29b2', '\u29b2', '\u00a2', '\u00a2', '\u00b7', '\u00b7', '\U0001d520', '\U0001d520', '\u0447', 
'\u0447', '\u2713', '\u2713', '\u2713', '\u2713', '\u03c7', '\u03c7', '\u25cb', '\u25cb', '\u29c3', '\u29c3', '\u02c6', '\u02c6', '\u2257', '\u2257', '\u21ba', 8648 '\u21ba', '\u21bb', '\u21bb', '\u00ae', '\u00ae', '\u24c8', '\u24c8', '\u229b', '\u229b', '\u229a', '\u229a', '\u229d', '\u229d', '\u2257', '\u2257', '\u2a10', '\u2a10', '\u2aef', '\u2aef', '\u29c2', '\u29c2', '\u2663', '\u2663', '\u2663', '\u2663', '\u003a', 8649 '\u003a', '\u2254', '\u2254', '\u2254', '\u2254', '\u002c', '\u002c', '\u0040', '\u0040', '\u2201', '\u2201', '\u2218', '\u2218', '\u2201', '\u2201', '\u2102', '\u2102', '\u2245', '\u2245', '\u2a6d', '\u2a6d', '\u222e', '\u222e', '\U0001d554', '\U0001d554', '\u2210', '\u2210', '\u00a9', '\u00a9', '\u2117', '\u2117', '\u21b5', '\u21b5', 8650 '\u2717', '\u2717', '\U0001d4b8', '\U0001d4b8', '\u2acf', '\u2acf', '\u2ad1', '\u2ad1', '\u2ad0', '\u2ad0', '\u2ad2', '\u2ad2', '\u22ef', '\u22ef', '\u2938', '\u2938', '\u2935', '\u2935', '\u22de', '\u22de', '\u22df', '\u22df', '\u21b6', '\u21b6', '\u293d', '\u293d', '\u222a', '\u222a', '\u2a48', '\u2a48', '\u2a46', '\u2a46', '\u2a4a', '\u2a4a', 8651 '\u228d', '\u228d', '\u2a45', '\u2a45', '\u21b7', '\u21b7', '\u293c', '\u293c', '\u22de', '\u22de', '\u22df', '\u22df', '\u22ce', '\u22ce', '\u22cf', '\u22cf', '\u00a4', '\u00a4', '\u21b6', '\u21b6', '\u21b7', '\u21b7', '\u22ce', '\u22ce', '\u22cf', '\u22cf', 8652 '\u2232', '\u2232', '\u2231', '\u2231', '\u232d', '\u232d', '\u21d3', '\u21d3', '\u2965', '\u2965', '\u2020', '\u2020', '\u2138', '\u2138', '\u2193', '\u2193', '\u2010', '\u2010', '\u22a3', '\u22a3', '\u290f', '\u290f', '\u02dd', '\u02dd', '\u010f', '\u010f', '\u0434', '\u0434', '\u2146', '\u2146', '\u2021', '\u2021', '\u21ca', '\u21ca', '\u2a77', 8653 '\u2a77', '\u00b0', '\u00b0', '\u03b4', '\u03b4', '\u29b1', '\u29b1', '\u297f', '\u297f', '\U0001d521', '\U0001d521', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u22c4', '\u22c4', '\u22c4', '\u22c4', '\u2666', '\u2666', '\u2666', '\u2666', '\u00a8', 
'\u00a8', '\u03dd', '\u03dd', '\u22f2', '\u22f2', '\u00f7', '\u00f7', '\u00f7', '\u00f7', '\u22c7', 8654 '\u22c7', '\u22c7', '\u22c7', '\u0452', '\u0452', '\u231e', '\u231e', '\u230d', '\u230d', '\u0024', '\u0024', '\U0001d555', '\U0001d555', '\u02d9', '\u02d9', '\u2250', '\u2250', '\u2251', '\u2251', '\u2238', '\u2238', '\u2214', '\u2214', '\u22a1', '\u22a1', '\u2306', '\u2306', '\u2193', '\u2193', '\u21ca', 8655 '\u21ca', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u2910', '\u2910', '\u231f', '\u231f', '\u230c', '\u230c', '\U0001d4b9', '\U0001d4b9', '\u0455', '\u0455', '\u29f6', '\u29f6', '\u0111', '\u0111', '\u22f1', '\u22f1', '\u25bf', '\u25bf', '\u25be', '\u25be', '\u21f5', '\u21f5', '\u296f', '\u296f', '\u29a6', 8656 '\u29a6', '\u045f', '\u045f', '\u27ff', '\u27ff', '\u2a77', '\u2a77', '\u2251', '\u2251', '\u00e9', '\u00e9', '\u2a6e', '\u2a6e', '\u011b', '\u011b', '\u2256', '\u2256', '\u00ea', '\u00ea', '\u2255', '\u2255', '\u044d', '\u044d', '\u0117', '\u0117', '\u2147', '\u2147', '\u2252', '\u2252', '\U0001d522', '\U0001d522', '\u2a9a', '\u2a9a', '\u00e8', '\u00e8', '\u2a96', '\u2a96', '\u2a98', 8657 '\u2a98', '\u2a99', '\u2a99', '\u23e7', '\u23e7', '\u2113', '\u2113', '\u2a95', '\u2a95', '\u2a97', '\u2a97', '\u0113', '\u0113', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2003', '\u2003', '\u2004', '\u2004', '\u2005', '\u2005', '\u014b', '\u014b', '\u2002', '\u2002', '\u0119', '\u0119', '\U0001d556', '\U0001d556', '\u22d5', '\u22d5', '\u29e3', 8658 '\u29e3', '\u2a71', '\u2a71', '\u03b5', '\u03b5', '\u03b5', '\u03b5', '\u03f5', '\u03f5', '\u2256', '\u2256', '\u2255', '\u2255', '\u2242', '\u2242', '\u2a96', '\u2a96', '\u2a95', '\u2a95', '\u003d', '\u003d', '\u225f', '\u225f', '\u2261', '\u2261', '\u2a78', '\u2a78', '\u29e5', '\u29e5', '\u2253', '\u2253', 8659 '\u2971', '\u2971', '\u212f', '\u212f', '\u2250', '\u2250', '\u2242', '\u2242', '\u03b7', '\u03b7', '\u00f0', '\u00f0', '\u00eb', '\u00eb', '\u20ac', '\u20ac', '\u0021', '\u0021', 
'\u2203', '\u2203', '\u2130', '\u2130', '\u2147', '\u2147', '\u2252', '\u2252', '\u0444', '\u0444', '\u2640', '\u2640', '\ufb03', '\ufb03', '\ufb00', 8660 '\ufb00', '\ufb04', '\ufb04', '\U0001d523', '\U0001d523', '\ufb01', '\ufb01', '\u266d', '\u266d', '\ufb02', '\ufb02', '\u25b1', '\u25b1', '\u0192', '\u0192', '\U0001d557', '\U0001d557', '\u2200', '\u2200', '\u22d4', '\u22d4', '\u2ad9', '\u2ad9', '\u2a0d', '\u2a0d', '\u00bd', '\u00bd', '\u2153', '\u2153', '\u00bc', '\u00bc', '\u2155', '\u2155', '\u2159', '\u2159', 8661 '\u215b', '\u215b', '\u2154', '\u2154', '\u2156', '\u2156', '\u00be', '\u00be', '\u2157', '\u2157', '\u215c', '\u215c', '\u2158', '\u2158', '\u215a', '\u215a', '\u215d', '\u215d', '\u215e', '\u215e', '\u2044', '\u2044', '\u2322', '\u2322', '\U0001d4bb', '\U0001d4bb', '\u2267', '\u2267', '\u2a8c', '\u2a8c', '\u01f5', '\u01f5', '\u03b3', '\u03b3', '\u03dd', 8662 '\u03dd', '\u2a86', '\u2a86', '\u011f', '\u011f', '\u011d', '\u011d', '\u0433', '\u0433', '\u0121', '\u0121', '\u2265', '\u2265', '\u22db', '\u22db', '\u2265', '\u2265', '\u2267', '\u2267', '\u2a7e', '\u2a7e', '\u2a7e', '\u2a7e', '\u2aa9', '\u2aa9', '\u2a80', '\u2a80', '\u2a82', '\u2a82', '\u2a84', '\u2a84', '\u2a94', '\u2a94', '\U0001d524', '\U0001d524', '\u226b', '\u226b', '\u22d9', 8663 '\u22d9', '\u2137', '\u2137', '\u0453', '\u0453', '\u2277', '\u2277', '\u2a92', '\u2a92', '\u2aa5', '\u2aa5', '\u2aa4', '\u2aa4', '\u2269', '\u2269', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a88', '\u2a88', '\u2a88', '\u2a88', '\u2269', '\u2269', '\u22e7', '\u22e7', '\U0001d558', '\U0001d558', '\u0060', '\u0060', '\u210a', '\u210a', '\u2273', '\u2273', '\u2a8e', '\u2a8e', '\u2a90', '\u2a90', '\u2aa7', 8664 '\u2aa7', '\u2a7a', '\u2a7a', '\u22d7', '\u22d7', '\u2995', '\u2995', '\u2a7c', '\u2a7c', '\u2a86', '\u2a86', '\u2978', '\u2978', '\u22d7', '\u22d7', '\u22db', '\u22db', '\u2a8c', '\u2a8c', '\u2277', '\u2277', '\u2273', '\u2273', '\u21d4', '\u21d4', '\u200a', '\u200a', '\u00bd', '\u00bd', '\u210b', 
'\u210b', 8665 '\u044a', '\u044a', '\u2194', '\u2194', '\u2948', '\u2948', '\u21ad', '\u21ad', '\u210f', '\u210f', '\u0125', '\u0125', '\u2665', '\u2665', '\u2665', '\u2665', '\u2026', '\u2026', '\u22b9', '\u22b9', '\U0001d525', '\U0001d525', '\u2925', '\u2925', '\u2926', '\u2926', '\u21ff', '\u21ff', '\u223b', '\u223b', '\u21a9', '\u21a9', 8666 '\u21aa', '\u21aa', '\U0001d559', '\U0001d559', '\u2015', '\u2015', '\U0001d4bd', '\U0001d4bd', '\u210f', '\u210f', '\u0127', '\u0127', '\u2043', '\u2043', '\u2010', '\u2010', '\u00ed', '\u00ed', '\u2063', '\u2063', '\u00ee', '\u00ee', '\u0438', '\u0438', '\u0435', '\u0435', '\u00a1', '\u00a1', '\u21d4', '\u21d4', '\U0001d526', '\U0001d526', '\u00ec', '\u00ec', '\u2148', 8667 '\u2148', '\u2a0c', '\u2a0c', '\u222d', '\u222d', '\u29dc', '\u29dc', '\u2129', '\u2129', '\u0133', '\u0133', '\u012b', '\u012b', '\u2111', '\u2111', '\u2110', '\u2110', '\u2111', '\u2111', '\u0131', '\u0131', '\u22b7', '\u22b7', '\u01b5', '\u01b5', '\u2208', '\u2208', '\u2105', '\u2105', '\u221e', '\u221e', '\u29dd', '\u29dd', '\u0131', 8668 '\u0131', '\u222b', '\u222b', '\u22ba', '\u22ba', '\u2124', '\u2124', '\u22ba', '\u22ba', '\u2a17', '\u2a17', '\u2a3c', '\u2a3c', '\u0451', '\u0451', '\u012f', '\u012f', '\U0001d55a', '\U0001d55a', '\u03b9', '\u03b9', '\u2a3c', '\u2a3c', '\u00bf', '\u00bf', '\U0001d4be', '\U0001d4be', '\u2208', '\u2208', '\u22f9', '\u22f9', '\u22f5', '\u22f5', '\u22f4', 8669 '\u22f4', '\u22f3', '\u22f3', '\u2208', '\u2208', '\u2062', '\u2062', '\u0129', '\u0129', '\u0456', '\u0456', '\u00ef', '\u00ef', '\u0135', '\u0135', '\u0439', '\u0439', '\U0001d527', '\U0001d527', '\u0237', '\u0237', '\U0001d55b', '\U0001d55b', '\U0001d4bf', '\U0001d4bf', '\u0458', '\u0458', '\u0454', '\u0454', '\u03ba', '\u03ba', '\u03f0', '\u03f0', '\u0137', '\u0137', '\u043a', '\u043a', '\U0001d528', 8670 '\U0001d528', '\u0138', '\u0138', '\u0445', '\u0445', '\u045c', '\u045c', '\U0001d55c', '\U0001d55c', '\U0001d4c0', '\U0001d4c0', '\u21da', '\u21da', 
'\u21d0', '\u21d0', '\u291b', '\u291b', '\u290e', '\u290e', '\u2266', '\u2266', '\u2a8b', '\u2a8b', '\u2962', '\u2962', '\u013a', '\u013a', '\u29b4', '\u29b4', '\u2112', '\u2112', '\u03bb', '\u03bb', '\u27e8', '\u27e8', '\u2991', '\u2991', 8671 '\u27e8', '\u27e8', '\u2a85', '\u2a85', '\u00ab', '\u00ab', '\u2190', '\u2190', '\u21e4', '\u21e4', '\u291f', '\u291f', '\u291d', '\u291d', '\u21a9', '\u21a9', '\u21ab', '\u21ab', '\u2939', '\u2939', '\u2973', '\u2973', '\u21a2', '\u21a2', '\u2aab', '\u2aab', '\u2919', '\u2919', '\u2aad', '\u2aad', '\u290c', '\u290c', '\u2772', '\u2772', '\u007b', 8672 '\u007b', '\u005b', '\u005b', '\u298b', '\u298b', '\u298f', '\u298f', '\u298d', '\u298d', '\u013e', '\u013e', '\u013c', '\u013c', '\u2308', '\u2308', '\u007b', '\u007b', '\u043b', '\u043b', '\u2936', '\u2936', '\u201c', '\u201c', '\u201e', '\u201e', '\u2967', '\u2967', '\u294b', '\u294b', '\u21b2', '\u21b2', '\u2264', '\u2264', '\u2190', 8673 '\u2190', '\u21a2', '\u21a2', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u21c7', '\u21c7', '\u2194', '\u2194', '\u21c6', '\u21c6', '\u21cb', '\u21cb', '\u21ad', '\u21ad', '\u22cb', 8674 '\u22cb', '\u22da', '\u22da', '\u2264', '\u2264', '\u2266', '\u2266', '\u2a7d', '\u2a7d', '\u2a7d', '\u2a7d', '\u2aa8', '\u2aa8', '\u2a7f', '\u2a7f', '\u2a81', '\u2a81', '\u2a83', '\u2a83', '\u2a93', '\u2a93', '\u2a85', '\u2a85', '\u22d6', '\u22d6', '\u22da', '\u22da', '\u2a8b', '\u2a8b', '\u2276', '\u2276', 8675 '\u2272', '\u2272', '\u297c', '\u297c', '\u230a', '\u230a', '\U0001d529', '\U0001d529', '\u2276', '\u2276', '\u2a91', '\u2a91', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u296a', '\u296a', '\u2584', '\u2584', '\u0459', '\u0459', '\u226a', '\u226a', '\u21c7', '\u21c7', '\u231e', '\u231e', '\u296b', '\u296b', '\u25fa', '\u25fa', '\u0140', '\u0140', '\u23b0', '\u23b0', 8676 '\u23b0', '\u23b0', '\u2268', '\u2268', '\u2a89', '\u2a89', '\u2a89', '\u2a89', '\u2a87', '\u2a87', '\u2a87', '\u2a87', '\u2268', '\u2268', '\u22e6', '\u22e6', '\u27ec', '\u27ec', 
'\u21fd', '\u21fd', '\u27e6', '\u27e6', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27fc', '\u27fc', '\u27f6', 8677 '\u27f6', '\u21ab', '\u21ab', '\u21ac', '\u21ac', '\u2985', '\u2985', '\U0001d55d', '\U0001d55d', '\u2a2d', '\u2a2d', '\u2a34', '\u2a34', '\u2217', '\u2217', '\u005f', '\u005f', '\u25ca', '\u25ca', '\u25ca', '\u25ca', '\u29eb', '\u29eb', '\u0028', '\u0028', '\u2993', '\u2993', '\u21c6', '\u21c6', '\u231f', 8678 '\u231f', '\u21cb', '\u21cb', '\u296d', '\u296d', '\u200e', '\u200e', '\u22bf', '\u22bf', '\u2039', '\u2039', '\U0001d4c1', '\U0001d4c1', '\u21b0', '\u21b0', '\u2272', '\u2272', '\u2a8d', '\u2a8d', '\u2a8f', '\u2a8f', '\u005b', '\u005b', '\u2018', '\u2018', '\u201a', '\u201a', '\u0142', '\u0142', '\u2aa6', '\u2aa6', '\u2a79', '\u2a79', '\u22d6', '\u22d6', '\u22cb', 8679 '\u22cb', '\u22c9', '\u22c9', '\u2976', '\u2976', '\u2a7b', '\u2a7b', '\u2996', '\u2996', '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u25c2', '\u25c2', '\u294a', '\u294a', '\u2966', '\u2966', '\u223a', '\u223a', '\u00af', '\u00af', '\u2642', '\u2642', '\u2720', '\u2720', '\u2720', '\u2720', '\u21a6', '\u21a6', '\u21a6', '\u21a6', '\u21a7', 8680 '\u21a7', '\u21a4', '\u21a4', '\u21a5', '\u21a5', '\u25ae', '\u25ae', '\u2a29', '\u2a29', '\u043c', '\u043c', '\u2014', '\u2014', '\u2221', '\u2221', '\U0001d52a', '\U0001d52a', '\u2127', '\u2127', '\u00b5', '\u00b5', '\u2223', '\u2223', '\u002a', '\u002a', '\u2af0', '\u2af0', '\u00b7', '\u00b7', '\u2212', '\u2212', '\u229f', 8681 '\u229f', '\u2238', '\u2238', '\u2a2a', '\u2a2a', '\u2adb', '\u2adb', '\u2026', '\u2026', '\u2213', '\u2213', '\u22a7', '\u22a7', '\U0001d55e', '\U0001d55e', '\u2213', '\u2213', '\U0001d4c2', '\U0001d4c2', '\u223e', '\u223e', '\u03bc', '\u03bc', '\u22b8', '\u22b8', '\u22b8', '\u22b8', '\u21cd', '\u21cd', '\u21ce', '\u21ce', '\u21cf', 8682 '\u21cf', '\u22af', '\u22af', '\u22ae', '\u22ae', '\u2207', '\u2207', '\u0144', '\u0144', '\u2249', '\u2249', '\u0149', '\u0149', '\u2249', '\u2249', '\u266e', '\u266e', '\u266e', 
'\u266e', '\u2115', '\u2115', '\u00a0', '\u00a0', '\u2a43', '\u2a43', '\u0148', '\u0148', '\u0146', '\u0146', '\u2247', '\u2247', '\u2a42', '\u2a42', '\u043d', 8683 '\u043d', '\u2013', '\u2013', '\u2260', '\u2260', '\u21d7', '\u21d7', '\u2924', '\u2924', '\u2197', '\u2197', '\u2197', '\u2197', '\u2262', '\u2262', '\u2928', '\u2928', '\u2204', '\u2204', '\u2204', '\u2204', '\U0001d52b', '\U0001d52b', '\u2271', '\u2271', '\u2271', '\u2271', '\u2275', '\u2275', '\u226f', '\u226f', '\u226f', '\u226f', '\u21ce', '\u21ce', '\u21ae', '\u21ae', 8684 '\u2af2', '\u2af2', '\u220b', '\u220b', '\u22fc', '\u22fc', '\u22fa', '\u22fa', '\u220b', '\u220b', '\u045a', '\u045a', '\u21cd', '\u21cd', '\u219a', '\u219a', '\u2025', '\u2025', '\u2270', '\u2270', '\u219a', '\u219a', '\u21ae', '\u21ae', '\u2270', '\u2270', '\u226e', '\u226e', '\u2274', '\u2274', '\u226e', '\u226e', '\u22ea', '\u22ea', '\u22ec', '\u22ec', 8685 '\u2224', '\u2224', '\U0001d55f', '\U0001d55f', '\u00ac', '\u00ac', '\u2209', '\u2209', '\u2209', '\u2209', '\u22f7', '\u22f7', '\u22f6', '\u22f6', '\u220c', '\u220c', '\u220c', '\u220c', '\u22fe', '\u22fe', '\u22fd', '\u22fd', '\u2226', '\u2226', '\u2226', '\u2226', '\u2a14', '\u2a14', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u2280', 8686 '\u2280', '\u21cf', '\u21cf', '\u219b', '\u219b', '\u219b', '\u219b', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u2281', '\u2281', '\u22e1', '\u22e1', '\U0001d4c3', '\U0001d4c3', '\u2224', '\u2224', '\u2226', '\u2226', '\u2241', '\u2241', '\u2244', '\u2244', '\u2244', '\u2244', '\u2224', '\u2224', '\u2226', '\u2226', '\u22e2', 8687 '\u22e2', '\u22e3', '\u22e3', '\u2284', '\u2284', '\u2288', '\u2288', '\u2288', '\u2288', '\u2281', '\u2281', '\u2285', '\u2285', '\u2289', '\u2289', '\u2289', '\u2289', '\u2279', '\u2279', '\u00f1', '\u00f1', '\u2278', '\u2278', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u22eb', '\u22eb', 8688 '\u22ed', '\u22ed', '\u03bd', '\u03bd', '\u0023', '\u0023', '\u2116', '\u2116', '\u2007', '\u2007', '\u22ad', 
'\u22ad', '\u2904', '\u2904', '\u22ac', '\u22ac', '\u29de', '\u29de', '\u2902', '\u2902', '\u2903', '\u2903', '\u21d6', '\u21d6', '\u2923', '\u2923', '\u2196', '\u2196', '\u2196', '\u2196', '\u2927', '\u2927', 8689 '\u24c8', '\u24c8', '\u00f3', '\u00f3', '\u229b', '\u229b', '\u229a', '\u229a', '\u00f4', '\u00f4', '\u043e', '\u043e', '\u229d', '\u229d', '\u0151', '\u0151', '\u2a38', '\u2a38', '\u2299', '\u2299', '\u29bc', '\u29bc', '\u0153', '\u0153', '\u29bf', '\u29bf', '\U0001d52c', '\U0001d52c', '\u02db', '\u02db', '\u00f2', '\u00f2', '\u29c1', '\u29c1', '\u29b5', '\u29b5', '\u03a9', '\u03a9', '\u222e', 8690 '\u222e', '\u21ba', '\u21ba', '\u29be', '\u29be', '\u29bb', '\u29bb', '\u203e', '\u203e', '\u29c0', '\u29c0', '\u014d', '\u014d', '\u03c9', '\u03c9', '\u03bf', '\u03bf', '\u29b6', '\u29b6', '\u2296', '\u2296', '\U0001d560', '\U0001d560', '\u29b7', '\u29b7', '\u29b9', '\u29b9', '\u2295', '\u2295', '\u2228', '\u2228', '\u21bb', '\u21bb', '\u2a5d', '\u2a5d', '\u2134', '\u2134', 8691 '\u2134', '\u2134', '\u00aa', '\u00aa', '\u00ba', '\u00ba', '\u22b6', '\u22b6', '\u2a56', '\u2a56', '\u2a57', '\u2a57', '\u2a5b', '\u2a5b', '\u2134', '\u2134', '\u00f8', '\u00f8', '\u2298', '\u2298', '\u00f5', '\u00f5', '\u2297', '\u2297', '\u2a36', '\u2a36', '\u00f6', '\u00f6', '\u233d', '\u233d', '\u2225', '\u2225', '\u00b6', '\u00b6', '\u2225', '\u2225', 8692 '\u2af3', '\u2af3', '\u2afd', '\u2afd', '\u2202', '\u2202', '\u043f', '\u043f', '\u0025', '\u0025', '\u002e', '\u002e', '\u2030', '\u2030', '\u22a5', '\u22a5', '\u2031', '\u2031', '\U0001d52d', '\U0001d52d', '\u03c6', '\u03c6', '\u03d5', '\u03d5', '\u2133', '\u2133', '\u260e', '\u260e', '\u03c0', '\u03c0', '\u22d4', '\u22d4', '\u03d6', '\u03d6', '\u210f', '\u210f', 8693 '\u210e', '\u210e', '\u210f', '\u210f', '\u002b', '\u002b', '\u2a23', '\u2a23', '\u229e', '\u229e', '\u2a22', '\u2a22', '\u2214', '\u2214', '\u2a25', '\u2a25', '\u2a72', '\u2a72', '\u00b1', '\u00b1', '\u2a26', '\u2a26', '\u2a27', '\u2a27', '\u00b1', '\u00b1', 
'\u2a15', '\u2a15', '\U0001d561', '\U0001d561', '\u00a3', '\u00a3', '\u227a', 8694 '\u227a', '\u2ab3', '\u2ab3', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u227a', '\u227a', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u2ab9', '\u2ab9', '\u2ab5', '\u2ab5', '\u22e8', '\u22e8', '\u227e', '\u227e', '\u2032', '\u2032', '\u2119', '\u2119', '\u2ab5', '\u2ab5', '\u2ab9', 8695 '\u2ab9', '\u22e8', '\u22e8', '\u220f', '\u220f', '\u232e', '\u232e', '\u2312', '\u2312', '\u2313', '\u2313', '\u221d', '\u221d', '\u221d', '\u221d', '\u227e', '\u227e', '\u22b0', '\u22b0', '\U0001d4c5', '\U0001d4c5', '\u03c8', '\u03c8', '\u2008', '\u2008', '\U0001d52e', '\U0001d52e', '\u2a0c', '\u2a0c', '\U0001d562', '\U0001d562', '\u2057', '\u2057', '\U0001d4c6', '\U0001d4c6', 8696 '\u210d', '\u210d', '\u2a16', '\u2a16', '\u003f', '\u003f', '\u225f', '\u225f', '\u21db', '\u21db', '\u21d2', '\u21d2', '\u291c', '\u291c', '\u290f', '\u290f', '\u2964', '\u2964', '\u0155', '\u0155', '\u221a', '\u221a', '\u29b3', '\u29b3', '\u27e9', '\u27e9', '\u2992', '\u2992', '\u29a5', '\u29a5', '\u27e9', '\u27e9', '\u00bb', 8697 '\u00bb', '\u2192', '\u2192', '\u2975', '\u2975', '\u21e5', '\u21e5', '\u2920', '\u2920', '\u2933', '\u2933', '\u291e', '\u291e', '\u21aa', '\u21aa', '\u21ac', '\u21ac', '\u2945', '\u2945', '\u2974', '\u2974', '\u21a3', '\u21a3', '\u219d', '\u219d', '\u291a', '\u291a', '\u2236', '\u2236', '\u211a', '\u211a', '\u290d', '\u290d', 8698 '\u2773', '\u2773', '\u007d', '\u007d', '\u005d', '\u005d', '\u298c', '\u298c', '\u298e', '\u298e', '\u2990', '\u2990', '\u0159', '\u0159', '\u0157', '\u0157', '\u2309', '\u2309', '\u007d', '\u007d', '\u0440', '\u0440', '\u2937', '\u2937', '\u2969', '\u2969', '\u201d', '\u201d', '\u201d', '\u201d', '\u21b3', '\u21b3', '\u211c', '\u211c', '\u211b', 8699 '\u211b', '\u211c', '\u211c', '\u211d', '\u211d', '\u25ad', '\u25ad', '\u00ae', '\u00ae', '\u297d', '\u297d', '\u230b', '\u230b', '\U0001d52f', '\U0001d52f', '\u21c1', '\u21c1', 
'\u21c0', '\u21c0', '\u296c', '\u296c', '\u03c1', '\u03c1', '\u03f1', '\u03f1', '\u2192', '\u2192', '\u21a3', '\u21a3', '\u21c1', '\u21c1', 8700 '\u21c0', '\u21c0', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u21c9', '\u21c9', '\u219d', '\u219d', '\u22cc', '\u22cc', '\u02da', '\u02da', '\u2253', '\u2253', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u200f', 8701 '\u200f', '\u23b1', '\u23b1', '\u23b1', '\u23b1', '\u2aee', '\u2aee', '\u27ed', '\u27ed', '\u21fe', '\u21fe', '\u27e7', '\u27e7', '\u2986', '\u2986', '\U0001d563', '\U0001d563', '\u2a2e', '\u2a2e', '\u2a35', '\u2a35', '\u0029', '\u0029', '\u2994', '\u2994', '\u2a12', '\u2a12', '\u21c9', '\u21c9', '\u203a', '\u203a', '\U0001d4c7', '\U0001d4c7', '\u21b1', 8702 '\u21b1', '\u005d', '\u005d', '\u2019', '\u2019', '\u2019', '\u2019', '\u22cc', '\u22cc', '\u22ca', '\u22ca', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25b8', '\u25b8', '\u29ce', '\u29ce', '\u2968', '\u2968', '\u211e', '\u211e', '\u015b', '\u015b', '\u201a', '\u201a', '\u227b', '\u227b', '\u2ab4', '\u2ab4', '\u2ab8', '\u2ab8', '\u0161', '\u0161', '\u227d', 8703 '\u227d', '\u2ab0', '\u2ab0', '\u015f', '\u015f', '\u015d', '\u015d', '\u2ab6', '\u2ab6', '\u2aba', '\u2aba', '\u22e9', '\u22e9', '\u2a13', '\u2a13', '\u227f', '\u227f', '\u0441', '\u0441', '\u22c5', '\u22c5', '\u22a1', '\u22a1', '\u2a66', '\u2a66', '\u21d8', '\u21d8', '\u2925', '\u2925', '\u2198', '\u2198', '\u2198', '\u2198', '\u00a7', '\u00a7', '\u003b', 8704 '\u003b', '\u2929', '\u2929', '\u2216', '\u2216', '\u2216', '\u2216', '\u2736', '\u2736', '\U0001d530', '\U0001d530', '\u2322', '\u2322', '\u266f', '\u266f', '\u0449', '\u0449', '\u0448', '\u0448', '\u2223', '\u2223', '\u2225', '\u2225', '\u00ad', '\u00ad', '\u03c3', '\u03c3', '\u03c2', '\u03c2', '\u03c2', '\u03c2', '\u223c', '\u223c', '\u2a6a', 8705 '\u2a6a', '\u2243', '\u2243', '\u2243', '\u2243', '\u2a9e', '\u2a9e', '\u2aa0', '\u2aa0', '\u2a9d', '\u2a9d', '\u2a9f', '\u2a9f', '\u2246', '\u2246', '\u2a24', '\u2a24', '\u2972', '\u2972', 
'\u2190', '\u2190', '\u2216', '\u2216', '\u2a33', '\u2a33', '\u29e4', '\u29e4', '\u2223', '\u2223', '\u2323', '\u2323', '\u2aaa', '\u2aaa', '\u2aac', 8706 '\u2aac', '\u044c', '\u044c', '\u002f', '\u002f', '\u29c4', '\u29c4', '\u233f', '\u233f', '\U0001d564', '\U0001d564', '\u2660', '\u2660', '\u2660', '\u2660', '\u2225', '\u2225', '\u2293', '\u2293', '\u2294', '\u2294', '\u228f', '\u228f', '\u2291', '\u2291', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', '\u2292', '\u2292', 8707 '\u2290', '\u2290', '\u2292', '\u2292', '\u25a1', '\u25a1', '\u25a1', '\u25a1', '\u25aa', '\u25aa', '\u25aa', '\u25aa', '\u2192', '\u2192', '\U0001d4c8', '\U0001d4c8', '\u2216', '\u2216', '\u2323', '\u2323', '\u22c6', '\u22c6', '\u2606', '\u2606', '\u2605', '\u2605', '\u03f5', '\u03f5', '\u03d5', '\u03d5', '\u00af', 8708 '\u00af', '\u2282', '\u2282', '\u2ac5', '\u2ac5', '\u2abd', '\u2abd', '\u2286', '\u2286', '\u2ac3', '\u2ac3', '\u2ac1', '\u2ac1', '\u2acb', '\u2acb', '\u228a', '\u228a', '\u2abf', '\u2abf', '\u2979', '\u2979', '\u2282', '\u2282', '\u2286', '\u2286', '\u2ac5', '\u2ac5', '\u228a', '\u228a', '\u2acb', '\u2acb', 8709 '\u2ac7', '\u2ac7', '\u2ad5', '\u2ad5', '\u2ad3', '\u2ad3', '\u227b', '\u227b', '\u2ab8', '\u2ab8', '\u227d', '\u227d', '\u2ab0', '\u2ab0', '\u2aba', '\u2aba', '\u2ab6', '\u2ab6', '\u22e9', '\u22e9', '\u227f', '\u227f', '\u2211', '\u2211', '\u266a', '\u266a', '\u2283', '\u2283', '\u00b9', '\u00b9', '\u00b2', 8710 '\u00b2', '\u00b3', '\u00b3', '\u2ac6', '\u2ac6', '\u2abe', '\u2abe', '\u2ad8', '\u2ad8', '\u2287', '\u2287', '\u2ac4', '\u2ac4', '\u27c9', '\u27c9', '\u2ad7', '\u2ad7', '\u297b', '\u297b', '\u2ac2', '\u2ac2', '\u2acc', '\u2acc', '\u228b', '\u228b', '\u2ac0', '\u2ac0', '\u2283', '\u2283', '\u2287', '\u2287', '\u2ac6', 8711 '\u2ac6', '\u228b', '\u228b', '\u2acc', '\u2acc', '\u2ac8', '\u2ac8', '\u2ad4', '\u2ad4', '\u2ad6', '\u2ad6', '\u21d9', '\u21d9', '\u2926', '\u2926', '\u2199', '\u2199', '\u2199', '\u2199', '\u292a', '\u292a', '\u00df', 
'\u00df', '\u2316', '\u2316', '\u03c4', '\u03c4', '\u23b4', '\u23b4', '\u0165', '\u0165', '\u0163', 8712 '\u0163', '\u0442', '\u0442', '\u20db', '\u20db', '\u2315', '\u2315', '\U0001d531', '\U0001d531', '\u2234', '\u2234', '\u2234', '\u2234', '\u03b8', '\u03b8', '\u03d1', '\u03d1', '\u03d1', '\u03d1', '\u2248', '\u2248', '\u223c', '\u223c', '\u2009', '\u2009', '\u2248', '\u2248', '\u223c', '\u223c', '\u00fe', '\u00fe', '\u02dc', 8713 '\u02dc', '\u00d7', '\u00d7', '\u22a0', '\u22a0', '\u2a31', '\u2a31', '\u2a30', '\u2a30', '\u222d', '\u222d', '\u2928', '\u2928', '\u22a4', '\u22a4', '\u2336', '\u2336', '\u2af1', '\u2af1', '\U0001d565', '\U0001d565', '\u2ada', '\u2ada', '\u2929', '\u2929', '\u2034', '\u2034', '\u2122', '\u2122', '\u25b5', '\u25b5', '\u25bf', '\u25bf', 8714 '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u225c', '\u225c', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25ec', '\u25ec', '\u225c', '\u225c', '\u2a3a', '\u2a3a', '\u2a39', '\u2a39', '\u29cd', '\u29cd', '\u2a3b', '\u2a3b', '\u23e2', '\u23e2', '\U0001d4c9', 8715 '\U0001d4c9', '\u0446', '\u0446', '\u045b', '\u045b', '\u0167', '\u0167', '\u226c', '\u226c', '\u219e', '\u219e', '\u21a0', '\u21a0', '\u21d1', '\u21d1', '\u2963', '\u2963', '\u00fa', '\u00fa', '\u2191', '\u2191', '\u045e', '\u045e', '\u016d', '\u016d', '\u00fb', '\u00fb', '\u0443', '\u0443', '\u21c5', '\u21c5', '\u0171', 8716 '\u0171', '\u296e', '\u296e', '\u297e', '\u297e', '\U0001d532', '\U0001d532', '\u00f9', '\u00f9', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u2580', '\u2580', '\u231c', '\u231c', '\u231c', '\u231c', '\u230f', '\u230f', '\u25f8', '\u25f8', '\u016b', '\u016b', '\u00a8', '\u00a8', '\u0173', '\u0173', '\U0001d566', '\U0001d566', '\u2191', '\u2191', '\u2195', 8717 '\u2195', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u228e', '\u228e', '\u03c5', '\u03c5', '\u03d2', '\u03d2', '\u03c5', '\u03c5', '\u21c8', '\u21c8', '\u231d', '\u231d', '\u231d', '\u231d', '\u230e', '\u230e', '\u016f', '\u016f', '\u25f9', '\u25f9', '\U0001d4ca', 
'\U0001d4ca', '\u22f0', '\u22f0', 8718 '\u0169', '\u0169', '\u25b5', '\u25b5', '\u25b4', '\u25b4', '\u21c8', '\u21c8', '\u00fc', '\u00fc', '\u29a7', '\u29a7', '\u21d5', '\u21d5', '\u2ae8', '\u2ae8', '\u2ae9', '\u2ae9', '\u22a8', '\u22a8', '\u299c', '\u299c', '\u03f5', '\u03f5', '\u03f0', '\u03f0', '\u2205', '\u2205', '\u03d5', '\u03d5', '\u03d6', '\u03d6', '\u221d', 8719 '\u221d', '\u2195', '\u2195', '\u03f1', '\u03f1', '\u03c2', '\u03c2', '\u03d1', '\u03d1', '\u22b2', '\u22b2', '\u22b3', '\u22b3', '\u0432', '\u0432', '\u22a2', '\u22a2', '\u2228', '\u2228', '\u22bb', '\u22bb', '\u225a', '\u225a', '\u22ee', '\u22ee', '\u007c', '\u007c', '\u007c', '\u007c', '\U0001d533', 8720 '\U0001d533', '\u22b2', '\u22b2', '\U0001d567', '\U0001d567', '\u221d', '\u221d', '\u22b3', '\u22b3', '\U0001d4cb', '\U0001d4cb', '\u299a', '\u299a', '\u0175', '\u0175', '\u2a5f', '\u2a5f', '\u2227', '\u2227', '\u2259', '\u2259', '\u2118', '\u2118', '\U0001d534', '\U0001d534', '\U0001d568', '\U0001d568', '\u2118', '\u2118', '\u2240', '\u2240', '\u2240', '\u2240', '\U0001d4cc', '\U0001d4cc', '\u22c2', '\u22c2', '\u25ef', 8721 '\u25ef', '\u22c3', '\u22c3', '\u25bd', '\u25bd', '\U0001d535', '\U0001d535', '\u27fa', '\u27fa', '\u27f7', '\u27f7', '\u03be', '\u03be', '\u27f8', '\u27f8', '\u27f5', '\u27f5', '\u27fc', '\u27fc', '\u22fb', '\u22fb', '\u2a00', '\u2a00', '\U0001d569', '\U0001d569', '\u2a01', '\u2a01', '\u2a02', '\u2a02', '\u27f9', '\u27f9', '\u27f6', '\u27f6', '\U0001d4cd', '\U0001d4cd', '\u2a06', '\u2a06', '\u2a04', 8722 '\u2a04', '\u25b3', '\u25b3', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u00fd', '\u00fd', '\u044f', '\u044f', '\u0177', '\u0177', '\u044b', '\u044b', '\u00a5', '\u00a5', '\U0001d536', '\U0001d536', '\u0457', '\u0457', '\U0001d56a', '\U0001d56a', '\U0001d4ce', '\U0001d4ce', '\u044e', '\u044e', '\u00ff', '\u00ff', '\u017a', '\u017a', '\u017e', '\u017e', '\u0437', '\u0437', '\u017c', '\u017c', '\u2128', 8723 '\u2128', '\u03b6', '\u03b6', '\U0001d537', '\U0001d537', '\u0436', 
'\u0436', '\u21dd', '\u21dd', '\U0001d56b', '\U0001d56b', '\U0001d4cf', '\U0001d4cf', '\u200d', '\u200d', '\u200c', '\u200c', ]; 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 // dom event support, if you want to use it 8748 8749 /// used for DOM events 8750 version(dom_with_events) 8751 alias EventHandler = void delegate(Element handlerAttachedTo, Event event); 8752 8753 /// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it. 8754 version(dom_with_events) 8755 class Event { 8756 this(string eventName, Element target) { 8757 this.eventName = eventName; 8758 this.srcElement = target; 8759 } 8760 8761 /// Prevents the default event handler (if there is one) from being called 8762 void preventDefault() { 8763 defaultPrevented = true; 8764 } 8765 8766 /// Stops the event propagation immediately. 8767 void stopPropagation() { 8768 propagationStopped = true; 8769 } 8770 8771 bool defaultPrevented; 8772 bool propagationStopped; 8773 string eventName; 8774 8775 Element srcElement; 8776 alias srcElement target; 8777 8778 Element relatedTarget; 8779 8780 int clientX; 8781 int clientY; 8782 8783 int button; 8784 8785 bool isBubbling; 8786 8787 /// this sends it only to the target. If you want propagation, use dispatch() instead. 
8788 void send() { 8789 if(srcElement is null) 8790 return; 8791 8792 auto e = srcElement; 8793 8794 if(eventName in e.bubblingEventHandlers) 8795 foreach(handler; e.bubblingEventHandlers[eventName]) 8796 handler(e, this); 8797 8798 if(!defaultPrevented) 8799 if(eventName in e.defaultEventHandlers) 8800 e.defaultEventHandlers[eventName](e, this); 8801 } 8802 8803 /// this dispatches the element using the capture -> target -> bubble process 8804 void dispatch() { 8805 if(srcElement is null) 8806 return; 8807 8808 // first capture, then bubble 8809 8810 Element[] chain; 8811 Element curr = srcElement; 8812 while(curr) { 8813 auto l = curr; 8814 chain ~= l; 8815 curr = curr.parentNode; 8816 8817 } 8818 8819 isBubbling = false; 8820 8821 foreach(e; Retro!Element(chain)) { 8822 if(eventName in e.capturingEventHandlers) 8823 foreach(handler; e.capturingEventHandlers[eventName]) 8824 handler(e, this); 8825 8826 // the default on capture should really be to always do nothing 8827 8828 //if(!defaultPrevented) 8829 // if(eventName in e.defaultEventHandlers) 8830 // e.defaultEventHandlers[eventName](e.element, this); 8831 8832 if(propagationStopped) 8833 break; 8834 } 8835 8836 isBubbling = true; 8837 if(!propagationStopped) 8838 foreach(e; chain) { 8839 if(eventName in e.bubblingEventHandlers) 8840 foreach(handler; e.bubblingEventHandlers[eventName]) 8841 handler(e, this); 8842 8843 if(propagationStopped) 8844 break; 8845 } 8846 8847 if(!defaultPrevented) 8848 foreach(e; chain) { 8849 if(eventName in e.defaultEventHandlers) 8850 e.defaultEventHandlers[eventName](e, this); 8851 } 8852 } 8853 } 8854 8855 struct FormFieldOptions { 8856 // usable for any 8857 8858 /// this is a regex pattern used to validate the field 8859 string pattern; 8860 /// must the field be filled in? Even with a regex, it can be submitted blank if this is false. 
8861 bool isRequired; 8862 /// this is displayed as an example to the user 8863 string placeholder; 8864 8865 // usable for numeric ones 8866 8867 8868 // convenience methods to quickly get some options 8869 @property static FormFieldOptions none() { 8870 FormFieldOptions f; 8871 return f; 8872 } 8873 8874 static FormFieldOptions required() { 8875 FormFieldOptions f; 8876 f.isRequired = true; 8877 return f; 8878 } 8879 8880 static FormFieldOptions regex(string pattern, bool required = false) { 8881 FormFieldOptions f; 8882 f.pattern = pattern; 8883 f.isRequired = required; 8884 return f; 8885 } 8886 8887 static FormFieldOptions fromElement(Element e) { 8888 FormFieldOptions f; 8889 if(e.hasAttribute("required")) 8890 f.isRequired = true; 8891 if(e.hasAttribute("pattern")) 8892 f.pattern = e.pattern; 8893 if(e.hasAttribute("placeholder")) 8894 f.placeholder = e.placeholder; 8895 return f; 8896 } 8897 8898 Element applyToElement(Element e) { 8899 if(this.isRequired) 8900 e.required = "required"; 8901 if(this.pattern.length) 8902 e.pattern = this.pattern; 8903 if(this.placeholder.length) 8904 e.placeholder = this.placeholder; 8905 return e; 8906 } 8907 } 8908 8909 // this needs to look just like a string, but can expand as needed 8910 version(no_dom_stream) 8911 alias string Utf8Stream; 8912 else 8913 class Utf8Stream { 8914 protected: 8915 // these two should be overridden in subclasses to actually do the stream magic 8916 string getMore() { 8917 if(getMoreHelper !is null) 8918 return getMoreHelper(); 8919 return null; 8920 } 8921 8922 bool hasMore() { 8923 if(hasMoreHelper !is null) 8924 return hasMoreHelper(); 8925 return false; 8926 } 8927 // the rest should be ok 8928 8929 public: 8930 this(string d) { 8931 this.data = d; 8932 } 8933 8934 this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) { 8935 this.getMoreHelper = getMoreHelper; 8936 this.hasMoreHelper = hasMoreHelper; 8937 8938 if(hasMore()) 8939 this.data ~= getMore(); 8940 8941 // 
stdout.flush(); 8942 } 8943 8944 enum contextToKeep = 100; 8945 8946 void markDataDiscardable(size_t p) { 8947 8948 if(p < contextToKeep) 8949 return; 8950 p -= contextToKeep; 8951 8952 // pretends data[0 .. p] is gone and adjusts future things as if it was still there 8953 startingLineNumber = getLineNumber(p); 8954 assert(p >= virtualStartIndex); 8955 data = data[p - virtualStartIndex .. $]; 8956 virtualStartIndex = p; 8957 } 8958 8959 int getLineNumber(size_t p) { 8960 int line = startingLineNumber; 8961 assert(p >= virtualStartIndex); 8962 foreach(c; data[0 .. p - virtualStartIndex]) 8963 if(c == '\n') 8964 line++; 8965 return line; 8966 } 8967 8968 8969 @property final size_t length() { 8970 // the parser checks length primarily directly before accessing the next character 8971 // so this is the place we'll hook to append more if possible and needed. 8972 if(lastIdx + 1 >= (data.length + virtualStartIndex) && hasMore()) { 8973 data ~= getMore(); 8974 } 8975 return data.length + virtualStartIndex; 8976 } 8977 8978 final char opIndex(size_t idx) { 8979 if(idx > lastIdx) 8980 lastIdx = idx; 8981 return data[idx - virtualStartIndex]; 8982 } 8983 8984 final string opSlice(size_t start, size_t end) { 8985 if(end > lastIdx) 8986 lastIdx = end; 8987 // writeln(virtualStartIndex, " " , start, " ", end); 8988 assert(start >= virtualStartIndex); 8989 assert(end >= virtualStartIndex); 8990 return data[start - virtualStartIndex .. 
end - virtualStartIndex]; 8991 } 8992 8993 final size_t opDollar() { 8994 return length(); 8995 } 8996 8997 final Utf8Stream opBinary(string op : "~")(string s) { 8998 this.data ~= s; 8999 return this; 9000 } 9001 9002 final Utf8Stream opOpAssign(string op : "~")(string s) { 9003 this.data ~= s; 9004 return this; 9005 } 9006 9007 final Utf8Stream opAssign(string rhs) { 9008 this.data = rhs; 9009 return this; 9010 } 9011 private: 9012 string data; 9013 9014 size_t lastIdx; 9015 9016 bool delegate() hasMoreHelper; 9017 string delegate() getMoreHelper; 9018 9019 int startingLineNumber = 1; 9020 size_t virtualStartIndex = 0; 9021 9022 9023 /+ 9024 // used to maybe clear some old stuff 9025 // you might have to remove elements parsed with it too since they can hold slices into the 9026 // old stuff, preventing gc 9027 void dropFront(int bytes) { 9028 posAdjustment += bytes; 9029 data = data[bytes .. $]; 9030 } 9031 9032 int posAdjustment; 9033 +/ 9034 } 9035 9036 void fillForm(T)(Form form, T obj, string name) { 9037 import arsd.database; 9038 fillData((k, v) => form.setValue(k, v), obj, name); 9039 } 9040 9041 /++ 9042 Normalizes the whitespace in the given text according to HTML rules. 9043 9044 History: 9045 Added March 25, 2022 (dub v10.8) 9046 9047 The `stripLeadingAndTrailing` argument was added September 13, 2024 (dub v11.6). 
9048 +/ 9049 string normalizeWhitespace(string text, bool stripLeadingAndTrailing = true) { 9050 string ret; 9051 ret.reserve(text.length); 9052 bool lastWasWhite = stripLeadingAndTrailing; 9053 foreach(char ch; text) { 9054 if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { 9055 if(lastWasWhite) 9056 continue; 9057 lastWasWhite = true; 9058 ch = ' '; 9059 } else { 9060 lastWasWhite = false; 9061 } 9062 9063 ret ~= ch; 9064 } 9065 9066 if(stripLeadingAndTrailing) 9067 return ret.stripRight; 9068 else { 9069 /+ 9070 if(lastWasWhite && (ret.length == 0 || ret[$-1] != ' ')) 9071 ret ~= ' '; 9072 +/ 9073 return ret; 9074 } 9075 } 9076 9077 unittest { 9078 assert(normalizeWhitespace(" foo ") == "foo"); 9079 assert(normalizeWhitespace(" f\n \t oo ") == "f oo"); 9080 assert(normalizeWhitespace(" foo ", false) == " foo "); 9081 assert(normalizeWhitespace(" foo ", false) == " foo "); 9082 assert(normalizeWhitespace("\nfoo", false) == " foo"); 9083 } 9084 9085 unittest { 9086 Document document; 9087 9088 document = new Document("<test> foo \r </test>"); 9089 assert(document.root.visibleText == "foo"); 9090 9091 document = new Document("<test> foo \r <br>hi</test>"); 9092 assert(document.root.visibleText == "foo\nhi"); 9093 9094 document = new Document("<test> foo \r <br>hi<pre>hi\nthere\n indent<br />line</pre></test>"); 9095 assert(document.root.visibleText == "foo\nhihi\nthere\n indent\nline", document.root.visibleText); 9096 } 9097 9098 /+ 9099 /+ 9100 Syntax: 9101 9102 Tag: tagname#id.class 9103 Tree: Tag(Children, comma, separated...) 9104 Children: Tee or Variable 9105 Variable: $varname with optional |funcname following. 
9106 9107 If a variable has a tree after it, it breaks the variable down: 9108 * if array, foreach it does the tree 9109 * if struct, it breaks down the member variables 9110 9111 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 9112 +/ 9113 struct Stringplate { 9114 /++ 9115 9116 +/ 9117 this(string s) { 9118 9119 } 9120 9121 /++ 9122 9123 +/ 9124 Element expand(T...)(T vars) { 9125 return null; 9126 } 9127 } 9128 /// 9129 unittest { 9130 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 9131 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 9132 } 9133 +/ 9134 9135 bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) { 9136 foreach(child; children) { 9137 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 9138 // cool 9139 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) { 9140 // cool, this is an inline element and none of its children contradict that 9141 } else { 9142 // prolly block 9143 return false; 9144 } 9145 } 9146 return true; 9147 } 9148 9149 private bool isSimpleWhite(dchar c) { 9150 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 9151 } 9152 9153 unittest { 9154 // Test for issue #120 9155 string s = `<html> 9156 <body> 9157 <P>AN 9158 <P>bubbles</P> 9159 <P>giggles</P> 9160 </body> 9161 </html>`; 9162 auto doc = new Document(); 9163 doc.parseUtf8(s, false, false); 9164 auto s2 = doc.toString(); 9165 assert( 9166 s2.indexOf("bubbles") < s2.indexOf("giggles"), 9167 "paragraph order incorrect:\n" ~ s2); 9168 } 9169 9170 unittest { 9171 // test for suncarpet email dec 24 2019 9172 // arbitrary id asduiwh 9173 auto document = new Document("<html> 9174 <head> 9175 <meta charset=\"utf-8\"></meta> 9176 <title>Element.querySelector Test</title> 9177 </head> 9178 <body> 9179 <div id=\"foo\"> 9180 <div>Foo</div> 9181 <div>Bar</div> 9182 </div> 9183 
<div id=\"empty\"></div> 9184 <div id=\"empty-but-text\">test</div> 9185 </body> 9186 </html>"); 9187 9188 auto doc = document; 9189 9190 { 9191 auto empty = doc.requireElementById("empty"); 9192 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 9193 } 9194 { 9195 auto empty = doc.requireElementById("empty-but-text"); 9196 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 9197 } 9198 9199 assert(doc.querySelectorAll("div div").length == 2); 9200 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 9201 assert(doc.querySelectorAll("> html").length == 0); 9202 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 9203 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 9204 9205 9206 assert(doc.root.matches("html")); 9207 assert(!doc.root.matches("nothtml")); 9208 assert(doc.querySelector("#foo > div").matches("div")); 9209 assert(doc.querySelector("body > #foo").matches("#foo")); 9210 9211 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 9212 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 9213 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 9214 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 9215 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 9216 9217 // also confirming the querySelector works via the mdn definition 9218 auto foo = doc.requireSelector("#foo"); 9219 assert(foo.querySelector("#foo > div") !is null); 9220 assert(foo.querySelector("body #foo > div") !is null); 9221 9222 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 9223 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 
9224 //assert(foo.querySelectorAll("#foo > div").length == 2); 9225 } 9226 /* Element.closest: the assertions below expect it to return the starting element itself, one of its ancestors, or null when the selector matches nothing. */ 9227 unittest { 9228 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 9229 auto document = new Document(`<article> 9230 <div id="div-01">Here is div-01 9231 <div id="div-02">Here is div-02 9232 <div id="div-03">Here is div-03</div> 9233 </div> 9234 </div> 9235 </article>`, true, true); 9236 9237 auto el = document.getElementById("div-03"); 9238 assert(el.closest("#div-02").id == "div-02"); 9239 assert(el.closest("div div").id == "div-03"); 9240 assert(el.closest("article > div").id == "div-01"); 9241 assert(el.closest(":not(div)").tagName == "article"); 9242 9243 assert(el.closest("p") is null); 9244 assert(el.closest("p, div") is el); 9245 } 9246 /* :is() and :where(): both take a selector list and are used here inside larger selectors passed to querySelectorAll / querySelector. */ 9247 unittest { 9248 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 9249 auto document = new Document(`<test> 9250 <div class="foo"><p>cool</p><span>bar</span></div> 9251 <main><p>two</p></main> 9252 </test>`); 9253 9254 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 9255 assert(document.querySelector("div:where(.foo)") !is null); 9256 } 9257 /* Chained adjacent-sibling (+) combinators mixed with child (>) combinators and an attribute-prefix test; the same selector is run from doc.root and from doc and both must find the /reviews/8832 link. */ 9258 unittest { 9259 immutable string html = q{ 9260 <root> 9261 <div class="roundedbox"> 9262 <table> 9263 <caption class="boxheader">Recent Reviews</caption> 9264 <tr> 9265 <th>Game</th> 9266 <th>User</th> 9267 <th>Rating</th> 9268 <th>Created</th> 9269 </tr> 9270 9271 <tr> 9272 <td>June 13, 2020 15:10</td> 9273 <td><a href="/reviews/8833">[Show]</a></td> 9274 </tr> 9275 9276 <tr> 9277 <td>June 13, 2020 15:02</td> 9278 <td><a href="/reviews/8832">[Show]</a></td> 9279 </tr> 9280 9281 <tr> 9282 <td>June 13, 2020 14:41</td> 9283 <td><a href="/reviews/8831">[Show]</a></td> 9284 </tr> 9285 </table> 9286 </div> 9287 </root> 9288 }; 9289 9290 auto doc = new Document(cast(string)html); 9291 // this should select the second table row, but... 
9292 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9293 assert(rd !is null); 9294 assert(rd.href == "/reviews/8832"); 9295 9296 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9297 assert(rd !is null); 9298 assert(rd.href == "/reviews/8832"); 9299 } 9300 /* Malformed XML input: constructing the XmlDocument must fail with a catchable Exception; reaching the assert(0) means nothing was thrown. */ 9301 unittest { 9302 try { 9303 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 9304 assert(0); 9305 } catch(Exception e) { 9306 // good; it should throw an exception, not an error. 9307 } 9308 } 9309 /* toPrettyString: pins the current output for single elements, an HTML Document, XmlDocuments, a root detached from its parent, and a full-page round trip. The expected strings are whitespace-sensitive. */ 9310 unittest { 9311 // toPrettyString is not stable, but these are some best-effort attempts 9312 // despite these being in a test, I might change these anyway! 9313 assert(Element.make("a").toPrettyString == "<a></a>"); 9314 assert(Element.make("a", "").toPrettyString(false, 0, " ") == "<a></a>"); 9315 assert(Element.make("a", " ").toPrettyString(false, 0, " ") == "<a> </a>");//, Element.make("a", " ").toPrettyString(false, 0, " ")); 9316 assert(Element.make("a", "b").toPrettyString == "<a>b</a>"); 9317 assert(Element.make("a", "b").toPrettyString(false, 0, "") == "<a>b</a>"); 9318 9319 { 9320 auto document = new Document("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9321 auto pretty = document.toPrettyString(false, 0, " "); 9322 assert(pretty == 9323 `<!DOCTYPE html> 9324 <html> 9325 <body> 9326 <p>hello <a href="world">world</a></p> 9327 </body> 9328 </html>`, pretty); 9329 } 9330 9331 { 9332 auto document = new XmlDocument("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9333 assert(document.toPrettyString(false, 0, " ") == 9334 `<?xml version="1.0" encoding="UTF-8"?> 9335 <html> 9336 <body> 9337 <p> 9338 hello 9339 <a href="world">world</a> 9340 </p> 9341 </body> 9342 </html>`); 9343 } 9344 9345 foreach(test; [ 9346 "<a att=\"http://ele\"><b><ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n 
<ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>", 9347 "<a att=\"http://ele\"><b><ele1>Hello</ele1><c><d><ele2>How are you?</ele2></d><e><ele3>Good & you?</ele3></e></c></b></a>", 9348 ] ) 9349 { 9350 auto document = new XmlDocument(test); 9351 assert(document.root.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9352 assert(document.toPrettyString(false, 0, " ") == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9353 auto omg = document.root; 9354 omg.parent_ = null; 9355 assert(omg.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9356 } 9357 9358 { 9359 auto document = new XmlDocument(`<a><b>toto</b><c></c></a>`); 9360 assert(document.root.toPrettyString(false, 0, null) == `<a><b>toto</b><c></c></a>`); 9361 assert(document.root.toPrettyString(false, 0, " ") == `<a> 9362 <b>toto</b> 9363 <c></c> 9364 </a>`); 9365 } 9366 9367 { 9368 auto str = `<!DOCTYPE html> 9369 <html> 9370 <head> 9371 <title>Test</title> 9372 </head> 9373 <body> 9374 <p>Hello there</p> 9375 <p>I like <a href="">Links</a></p> 9376 <div> 9377 this is indented since there's a block inside 9378 <p>this is the block</p> 9379 and this gets its own line 9380 </div> 9381 </body> 9382 </html>`; 9383 auto doc = new Document(str, true, true); 9384 assert(doc.toPrettyString == str); 9385 } 9386 } 9387 /* requireSelector / querySelector on nested elements: selectors starting with a child combinator (>) applied from a context element, the general-sibling combinator (~), and :has(). */ 9388 unittest { 9389 auto document = new Document("<foo><items><item><title>test</title><desc>desc</desc></item></items></foo>"); 9390 auto items = document.root.requireSelector("> items"); 9391 auto item = items.requireSelector("> item"); 9392 auto title = 
item.requireSelector("> title");

	// this not actually implemented at this point but i might want to later. it prolly should work as an extension of the standard behavior
	// assert(title.requireSelector("~ desc").innerText == "desc");

	assert(item.requireSelector("title ~ desc").innerText == "desc");

	assert(items.querySelector("item:has(title)") !is null);
	assert(items.querySelector("item:has(nothing)") is null);

	assert(title.innerText == "test");
}

unittest {
	auto document = new Document("broken"); // just ensuring it doesn't crash
}

/// Returns the smaller of `a` and `b` (`b` when they are equal).
private long min(long a, long b) {
	if(a < b)
		return a;
	return b;
}

/// Returns the larger of `a` and `b` (`a` when they are equal).
private long max(long a, long b) {
	if(a < b)
		return b;
	return a;
}

alias utf_encode = arsd.core.encodeUtf8;

/++
	Input range that walks an array from its last element to its first.

	`pos` is the count of elements not yet consumed, so `front` is
	`array[pos - 1]` and the range is exhausted once `pos` reaches zero.
+/
private struct Retro(T) {
	T[] array;
	size_t pos;

	this(T[] array) {
		this.array = array;
		this.pos = array.length;
	}

	T front() {
		return array[pos - 1];
	}
	void popFront() {
		pos--;
	}
	bool empty() {
		// exhausted when nothing is left to consume; the previous `pos > 0`
		// test was inverted, which skipped every non-empty array and let
		// `front` index below zero on an empty one
		return pos == 0;
	}
}
unittest {
	int[] seen;
	foreach(v; Retro!int([1, 2, 3]))
		seen ~= v;
	assert(seen == [3, 2, 1]);

	assert(Retro!int(null).empty);
}

// import std.array; // for Appender

/++
	Minimal string builder used by this module.

	The accumulated text lives in a heap-allocated `Impl` reached through a
	pointer, so copies of an `Appender` append to the same string. The
	pointer is only set by the constructor; build instances through it or
	through [appender].
+/
private struct Appender(T : string) {
	/// Appends `s`.
	void put(string s) {
		impl.data ~= s;
	}
	/// Appends the single code unit `c`.
	void put(char c) {
		impl.data ~= c;
	}
	/// Appends the code point `c`, encoded via `arsd.core.encodeUtf8`.
	void put(dchar c) {
		char[4] buffer;
		impl.data ~= buffer[0 .. arsd.core.encodeUtf8(buffer, c)];
	}
	/// Forwards to the built-in array `reserve` on the backing string.
	void reserve(size_t s) {
		impl.data.reserve(s);
	}

	static struct Impl {
		string data;
	}

	Impl* impl;

	/// Everything appended so far.
	string data() {
		return impl.data;
	}

	/// Allocates the shared `Impl` and seeds it with `start`.
	this(string start) {
		impl = new Impl;
		impl.data = start;
	}
}

/// Creates an `Appender!string` whose initial contents are `null`.
private Appender!string appender(T : string)() {
	return Appender!string(null);
}

/++
	Splits `s` at every occurrence of `onWhat`.

	The result always has at least one element: the text after the last
	separator, or all of `s` when the separator never occurs.

	NOTE(review): an empty `onWhat` is not handled specially - confirm no
	caller passes one.
+/
private string[] split(string s, string onWhat) {
	string[] ret;

	more:
	auto idx = s.indexOf(onWhat);
	if(idx == -1) {
		ret ~= s;
	} else {
		ret ~= s[0 .. idx];
		s = s[idx + onWhat.length .. $];
		goto more;
	}

	return ret;
}

/++
	Returns a copy of `s` with every occurrence of `replaceWhat` replaced
	by `withThis`.

	NOTE(review): an empty `replaceWhat` is not handled specially - confirm
	no caller passes one.
+/
private string replace(string s, string replaceWhat, string withThis) {
	string ret;

	more:
	auto idx = s.indexOf(replaceWhat);
	if(idx == -1) {
		ret ~= s;
	} else {
		ret ~= s[0 .. idx];
		ret ~= withThis;
		s = s[idx + replaceWhat.length .. $];
		goto more;
	}
	return ret;
}

/++
	Sorts `obj` in place into ascending order using the C library `qsort`
	and returns the same slice.
+/
private @trusted string[] sortStrings(string[] obj) {
	static extern(C) int comparator(scope const void* ra, scope const void* rb) {
		string a = *cast(string*) ra;
		string b = *cast(string*) rb;
		// qsort needs a three-way result (negative / zero / positive); the
		// previous `return a < b;` could only produce 0 or 1, which is not a
		// consistent ordering
		if(a < b)
			return -1;
		if(a > b)
			return 1;
		return 0;
	}

	// nothing to reorder; this also keeps a null `obj.ptr` away from qsort
	if(obj.length < 2)
		return obj;

	import core.stdc.stdlib;
	qsort(obj.ptr, obj.length, typeof(obj[0]).sizeof, &comparator);
	return obj;
}
unittest {
	assert(sortStrings(["b", "a", "c"]) == ["a", "b", "c"]);
	assert(sortStrings(["x"]) == ["x"]);
	assert(sortStrings(null).length == 0);
}

/++
	Input range over the lines of a string.

	Each `front` includes its terminating `'\n'` when there is one; a final
	line without a newline is returned as-is.
+/
private struct LineSplitter {
	string s;
	size_t nextLineBreak;
	this(string s) {
		this.s = s;
		// nextLineBreak is still 0 here, so this only locates the end of
		// the first line without dropping anything
		popFront();
	}
	string front() {
		return s[0 .. nextLineBreak];
	}
	void popFront() {
		s = s[nextLineBreak .. $];
		nextLineBreak = 0;
		while(nextLineBreak < s.length) {
			if(s[nextLineBreak] == '\n') {
				// keep the newline as part of the current line
				nextLineBreak++;
				return;
			}
			nextLineBreak++;
		}
	}
	bool empty() {
		return s.length == 0;
	}
}
unittest {
	foreach(line; LineSplitter("foo"))
		assert(line == "foo");
	int c;
	foreach(line; LineSplitter("foo\nbar")) {
		if(c == 0)
			assert(line == "foo\n");
		else if(c == 1)
			assert(line == "bar");
		c++;
	}
}

/++
	Wraps an `ElementStream` and only yields the elements for which
	`filter` returns true.
+/
private struct ElementStreamFilter {
	ElementStream range;
	bool delegate(Element e) filter;
	this(ElementStream range, bool delegate(Element e) filter) {
		this.range = range;
		this.filter = filter;
		// position on the first accepted element
		if(!range.empty && !filter(range.front))
			popFront();
	}
	void popFront() {
		range.popFront;
		// skip over everything the filter rejects
		while(!range.empty && !this.filter(range.front)) {
			range.popFront();
		}
	}
	bool empty() {
		return range.empty;
	}
	Element front() {
		return range.front;
	}
}

alias arsd.core.indexOf indexOf;
alias arsd.core.stripInternal strip;
alias arsd.core.stripRightInternal stripRight;
alias arsd.core.startsWith startsWith;
alias arsd.core.endsWith endsWith;

// FIXME: start index can be useful but i used 0 here anyway
/++
	`indexOf` with an optional case-insensitive mode: when `caseSensitive`
	is false both strings are passed through this module's ASCII-only
	[toLower] before searching.

	NOTE(review): the return type is `size_t`, while other code in this file
	compares the two-argument `indexOf` result against -1 - confirm how a
	"not found" result is meant to be represented here.
+/
private size_t indexOf(string haystack, string needle, bool caseSensitive) {
	if(!caseSensitive) {
		haystack = toLower(haystack);
		needle = toLower(needle);
	}
	return indexOf(haystack, needle);
}

/// Converts `f` to a string via `arsd.core.toStringInternal`.
private string to(T : string, F)(F f) {
	return arsd.core.toStringInternal(f);
}
/++
	Reads `f` as a run of decimal digits. There is no validation, no sign
	handling and no overflow check: every element contributes `ch - '0'`.
+/
private int to(T : int, F)(F f) {
	// NOT GENERIC DO NOT USE OUTSIDE OF THIS MODULE'S CONTEXT
	int accumulator;
	foreach(ch; f) {
		accumulator *= 10;
		accumulator += ch - '0';
	}
	return accumulator;
}
/// Encodes each code point of `f` with `arsd.core.encodeUtf8` into a new `char[]`.
private char[] to(T : char[], F : dchar[])(F f) {
	char[] s;
	foreach(dc; f) {
		char[4] buffer;
		s ~= buffer[0 .. arsd.core.encodeUtf8(buffer, dc)];
	}
	return s;
}
/// Same as the `char[]` overload, with the result cast to `string`.
private string to(T : string, F : const(dchar)[])(F f) {
	return cast(string) to!(char[], dchar[])(cast(dchar[]) f);
}

/++
	Lower-cases the ASCII letters 'A'-'Z' in `s`; every other code unit is
	copied unchanged. Returns `s` itself, without allocating, when it
	contains no upper-case ASCII letter.
+/
private string toLower(string s) {
	foreach(ch; s) {
		if(ch >= 'A' && ch <= 'Z')
			goto needed;
	}
	return s; // shortcut, no changes

	needed:
	char[] ret;
	ret.length = s.length;
	foreach(idx, ch; s) {
		if(ch >= 'A' && ch <= 'Z')
			ret[idx] = ch | 32; // setting bit 5 maps 'A'-'Z' onto 'a'-'z'
		else
			ret[idx] = ch;
	}
	return cast(string) ret;
}
unittest {
	assert("".toLower == "");
	assert("foo".toLower == "foo");
	assert("FaZ".toLower == "faz");
	assert("423".toLower == "423");
}

/++
	Upper-cases the ASCII letters 'a'-'z' in `s`; every other code unit is
	copied unchanged. Returns `s` itself, without allocating, when it
	contains no lower-case ASCII letter.
+/
private string toUpper(string s) {
	foreach(ch; s) {
		if(ch >= 'a' && ch <= 'z')
			goto needed;
	}
	return s; // shortcut, no changes

	needed:
	char[] ret;
	ret.length = s.length;
	foreach(idx, ch; s) {
		if(ch >= 'a' && ch <= 'z')
			ret[idx] = ch & ~32; // clearing bit 5 maps 'a'-'z' onto 'A'-'Z'
		else
			ret[idx] = ch;
	}
	return cast(string) ret;
}
unittest {
	assert("".toUpper == "");
	assert("foo".toUpper == "FOO");
	assert("FaZ".toUpper == "FAZ");
	assert("423".toUpper == "423");
}

/*
Copyright: Adam D. Ruppe, 2010 - 2023
License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others
*/